From 96dac0298b0dda73b6a9935530564cc72b0fae83 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina <45899783+TimilsinaBimal@users.noreply.github.com> Date: Sun, 1 Mar 2026 13:14:39 +0545 Subject: [PATCH 01/68] refactor: add auth service and refactor token endpoints (#115) --- app/api/endpoints/manifest.py | 2 - app/api/endpoints/tokens.py | 231 ++++------------------------------ app/api/models/tokens.py | 36 ++++++ app/services/auth.py | 199 +++++++++++++++++++++++++++++ 4 files changed, 257 insertions(+), 211 deletions(-) create mode 100644 app/api/models/tokens.py create mode 100644 app/services/auth.py diff --git a/app/api/endpoints/manifest.py b/app/api/endpoints/manifest.py index 30261f5..f533010 100644 --- a/app/api/endpoints/manifest.py +++ b/app/api/endpoints/manifest.py @@ -7,7 +7,6 @@ @router.get("/manifest.json") async def manifest(): - """Get base manifest for unauthenticated users.""" manifest = manifest_service.get_base_manifest() # since user is not logged in, return empty catalogs manifest["catalogs"] = [] @@ -16,5 +15,4 @@ async def manifest(): @router.get("/{token}/manifest.json") async def manifest_token(token: str): - """Get manifest for authenticated user.""" return await manifest_service.get_manifest_for_token(token) diff --git a/app/api/endpoints/tokens.py b/app/api/endpoints/tokens.py index bea80c0..bb2b541 100644 --- a/app/api/endpoints/tokens.py +++ b/app/api/endpoints/tokens.py @@ -1,229 +1,42 @@ -from datetime import datetime, timezone -from typing import Literal - -from fastapi import APIRouter, HTTPException, Request +from fastapi import APIRouter, HTTPException +from fastapi.responses import JSONResponse from loguru import logger -from pydantic import BaseModel, Field - -from app.core.config import settings -from app.core.security import redact_token -from app.core.settings import CatalogConfig, PosterRatingConfig, UserSettings, get_default_settings -from app.services.manifest import manifest_service -from app.services.stremio.service 
import StremioBundle -from app.services.token_store import token_store - -router = APIRouter(prefix="/tokens", tags=["tokens"]) - - -class TokenRequest(BaseModel): - authKey: str | None = Field(default=None, description="Stremio auth key") - email: str | None = Field(default=None, description="Stremio account email") - password: str | None = Field(default=None, description="Stremio account password (stored securely)") - catalogs: list[CatalogConfig] | None = Field(default=None, description="Optional catalog configuration") - language: str = Field(default="en-US", description="Language for TMDB API") - poster_rating: PosterRatingConfig | None = Field(default=None, description="Poster rating provider configuration") - excluded_movie_genres: list[str] = Field(default_factory=list, description="List of movie genre IDs to exclude") - excluded_series_genres: list[str] = Field(default_factory=list, description="List of series genre IDs to exclude") - popularity: Literal["mainstream", "balanced", "gems", "all"] = Field( - default="balanced", description="Popularity for TMDB API" - ) - year_min: int = Field(default=2010, description="Minimum release year for TMDB API") - year_max: int = Field(default=2025, description="Maximum release year for TMDB API") - sorting_order: Literal["default", "movies_first", "series_first"] = Field( - default="default", description="Order of movies and series catalogs" - ) - simkl_api_key: str | None = Field(default=None, description="Simkl API Key for the user") - gemini_api_key: str | None = Field(default=None, description="Gemini API Key for AI features") - tmdb_api_key: str | None = Field( - default=None, description="TMDB API Key (required for new clients if server has none)" - ) +from app.api.models.tokens import TokenRequest, TokenResponse +from app.services.auth import auth_service -class TokenResponse(BaseModel): - token: str - manifestUrl: str - expiresInSeconds: int | None = Field( - default=None, - description="Number of seconds 
before the token expires (None means it does not expire)", - ) - - -async def _verify_credentials_or_raise(bundle: StremioBundle, auth_key: str) -> str: - """Ensure the supplied auth key is valid.""" - try: - await bundle.auth.get_user_info(auth_key) - return auth_key - except Exception as exc: - raise HTTPException( - status_code=400, - detail="Invalid Stremio auth key.", - ) from exc +router = APIRouter(prefix="/tokens", tags=["Tokens"]) @router.post("/", response_model=TokenResponse) -async def create_token(payload: TokenRequest, request: Request) -> TokenResponse: - # Prefer email+password if provided; else require authKey - email = (payload.email or "").strip() or None - password = (payload.password or "").strip() or None - stremio_auth_key = (payload.authKey or "").strip() or None - - if not (email and password) and not stremio_auth_key: - raise HTTPException(status_code=400, detail="Provide email+password or a valid Stremio auth key.") - - # Remove quotes if present for authKey - if stremio_auth_key and stremio_auth_key.startswith('"') and stremio_auth_key.endswith('"'): - stremio_auth_key = stremio_auth_key[1:-1].strip() - - bundle = StremioBundle() - # 1. Establish a valid auth key and fetch user info - if email and password: - stremio_auth_key = await bundle.auth.login(email, password) - +async def create_token(payload: TokenRequest) -> TokenResponse: try: - user_info = await bundle.auth.get_user_info(stremio_auth_key) - user_id = user_info["user_id"] - resolved_email = user_info.get("email", "") - except Exception as e: - raise HTTPException(status_code=400, detail=f"Failed to verify Stremio identity: {e}") - - # 2. Check if user already exists - token = token_store.get_token_from_user_id(user_id) - existing_data = await token_store.get_user_data(token) - - # 3. 
Construct Settings - default_settings = get_default_settings() - poster_rating = payload.poster_rating - user_settings = UserSettings( - language=payload.language or default_settings.language, - catalogs=payload.catalogs if payload.catalogs else default_settings.catalogs, - poster_rating=poster_rating, - excluded_movie_genres=payload.excluded_movie_genres, - excluded_series_genres=payload.excluded_series_genres, - year_min=payload.year_min, - year_max=payload.year_max, - popularity=payload.popularity, - sorting_order=payload.sorting_order, - simkl_api_key=payload.simkl_api_key, - gemini_api_key=payload.gemini_api_key, - tmdb_api_key=payload.tmdb_api_key, - ) - - # 4. Prepare payload to store - payload_to_store = { - "authKey": stremio_auth_key, - "email": resolved_email or email or "", - "settings": user_settings.model_dump(), - } - if existing_data: - payload_to_store["last_updated"] = existing_data.get("last_updated") - else: - payload_to_store["last_updated"] = datetime.now(timezone.utc).isoformat() - - if email and password: - payload_to_store["password"] = password - - # 5. Store user data - token = await token_store.store_user_data(user_id, payload_to_store) - account_status = "updated" if existing_data else "created" - logger.info(f"[{redact_token(token)}] Account {account_status} for user {user_id}") - - # 6. Cache library items and profiles before returning - # This ensures manifest generation is fast when user installs the addon - # We wait for caching to complete so everything is ready immediately - try: - logger.info(f"[{redact_token(token)}] Caching library and profiles before returning token") - await manifest_service.cache_library_and_profiles(bundle, stremio_auth_key, user_settings, token) - logger.info(f"[{redact_token(token)}] Successfully cached library and profiles") - except Exception as e: - logger.warning( - f"[{redact_token(token)}] Failed to cache library and profiles: {e}. " - "Continuing anyway - will cache on manifest request." 
- ) - # Continue even if caching fails - manifest service will handle it - - base_url = settings.HOST_NAME - manifest_url = f"{base_url}/{token}/manifest.json" - expires_in = settings.TOKEN_TTL_SECONDS if settings.TOKEN_TTL_SECONDS > 0 else None - - await bundle.close() - - return TokenResponse( - token=token, - manifestUrl=manifest_url, - expiresInSeconds=expires_in, - ) - - -async def get_stremio_user_data(payload: TokenRequest) -> tuple[str, str]: - bundle = StremioBundle() - try: - email = (payload.email or "").strip() or None - password = (payload.password or "").strip() or None - auth_key = (payload.authKey or "").strip() or None - - if email and password: - try: - auth_key = await bundle.auth.login(email, password) - user_info = await bundle.auth.get_user_info(auth_key) - return user_info["user_id"], user_info.get("email", email) - except Exception as e: - logger.error(f"Stremio identity check failed: {e}") - raise HTTPException(status_code=400, detail="Failed to verify Stremio identity.") - elif auth_key: - if auth_key.startswith('"') and auth_key.endswith('"'): - auth_key = auth_key[1:-1].strip() - try: - user_info = await bundle.auth.get_user_info(auth_key) - return user_info["user_id"], user_info.get("email", "") - except Exception as e: - logger.error(f"Stremio identity check failed: {e}") - raise HTTPException(status_code=400, detail="Invalid Stremio auth key.") - else: - raise HTTPException(status_code=400, detail="Credentials required.") - finally: - await bundle.close() + return await auth_service.create_user_token(payload) + except HTTPException: + raise + except Exception as exc: + logger.exception(f"Token creation failed: {exc}") + raise HTTPException(status_code=503, detail="Storage temporarily unavailable.") @router.post("/stremio-identity", status_code=200) async def check_stremio_identity(payload: TokenRequest): - """Fetch user info from Stremio and check if account exists.""" - user_id, email = await get_stremio_user_data(payload) try: - 
token = token_store.get_token_from_user_id(user_id) - user_data = await token_store.get_user_data(token) - exists = bool(user_data) - except Exception: - exists = False - user_data = None - - response = {"user_id": user_id, "email": email, "exists": exists} - if exists and user_data: - # Reconstruct UserSettings to ensure defaults (like sorting_order) are included for old accounts - raw_settings = user_data.get("settings", {}) - try: - user_settings = UserSettings(**raw_settings) - response["settings"] = user_settings.model_dump() - except Exception as e: - logger.warning(f"Failed to normalize settings for user {user_id}: {e}") - response["settings"] = raw_settings - return response + return await auth_service.get_identity_with_settings(payload) + except HTTPException: + raise + except Exception as exc: + logger.exception(f"Identity check failed: {exc}") + raise HTTPException(status_code=503, detail="Service temporarily unavailable.") @router.delete("/", status_code=200) async def delete_redis_token(payload: TokenRequest): - """Delete a token based on Stremio credentials.""" try: - user_id, _ = await get_stremio_user_data(payload) - token = token_store.get_token_from_user_id(user_id) - existing_data = await token_store.get_user_data(token) - if not existing_data: - raise HTTPException(status_code=404, detail="Account not found.") - - await token_store.delete_token(token) - logger.info(f"[{redact_token(token)}] Token deleted for user {user_id}") - return {"detail": "Settings deleted successfully"} + await auth_service.delete_user_account(payload) + return JSONResponse(status_code=200, content="Settings deleted successfully") except HTTPException: raise except Exception as exc: - logger.error(f"Token deletion failed: {exc}") - raise HTTPException(status_code=503, detail="Storage temporarily unavailable.") + logger.exception(f"Account deletion failed: {exc}") + raise HTTPException(status_code=503, detail="Service temporarily unavailable.") diff --git 
a/app/api/models/tokens.py b/app/api/models/tokens.py new file mode 100644 index 0000000..60b1a9c --- /dev/null +++ b/app/api/models/tokens.py @@ -0,0 +1,36 @@ +from typing import Literal + +from pydantic import BaseModel, Field + +from app.core.settings import CatalogConfig, PosterRatingConfig + + +class TokenRequest(BaseModel): + authKey: str | None = Field(default=None, description="Stremio auth key") + email: str | None = Field(default=None, description="Stremio account email") + password: str | None = Field(default=None, description="Stremio account password") + catalogs: list[CatalogConfig] | None = Field(default=None, description="Catalog configuration") + language: str = Field(default="en-US", description="Language for TMDB API") + poster_rating: PosterRatingConfig | None = Field(default=None, description="Poster rating provider configuration") + excluded_movie_genres: list[str] = Field(default_factory=list, description="List of movie genre IDs to exclude") + excluded_series_genres: list[str] = Field(default_factory=list, description="List of series genre IDs to exclude") + popularity: Literal["mainstream", "balanced", "gems", "all"] = Field( + default="balanced", description="Popularity for TMDB API" + ) + year_min: int = Field(default=2010, description="Minimum release year for TMDB API") + year_max: int = Field(default=2026, description="Maximum release year for TMDB API") + sorting_order: Literal["default", "movies_first", "series_first"] = Field( + default="default", description="Order of movies and series catalogs" + ) + simkl_api_key: str | None = Field(default=None, description="Simkl API Key for the user") + gemini_api_key: str | None = Field(default=None, description="Gemini API Key for AI features") + tmdb_api_key: str | None = Field(default=None, description="TMDB API Key") + + +class TokenResponse(BaseModel): + token: str + manifestUrl: str + expiresInSeconds: int | None = Field( + default=None, + description="Number of seconds before the token 
expires (None means it does not expire)", + ) diff --git a/app/services/auth.py b/app/services/auth.py new file mode 100644 index 0000000..16dd23d --- /dev/null +++ b/app/services/auth.py @@ -0,0 +1,199 @@ +from datetime import datetime, timezone + +from fastapi import HTTPException +from loguru import logger + +from app.api.models.tokens import TokenRequest, TokenResponse +from app.core.config import settings +from app.core.security import redact_token +from app.core.settings import UserSettings, get_default_settings +from app.services.manifest import manifest_service +from app.services.stremio.service import StremioBundle +from app.services.token_store import token_store + + +class AuthService: + async def resolve_auth_key(self, credentials: dict, token: str | None = None) -> str | None: + """Validate auth key. If expired, try email+password login. Update store on refresh.""" + auth_key = (credentials.get("authKey") or "").strip() or None + email = (credentials.get("email") or "").strip() or None + password = (credentials.get("password") or "").strip() or None + + if auth_key and auth_key.startswith('"') and auth_key.endswith('"'): + auth_key = auth_key[1:-1].strip() + + bundle = StremioBundle() + try: + # 1. Try existing auth key + if auth_key: + try: + await bundle.auth.get_user_info(auth_key) + return auth_key + except Exception: + logger.info("Stremio auth key expired or invalid, attempting refresh with credentials") + + # 2. 
Try login if auth key failed or wasn't provided + if email and password: + try: + new_key = await bundle.auth.login(email, password) + if token and new_key != auth_key: + existing_data = await self.get_credentials(token) + if existing_data: + existing_data["authKey"] = new_key + await token_store.update_user_data(token, existing_data) + return new_key + except Exception as e: + logger.error(f"Stremio login failed: {e}") + return None + finally: + await bundle.close() + + return None + + async def get_credentials(self, token: str) -> dict | None: + """Get user credentials from token store.""" + return await token_store.get_user_data(token) + + async def store_credentials(self, user_id: str, payload: dict) -> str: + """Store credentials, return token.""" + # Ensure last_updated is present if it's a new user + if "last_updated" not in payload: + token = token_store.get_token_from_user_id(user_id) + existing = await self.get_credentials(token) + if existing: + payload["last_updated"] = existing.get("last_updated") + else: + payload["last_updated"] = datetime.now(timezone.utc).isoformat() + + return await token_store.store_user_data(user_id, payload) + + async def get_stremio_user_data(self, payload: TokenRequest) -> tuple[str, str, str]: + """ + Authenticates with Stremio and returns (user_id, email, auth_key). + """ + creds = payload.model_dump() + auth_key = await self.resolve_auth_key(creds) + + if not auth_key: + raise HTTPException(status_code=400, detail="Failed to verify Stremio identity. 
Provide valid credentials.") + + bundle = StremioBundle() + try: + user_info = await bundle.auth.get_user_info(auth_key) + user_id = user_info["user_id"] + resolved_email = user_info.get("email", payload.email or "") + return user_id, resolved_email, auth_key + except Exception as e: + logger.error(f"Stremio identity verification failed: {e}") + raise HTTPException(status_code=400, detail=f"Failed to verify Stremio identity: {e}") + finally: + await bundle.close() + + async def create_user_token(self, payload: TokenRequest) -> TokenResponse: + """ + Main logic for creating or updating a user token and caching their library. + """ + # 1. Authenticate and get user info + user_id, resolved_email, stremio_auth_key = await self.get_stremio_user_data(payload) + + # 2. Check if user already exists + token = token_store.get_token_from_user_id(user_id) + existing_data = await self.get_credentials(token) + + # 3. Prepare payload + user_settings = self._build_user_settings(payload) + payload_to_store = { + "authKey": stremio_auth_key, + "email": resolved_email, + "settings": user_settings.model_dump(), + } + if payload.password: + payload_to_store["password"] = payload.password.strip() + + if existing_data: + payload_to_store["last_updated"] = existing_data.get("last_updated") + + # 4. Store user data + token = await self.store_credentials(user_id, payload_to_store) + + # 5. Cache library items and profiles + await self._trigger_initial_caching(stremio_auth_key, user_settings, token) + + # 6. 
Build response + base_url = settings.HOST_NAME + manifest_url = f"{base_url}/{token}/manifest.json" + expires_in = settings.TOKEN_TTL_SECONDS if settings.TOKEN_TTL_SECONDS > 0 else None + + return TokenResponse( + token=token, + manifestUrl=manifest_url, + expiresInSeconds=expires_in, + ) + + def _build_user_settings(self, payload: TokenRequest) -> UserSettings: + default_settings = get_default_settings() + return UserSettings( + language=payload.language or default_settings.language, + catalogs=payload.catalogs if payload.catalogs else default_settings.catalogs, + poster_rating=payload.poster_rating, + excluded_movie_genres=payload.excluded_movie_genres, + excluded_series_genres=payload.excluded_series_genres, + year_min=payload.year_min, + year_max=payload.year_max, + popularity=payload.popularity, + sorting_order=payload.sorting_order, + simkl_api_key=payload.simkl_api_key, + gemini_api_key=payload.gemini_api_key, + tmdb_api_key=payload.tmdb_api_key, + ) + + async def get_identity_with_settings(self, payload: TokenRequest) -> dict: + """Fetch Stremio identity and associated user settings if they exist.""" + user_id, email, _ = await self.get_stremio_user_data(payload) + + token = token_store.get_token_from_user_id(user_id) + existing_data = await self.get_credentials(token) + exists = bool(existing_data) + + response = {"user_id": user_id, "email": email, "exists": exists} + + if exists and existing_data: + # Reconstruct UserSettings to ensure defaults are included for old accounts + raw_settings = existing_data.get("settings", {}) + try: + user_settings = UserSettings(**raw_settings) + response["settings"] = user_settings.model_dump() + except Exception as e: + logger.warning(f"Failed to normalize settings for user {user_id}: {e}") + response["settings"] = raw_settings + + return response + + async def delete_user_account(self, payload: TokenRequest) -> None: + """Deletes user account and associated data.""" + user_id, _, _ = await 
self.get_stremio_user_data(payload) + token = token_store.get_token_from_user_id(user_id) + + existing_data = await self.get_credentials(token) + if not existing_data: + raise HTTPException(status_code=404, detail="Account not found.") + + await token_store.delete_token(token) + logger.info(f"[{redact_token(token)}] Token deleted for user {user_id}") + + async def _trigger_initial_caching(self, auth_key: str, settings: UserSettings, token: str): + bundle = StremioBundle() + try: + logger.info(f"[{redact_token(token)}] Caching library and profiles before returning token") + await manifest_service.cache_library_and_profiles(bundle, auth_key, settings, token) + logger.info(f"[{redact_token(token)}] Successfully cached library and profiles") + except Exception as e: + logger.warning( + f"[{redact_token(token)}] Failed to cache library and profiles: {e}. " + "Continuing anyway - will cache on manifest request." + ) + finally: + await bundle.close() + + +auth_service = AuthService() From d348e97a85d9b811568f105cd967866431850c48 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Fri, 20 Mar 2026 15:26:00 +0545 Subject: [PATCH 02/68] refactor: refactor translation module --- app/api/endpoints/health.py | 5 +-- app/api/endpoints/languages.py | 16 ++++++++++ app/api/router.py | 4 +-- app/core/app.py | 2 +- app/models/token.py | 11 ------- .../meta.py => services/language_service.py} | 31 ++++--------------- app/services/rpdb.py | 7 ----- 7 files changed, 28 insertions(+), 48 deletions(-) create mode 100644 app/api/endpoints/languages.py delete mode 100644 app/models/token.py rename app/{api/endpoints/meta.py => services/language_service.py} (55%) delete mode 100644 app/services/rpdb.py diff --git a/app/api/endpoints/health.py b/app/api/endpoints/health.py index 0e339e9..65d7a62 100644 --- a/app/api/endpoints/health.py +++ b/app/api/endpoints/health.py @@ -1,8 +1,9 @@ from fastapi import APIRouter +from fastapi.responses import JSONResponse router = 
APIRouter(tags=["health"]) @router.get("/health", summary="Simple readiness probe") -async def health_check() -> dict[str, str]: - return {"status": "ok"} +async def health_check() -> JSONResponse: + return JSONResponse(status_code=200, content="System healthy!") diff --git a/app/api/endpoints/languages.py b/app/api/endpoints/languages.py new file mode 100644 index 0000000..e5fd94b --- /dev/null +++ b/app/api/endpoints/languages.py @@ -0,0 +1,16 @@ +from fastapi import APIRouter, HTTPException +from loguru import logger + +from app.services.language_service import fetch_languages_list + +router = APIRouter() + + +@router.get("/api/languages") +async def get_languages(): + try: + languages = await fetch_languages_list() + return languages + except Exception as e: + logger.error(f"Failed to fetch languages: {e}") + raise HTTPException(status_code=502, detail=f"Failed to fetch languages from TMDB: {e}") diff --git a/app/api/router.py b/app/api/router.py index 72c99bc..1aff2bb 100644 --- a/app/api/router.py +++ b/app/api/router.py @@ -3,8 +3,8 @@ from .endpoints.announcement import router as announcement_router from .endpoints.catalogs import router as catalogs_router from .endpoints.health import router as health_router +from .endpoints.languages import router as language_router from .endpoints.manifest import router as manifest_router -from .endpoints.meta import router as meta_router from .endpoints.stats import router as stats_router from .endpoints.tokens import router as tokens_router from .endpoints.validation import router as validation_router @@ -21,7 +21,7 @@ async def root(): api_router.include_router(catalogs_router) api_router.include_router(tokens_router) api_router.include_router(health_router) -api_router.include_router(meta_router) +api_router.include_router(language_router) api_router.include_router(announcement_router) api_router.include_router(stats_router) api_router.include_router(validation_router) diff --git a/app/core/app.py b/app/core/app.py 
index c65f885..84bdbd4 100644 --- a/app/core/app.py +++ b/app/core/app.py @@ -10,7 +10,7 @@ from jinja2 import Environment, FileSystemLoader from loguru import logger -from app.api.endpoints.meta import fetch_languages_list +from app.api.endpoints.languages import fetch_languages_list from app.api.router import api_router from app.core.settings import get_default_catalogs_for_frontend from app.services.redis_service import redis_service diff --git a/app/models/token.py b/app/models/token.py deleted file mode 100644 index 80f6b3a..0000000 --- a/app/models/token.py +++ /dev/null @@ -1,11 +0,0 @@ -from pydantic import BaseModel - - -class UserSettings(BaseModel): - pass - - -class Credentials(BaseModel): - authKey: str - email: str - user_settings: UserSettings diff --git a/app/api/endpoints/meta.py b/app/services/language_service.py similarity index 55% rename from app/api/endpoints/meta.py rename to app/services/language_service.py index db2367d..5a4a5ed 100644 --- a/app/api/endpoints/meta.py +++ b/app/services/language_service.py @@ -1,23 +1,14 @@ import asyncio -from fastapi import APIRouter, HTTPException -from loguru import logger +from app.services.tmdb.service import TMDBService, get_tmdb_service -from app.services.tmdb.service import get_tmdb_service -router = APIRouter() - - -async def fetch_languages_list(): - """ - Fetch and format languages list from TMDB. - Returns a list of language dictionaries with iso_639_1, language, and country. 
- """ - tmdb = get_tmdb_service() +async def fetch_languages_list() -> list[dict[str, str]]: + tmdb_service: TMDBService = get_tmdb_service() tasks = [ - tmdb.get_primary_translations(), - tmdb.get_languages(), - tmdb.get_countries(), + tmdb_service.get_primary_translations(), + tmdb_service.get_languages(), + tmdb_service.get_countries(), ] primary_translations, languages, countries = await asyncio.gather(*tasks) @@ -45,13 +36,3 @@ async def fetch_languages_list(): ) result.sort(key=lambda x: (x["iso_639_1"] != "en-US", x["language"])) return result - - -@router.get("/api/languages") -async def get_languages(): - try: - languages = await fetch_languages_list() - return languages - except Exception as e: - logger.error(f"Failed to fetch languages: {e}") - raise HTTPException(status_code=502, detail="Failed to fetch languages from TMDB") diff --git a/app/services/rpdb.py b/app/services/rpdb.py deleted file mode 100644 index 8d4e6d6..0000000 --- a/app/services/rpdb.py +++ /dev/null @@ -1,7 +0,0 @@ -class RPDBService: - @staticmethod - def get_poster_url(api_key: str, item_id: str) -> str: - """ - Get poster URL for a specific item by IMDB ID. 
- """ - return f"https://api.ratingposterdb.com/{api_key}/imdb/poster-default/{item_id}.jpg?fallback=true" From 5b93b0618a23fda2eb19d1245531266abff2c512 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Fri, 20 Mar 2026 15:31:21 +0545 Subject: [PATCH 03/68] refactor catalog updater and manifest services for improved readability and performance --- app/services/catalog.py | 121 ++++++++++++++++-- app/services/catalog_updater.py | 46 +++++-- app/services/manifest.py | 40 ++++-- app/services/profile/integration.py | 19 +++ .../recommendation/catalog_service.py | 9 +- app/utils/catalog.py | 102 --------------- 6 files changed, 195 insertions(+), 142 deletions(-) delete mode 100644 app/utils/catalog.py diff --git a/app/services/catalog.py b/app/services/catalog.py index 06350cb..7e4c804 100644 --- a/app/services/catalog.py +++ b/app/services/catalog.py @@ -13,7 +13,69 @@ from app.services.scoring import ScoringService from app.services.tmdb.service import get_tmdb_service from app.services.user_cache import user_cache -from app.utils.catalog import get_catalogs_from_config + + +def get_catalogs_from_config( + user_settings: UserSettings, + cat_id: str, + default_name: str, + default_movie: bool, + default_series: bool, +) -> list[dict[str, Any]]: + catalogs = [] + config = next((c for c in user_settings.catalogs if c.id == cat_id), None) + + if config and config.enabled: + name = config.name if config.name else default_name + enabled_movie = getattr(config, "enabled_movie", default_movie) + enabled_series = getattr(config, "enabled_series", default_series) + display_at_home = getattr(config, "display_at_home", True) + extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] + + if enabled_movie: + catalogs.append({"type": "movie", "id": cat_id, "name": name, "extra": extra}) + if enabled_series: + catalogs.append({"type": "series", "id": cat_id, "name": name, "extra": extra}) + + return catalogs + + +def get_config_id(catalog: dict[str, Any]) -> str | None: + 
catalog_id = catalog.get("id", "") + if catalog_id.startswith("watchly.theme."): + return "watchly.theme" + if catalog_id.startswith("watchly.loved."): + return "watchly.loved" + if catalog_id.startswith("watchly.watched."): + return "watchly.watched" + return catalog_id + + +def sort_catalogs(catalogs: list[dict[str, Any]], user_settings: UserSettings) -> list[dict[str, Any]]: + """Sort catalogs according to user settings and content-type order.""" + if not user_settings: + return catalogs + + order_map = {c.id: i for i, c in enumerate(user_settings.catalogs)} + + def get_setting_index(catalog: dict[str, Any]) -> int: + return order_map.get(get_config_id(catalog), 999) + + sorting_order = getattr(user_settings, "sorting_order", "default") + + if sorting_order == "movies_first": + return sorted( + catalogs, + key=lambda x: (0 if x.get("type") == "movie" else 1, get_setting_index(x)), + ) + + if sorting_order == "series_first": + return sorted( + catalogs, + key=lambda x: (0 if x.get("type") == "series" else 1, get_setting_index(x)), + ) + + return sorted(catalogs, key=get_setting_index) class DynamicCatalogService: @@ -38,7 +100,10 @@ def build_catalog_entry(self, item, label, config_id, display_at_home: bool = Tr if config_id in ["watchly.item", "watchly.loved", "watchly.watched"]: # New Item-based catalog format catalog_id = f"{config_id}.{item_id}" - elif item_id.startswith("tt") and config_id in ["watchly.loved", "watchly.watched"]: + elif item_id.startswith("tt") and config_id in [ + "watchly.loved", + "watchly.watched", + ]: catalog_id = f"{config_id}.{item_id}" else: catalog_id = item_id @@ -200,12 +265,22 @@ async def _generate_for_type(media_type: str, genres: list[int]): continue media_type, rows = result for row in rows: - catalogs.append({"type": media_type, "id": row.id, "name": row.title, "extra": extra}) + catalogs.append( + { + "type": media_type, + "id": row.id, + "name": row.title, + "extra": extra, + } + ) return catalogs async def 
get_dynamic_catalogs( - self, library_items: dict, user_settings: UserSettings | None = None, token: str | None = None + self, + library_items: dict, + user_settings: UserSettings | None = None, + token: str | None = None, ) -> list[dict]: """Generate all dynamic catalog rows based on enabled configurations.""" catalogs = [] @@ -222,7 +297,12 @@ async def get_dynamic_catalogs( enabled_series = getattr(theme_cfg, "enabled_series", True) display_at_home = getattr(theme_cfg, "display_at_home", True) theme_catalogs = await self.get_theme_based_catalogs( - library_items, user_settings, enabled_movie, enabled_series, display_at_home, token + library_items, + user_settings, + enabled_movie, + enabled_series, + display_at_home, + token, ) catalogs.extend(theme_catalogs) @@ -235,17 +315,35 @@ async def get_dynamic_catalogs( # 5. Add watchly.creators catalog catalogs.extend( - get_catalogs_from_config(user_settings, "watchly.creators", "From your favourite Creators", False, False) + get_catalogs_from_config( + user_settings, + "watchly.creators", + "From your favourite Creators", + False, + False, + ) ) # 6. Add watchly.all.loved catalog catalogs.extend( - get_catalogs_from_config(user_settings, "watchly.all.loved", "Based on what you loved", True, True) + get_catalogs_from_config( + user_settings, + "watchly.all.loved", + "Based on what you loved", + True, + True, + ) ) # 7. 
Add watchly.liked.all catalog catalogs.extend( - get_catalogs_from_config(user_settings, "watchly.liked.all", "Based on what you liked", True, True) + get_catalogs_from_config( + user_settings, + "watchly.liked.all", + "Based on what you liked", + True, + True, + ) ) return catalogs @@ -336,5 +434,10 @@ def is_type_enabled(config, content_type: str) -> bool: label = watched_config.name if watched_config.name else "Because you watched" watched_config_display_at_home = getattr(watched_config, "display_at_home", True) catalogs.append( - self.build_catalog_entry(last_watched, label, "watchly.watched", watched_config_display_at_home) + self.build_catalog_entry( + last_watched, + label, + "watchly.watched", + watched_config_display_at_home, + ) ) diff --git a/app/services/catalog_updater.py b/app/services/catalog_updater.py index 8c9bc18..031fee1 100644 --- a/app/services/catalog_updater.py +++ b/app/services/catalog_updater.py @@ -1,6 +1,6 @@ import asyncio from datetime import datetime, timezone -from typing import Any +from typing import Any, cast from fastapi import HTTPException from loguru import logger @@ -8,12 +8,11 @@ from app.core.config import settings from app.core.security import redact_token from app.core.settings import UserSettings -from app.services.catalog import DynamicCatalogService +from app.services.catalog import DynamicCatalogService, sort_catalogs from app.services.manifest import manifest_service from app.services.stremio.service import StremioBundle from app.services.token_store import token_store from app.services.translation import translation_service -from app.utils.catalog import sort_catalogs class CatalogUpdater: @@ -71,13 +70,18 @@ async def refresh_catalogs_for_credentials( """ if not credentials: logger.warning(f"[{redact_token(token)}] Attempted to refresh catalogs with no credentials.") - raise HTTPException(status_code=401, detail="Invalid or expired token. 
Please reconfigure the addon.") + raise HTTPException( + status_code=401, + detail="Invalid or expired token. Please reconfigure the addon.", + ) auth_key = credentials.get("authKey") # check if auth key is valid bundle = StremioBundle() try: try: + if not auth_key: + raise ValueError("Missing auth key") await bundle.auth.get_user_info(auth_key) except Exception as e: logger.exception(f"[{redact_token(token)}] Invalid auth key. Falling back to login: {e}") @@ -90,6 +94,11 @@ async def refresh_catalogs_for_credentials( else: return True # true since we won't be able to update it again. so no need to try again. + if not auth_key: + return True + + resolved_auth_key = cast(str, auth_key) + # 1. Check if addon is still installed try: addon_installed = await bundle.addons.is_addon_installed(auth_key) @@ -111,36 +120,44 @@ async def refresh_catalogs_for_credentials( # so no need to try again. return True - library_items = await manifest_service.cache_library_and_profiles(bundle, auth_key, user_settings, token) - language = user_settings.language if user_settings else "en-US" + if not user_settings: + return True + + resolved_settings = cast(UserSettings, user_settings) + + library_items = await manifest_service.cache_library_and_profiles( + bundle, resolved_auth_key, resolved_settings, token + ) + language = resolved_settings.language from app.core.settings import resolve_tmdb_api_key - tmdb_key = resolve_tmdb_api_key(user_settings) + tmdb_key = resolve_tmdb_api_key(resolved_settings) dynamic_catalog_service = DynamicCatalogService( language=language, tmdb_api_key=tmdb_key, ) catalogs = await dynamic_catalog_service.get_dynamic_catalogs( - library_items=library_items, user_settings=user_settings, token=token + library_items=library_items, + user_settings=resolved_settings, + token=token, ) # Translate catalogs - if user_settings and user_settings.language: + if resolved_settings.language: for cat in catalogs: if name := cat.get("name"): try: - cat["name"] = await 
translation_service.translate(name, user_settings.language) + cat["name"] = await translation_service.translate(name, resolved_settings.language) except Exception as e: logger.warning(f"Failed to translate catalog name '{name}': {e}") continue # sort catalogs by order in user settings - if user_settings: - catalogs = sort_catalogs(catalogs, user_settings) + catalogs = sort_catalogs(catalogs, resolved_settings) - success = await bundle.addons.update_catalogs(auth_key, catalogs) + success = await bundle.addons.update_catalogs(resolved_auth_key, catalogs) # Update timestamp and invalidate cache only on success if success and update_timestamp: @@ -164,7 +181,8 @@ async def refresh_catalogs_for_credentials( description = ( f"Movie and series recommendations based on your Stremio library.\n\n⚠️ Status: Error\n{error_msg}" ) - await bundle.addons.update_description(auth_key, description) + if isinstance(auth_key, str) and auth_key: + await bundle.addons.update_description(auth_key, description) except Exception as update_err: logger.warning(f"[{redact_token(token)}] Failed to update addon description with error: {update_err}") return False diff --git a/app/services/manifest.py b/app/services/manifest.py index 2d8f29a..eb259a6 100644 --- a/app/services/manifest.py +++ b/app/services/manifest.py @@ -7,13 +7,12 @@ from app.core.security import redact_token from app.core.settings import UserSettings, resolve_tmdb_api_key from app.core.version import __version__ -from app.services.catalog import DynamicCatalogService +from app.services.catalog import DynamicCatalogService, sort_catalogs from app.services.profile.integration import ProfileIntegration from app.services.stremio.service import StremioBundle from app.services.token_store import token_store from app.services.translation import translation_service from app.services.user_cache import user_cache -from app.utils.catalog import cache_profile_and_watched_sets, sort_catalogs class ManifestService: @@ -71,7 +70,11 @@ async 
def _resolve_auth_key(self, bundle: StremioBundle, credentials: dict[str, return auth_key async def cache_library_and_profiles( - self, bundle: StremioBundle, auth_key: str, user_settings: UserSettings, token: str + self, + bundle: StremioBundle, + auth_key: str, + user_settings: UserSettings, + token: str, ) -> dict[str, Any]: """ Fetch and cache library items and profiles for a user. @@ -104,9 +107,7 @@ async def cache_library_and_profiles( for content_type in ["movie", "series"]: try: logger.info(f"[{redact_token(token)}] Building and caching profile for {content_type}") - _, _, _ = await cache_profile_and_watched_sets( - token, content_type, integration_service, library_items, bundle, auth_key - ) + await integration_service.build_and_cache_profile(token, content_type, library_items, bundle, auth_key) logger.debug(f"[{redact_token(token)}] Cached profile and watched sets for {content_type}") except Exception as e: logger.warning(f"[{redact_token(token)}] Failed to build/cache profile for {content_type}: {e}") @@ -114,7 +115,11 @@ async def cache_library_and_profiles( return library_items async def _ensure_library_and_profiles_cached( - self, bundle: StremioBundle, auth_key: str, user_settings: UserSettings, token: str + self, + bundle: StremioBundle, + auth_key: str, + user_settings: UserSettings, + token: str, ) -> dict[str, Any]: """Ensure library items and profiles are cached, fetching and building if needed.""" # Try to get cached library items first @@ -129,18 +134,27 @@ async def _ensure_library_and_profiles_cached( return await self.cache_library_and_profiles(bundle, auth_key, user_settings, token) async def _build_dynamic_catalogs( - self, bundle: StremioBundle, auth_key: str, user_settings: UserSettings | None, token: str + self, + bundle: StremioBundle, + auth_key: str, + user_settings: UserSettings | None, + token: str, ) -> list[dict[str, Any]]: """Build dynamic catalogs for the manifest.""" + if not user_settings: + return [] + + settings_for_user 
= user_settings + # check if cached, if not, fetch and cache library_items = await user_cache.get_library_items(token) if not library_items: - library_items = await self._ensure_library_and_profiles_cached(bundle, auth_key, user_settings, token) + library_items = await self._ensure_library_and_profiles_cached(bundle, auth_key, settings_for_user, token) await user_cache.set_library_items(token, library_items) - tmdb_key = resolve_tmdb_api_key(user_settings) - dynamic_catalog_service = DynamicCatalogService(language=user_settings.language, tmdb_api_key=tmdb_key) - return await dynamic_catalog_service.get_dynamic_catalogs(library_items, user_settings, token=token) + tmdb_key = resolve_tmdb_api_key(settings_for_user) + dynamic_catalog_service = DynamicCatalogService(language=settings_for_user.language, tmdb_api_key=tmdb_key) + return await dynamic_catalog_service.get_dynamic_catalogs(library_items, settings_for_user, token=token) async def _translate_catalogs(self, catalogs: list[dict[str, Any]], language: str | None) -> list[dict[str, Any]]: """Translate catalog names to target language.""" @@ -204,7 +218,7 @@ async def get_manifest_for_token(self, token: str) -> dict[str, Any]: # Resolve auth key auth_key = await self._resolve_auth_key(bundle, creds, token) - if auth_key: + if auth_key and user_settings: fetched_catalogs = await self._build_dynamic_catalogs(bundle, auth_key, user_settings, token) except Exception as e: logger.exception(f"[{redact_token(token)}] Dynamic catalog build failed: {e}") diff --git a/app/services/profile/integration.py b/app/services/profile/integration.py index e987147..b5a37d9 100644 --- a/app/services/profile/integration.py +++ b/app/services/profile/integration.py @@ -205,6 +205,25 @@ async def build_profile_incremental( return profile, watched_tmdb, watched_imdb + async def build_and_cache_profile( + self, + token: str, + content_type: str, + library_items: dict, + stremio_service: Any = None, + auth_key: str | None = None, + ) -> 
tuple[TasteProfile | None, set[int], set[str]]: + """Build profile data and cache the profile and watched sets.""" + profile, watched_tmdb, watched_imdb = await self.build_profile_incremental( + library_items, + content_type, + token, + stremio_service, + auth_key, + ) + await user_cache.set_profile_and_watched_sets(token, content_type, profile, watched_tmdb, watched_imdb) + return profile, watched_tmdb, watched_imdb + async def get_genre_whitelist( self, profile: TasteProfile, diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index e80f0e6..7c18fa5 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -23,7 +23,6 @@ from app.services.tmdb.service import get_tmdb_service from app.services.token_store import token_store from app.services.user_cache import user_cache -from app.utils.catalog import cache_profile_and_watched_sets def should_shuffle(user_settings: UserSettings, catalog_id: str) -> bool: @@ -188,10 +187,9 @@ async def get_catalog( profile, watched_tmdb, watched_imdb, - ) = await cache_profile_and_watched_sets( + ) = await integration_service.build_and_cache_profile( token, content_type, - integration_service, library_items, bundle, auth_key, @@ -328,7 +326,10 @@ def _extract_settings(self, credentials: dict) -> UserSettings: return UserSettings(**settings_dict) if settings_dict else get_default_settings() async def _get_trending_fallback( - self, content_type: str, limit: int = 20, user_settings: UserSettings | None = None + self, + content_type: str, + limit: int = 20, + user_settings: UserSettings | None = None, ) -> list[dict[str, Any]]: """Get trending items for new users without profiles.""" from app.services.recommendation.utils import content_type_to_mtype diff --git a/app/utils/catalog.py b/app/utils/catalog.py deleted file mode 100644 index e5fc6b6..0000000 --- a/app/utils/catalog.py +++ /dev/null @@ -1,102 +0,0 @@ -from 
typing import Any - -from app.core.constants import DISCOVER_ONLY_EXTRA -from app.core.settings import UserSettings -from app.services.profile.integration import ProfileIntegration -from app.services.stremio.service import StremioBundle -from app.services.user_cache import user_cache - - -def get_catalogs_from_config( - user_settings: UserSettings, - cat_id: str, - default_name: str, - default_movie: bool, - default_series: bool, -): - catalogs = [] - config = next((c for c in user_settings.catalogs if c.id == cat_id), None) - - if config and config.enabled: - name = config.name if config and config.name else default_name - enabled_movie = getattr(config, "enabled_movie", default_movie) if config else default_movie - enabled_series = getattr(config, "enabled_series", default_series) if config else default_series - display_at_home = getattr(config, "display_at_home", True) if config else True - - extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] - - if enabled_movie: - catalogs.append({"type": "movie", "id": cat_id, "name": name, "extra": extra}) - if enabled_series: - catalogs.append({"type": "series", "id": cat_id, "name": name, "extra": extra}) - return catalogs - - -async def cache_profile_and_watched_sets( - token: str, - content_type: str, - integration_service: ProfileIntegration, - library_items: dict, - bundle: StremioBundle, - auth_key: str, -): - """ - Build and cache profile and watched sets for a user and content type. - Uses the centralized UserCacheService for caching. 
- """ - ( - profile, - watched_tmdb, - watched_imdb, - ) = await integration_service.build_profile_incremental(library_items, content_type, token, bundle, auth_key) - - await user_cache.set_profile_and_watched_sets(token, content_type, profile, watched_tmdb, watched_imdb) - return profile, watched_tmdb, watched_imdb - - -def get_config_id(catalog) -> str | None: - catalog_id = catalog.get("id", "") - if catalog_id.startswith("watchly.theme."): - return "watchly.theme" - if catalog_id.startswith("watchly.loved."): - return "watchly.loved" - if catalog_id.startswith("watchly.watched."): - return "watchly.watched" - return catalog_id - - -def sort_catalogs(catalogs: list[dict[str, Any]], user_settings: UserSettings) -> list[dict[str, Any]]: - """ - Sort catalogs according to user settings and sorting order choice. - - Sorting Orders: - - default: Interleaved (by category priority, then movie then series) - - movies_first: Group all movies first, then all series - - series_first: Group all series first, then all movies - """ - if not user_settings: - return catalogs - - # Get the original order index from user settings for each catalog category - order_map = {c.id: i for i, c in enumerate(user_settings.catalogs)} - - # Base sorting key: setting index (priority) - def get_setting_index(cat): - return order_map.get(get_config_id(cat), 999) - - sorting_order = getattr(user_settings, "sorting_order", "default") - - if sorting_order == "movies_first": - # Group movies first, then series - # movies: type_priority=0, series: type_priority=1 - sorted_catalogs = sorted(catalogs, key=lambda x: (0 if x.get("type") == "movie" else 1, get_setting_index(x))) - elif sorting_order == "series_first": - # Group series first, then movies - # series: type_priority=0, movies: type_priority=1 - sorted_catalogs = sorted(catalogs, key=lambda x: (0 if x.get("type") == "series" else 1, get_setting_index(x))) - else: - # Default: Interleaved (by category priority) - # Python's sorted is stable, 
preserving movie then series within same priority - sorted_catalogs = sorted(catalogs, key=get_setting_index) - - return sorted_catalogs From 5c2c8ed1ad78e9c24a113f1ed70ca53dd16c8c2f Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Fri, 20 Mar 2026 15:37:53 +0545 Subject: [PATCH 04/68] refactor multiple services to accomodate auth service --- app/services/auth.py | 74 ++++++++++++------- app/services/catalog_updater.py | 19 +---- app/services/manifest.py | 29 +------- .../recommendation/catalog_service.py | 34 +-------- 4 files changed, 53 insertions(+), 103 deletions(-) diff --git a/app/services/auth.py b/app/services/auth.py index 16dd23d..74ab9ed 100644 --- a/app/services/auth.py +++ b/app/services/auth.py @@ -7,7 +7,6 @@ from app.core.config import settings from app.core.security import redact_token from app.core.settings import UserSettings, get_default_settings -from app.services.manifest import manifest_service from app.services.stremio.service import StremioBundle from app.services.token_store import token_store @@ -15,6 +14,19 @@ class AuthService: async def resolve_auth_key(self, credentials: dict, token: str | None = None) -> str | None: """Validate auth key. If expired, try email+password login. 
Update store on refresh.""" + bundle = StremioBundle() + try: + return await self.resolve_auth_key_with_bundle(bundle, credentials, token) + finally: + await bundle.close() + + async def resolve_auth_key_with_bundle( + self, + bundle: StremioBundle, + credentials: dict, + token: str | None = None, + ) -> str | None: + """Validate auth key with an existing Stremio bundle.""" auth_key = (credentials.get("authKey") or "").strip() or None email = (credentials.get("email") or "").strip() or None password = (credentials.get("password") or "").strip() or None @@ -22,34 +34,37 @@ async def resolve_auth_key(self, credentials: dict, token: str | None = None) -> if auth_key and auth_key.startswith('"') and auth_key.endswith('"'): auth_key = auth_key[1:-1].strip() - bundle = StremioBundle() - try: - # 1. Try existing auth key - if auth_key: - try: - await bundle.auth.get_user_info(auth_key) - return auth_key - except Exception: - logger.info("Stremio auth key expired or invalid, attempting refresh with credentials") - - # 2. Try login if auth key failed or wasn't provided - if email and password: - try: - new_key = await bundle.auth.login(email, password) - if token and new_key != auth_key: - existing_data = await self.get_credentials(token) - if existing_data: - existing_data["authKey"] = new_key - await token_store.update_user_data(token, existing_data) - return new_key - except Exception as e: - logger.error(f"Stremio login failed: {e}") - return None - finally: - await bundle.close() + # 1. Try existing auth key + if auth_key: + try: + await bundle.auth.get_user_info(auth_key) + return auth_key + except Exception: + logger.info("Stremio auth key expired or invalid, attempting refresh with credentials") + + # 2. 
Try login if auth key failed or wasn't provided + if email and password: + try: + new_key = await bundle.auth.login(email, password) + if token and new_key != auth_key: + existing_data = await self.get_credentials(token) + if existing_data: + existing_data["authKey"] = new_key + await token_store.update_user_data(token, existing_data) + return new_key + except Exception as e: + logger.error(f"Stremio login failed: {e}") + return None return None + async def require_auth_key(self, bundle: StremioBundle, credentials: dict, token: str | None = None) -> str: + """Resolve auth key or raise a user-facing error.""" + auth_key = await self.resolve_auth_key_with_bundle(bundle, credentials, token) + if not auth_key: + raise HTTPException(status_code=401, detail="Stremio session expired. Please reconfigure.") + return auth_key + async def get_credentials(self, token: str) -> dict | None: """Get user credentials from token store.""" return await token_store.get_user_data(token) @@ -75,7 +90,10 @@ async def get_stremio_user_data(self, payload: TokenRequest) -> tuple[str, str, auth_key = await self.resolve_auth_key(creds) if not auth_key: - raise HTTPException(status_code=400, detail="Failed to verify Stremio identity. Provide valid credentials.") + raise HTTPException( + status_code=400, + detail="Failed to verify Stremio identity. 
Provide valid credentials.", + ) bundle = StremioBundle() try: @@ -182,6 +200,8 @@ async def delete_user_account(self, payload: TokenRequest) -> None: logger.info(f"[{redact_token(token)}] Token deleted for user {user_id}") async def _trigger_initial_caching(self, auth_key: str, settings: UserSettings, token: str): + from app.services.manifest import manifest_service + bundle = StremioBundle() try: logger.info(f"[{redact_token(token)}] Caching library and profiles before returning token") diff --git a/app/services/catalog_updater.py b/app/services/catalog_updater.py index 031fee1..10cc599 100644 --- a/app/services/catalog_updater.py +++ b/app/services/catalog_updater.py @@ -8,6 +8,7 @@ from app.core.config import settings from app.core.security import redact_token from app.core.settings import UserSettings +from app.services.auth import auth_service from app.services.catalog import DynamicCatalogService, sort_catalogs from app.services.manifest import manifest_service from app.services.stremio.service import StremioBundle @@ -75,25 +76,9 @@ async def refresh_catalogs_for_credentials( detail="Invalid or expired token. Please reconfigure the addon.", ) - auth_key = credentials.get("authKey") - # check if auth key is valid bundle = StremioBundle() try: - try: - if not auth_key: - raise ValueError("Missing auth key") - await bundle.auth.get_user_info(auth_key) - except Exception as e: - logger.exception(f"[{redact_token(token)}] Invalid auth key. Falling back to login: {e}") - email = credentials.get("email") - password = credentials.get("password") - if email and password: - auth_key = await bundle.auth.login(email, password) - credentials["authKey"] = auth_key - await token_store.update_user_data(token, credentials) - else: - return True # true since we won't be able to update it again. so no need to try again. 
- + auth_key = await auth_service.resolve_auth_key_with_bundle(bundle, credentials, token) if not auth_key: return True diff --git a/app/services/manifest.py b/app/services/manifest.py index eb259a6..d131b30 100644 --- a/app/services/manifest.py +++ b/app/services/manifest.py @@ -7,6 +7,7 @@ from app.core.security import redact_token from app.core.settings import UserSettings, resolve_tmdb_api_key from app.core.version import __version__ +from app.services.auth import auth_service from app.services.catalog import DynamicCatalogService, sort_catalogs from app.services.profile.integration import ProfileIntegration from app.services.stremio.service import StremioBundle @@ -43,32 +44,6 @@ def get_base_manifest() -> dict[str, Any]: }, } - async def _resolve_auth_key(self, bundle: StremioBundle, credentials: dict[str, Any], token: str) -> str | None: - """Resolve and validate auth key, refreshing if needed.""" - auth_key = credentials.get("authKey") - email = credentials.get("email") - password = credentials.get("password") - - is_valid = False - if auth_key: - try: - await bundle.auth.get_user_info(auth_key) - is_valid = True - except Exception as e: - logger.debug(f"Auth key check failed for {email or 'unknown'}: {e}") - - if not is_valid and email and password: - try: - auth_key = await bundle.auth.login(email, password) - # Update store - credentials["authKey"] = auth_key - await token_store.update_user_data(token, credentials) - except Exception as e: - logger.error(f"Failed to refresh auth key during manifest fetch: {e}") - return None - - return auth_key - async def cache_library_and_profiles( self, bundle: StremioBundle, @@ -216,7 +191,7 @@ async def get_manifest_for_token(self, token: str) -> dict[str, Any]: fetched_catalogs = [] try: # Resolve auth key - auth_key = await self._resolve_auth_key(bundle, creds, token) + auth_key = await auth_service.resolve_auth_key_with_bundle(bundle, creds, token) if auth_key and user_settings: fetched_catalogs = await 
self._build_dynamic_catalogs(bundle, auth_key, user_settings, token) diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index 7c18fa5..73f690a 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -11,6 +11,7 @@ from app.core.security import redact_token from app.core.settings import UserSettings, get_default_settings, resolve_tmdb_api_key from app.models.taste_profile import TasteProfile +from app.services.auth import auth_service from app.services.catalog_updater import catalog_updater from app.services.profile.integration import ProfileIntegration from app.services.recommendation.all_based import AllBasedService @@ -153,7 +154,7 @@ async def get_catalog( ) # Resolve auth and settings - auth_key = await self._resolve_auth(bundle, credentials, token) + auth_key = await auth_service.require_auth_key(bundle, credentials, token) user_settings = self._extract_settings(credentials) language = user_settings.language if user_settings else "en-US" @@ -290,37 +291,6 @@ def _validate_inputs(self, token: str, content_type: str, catalog_id: str) -> No ), ) - async def _resolve_auth(self, bundle: StremioBundle, credentials: dict, token: str) -> str: - auth_key = credentials.get("authKey") - email = credentials.get("email") - password = credentials.get("password") - - # Validate existing auth key - is_valid = False - if auth_key: - try: - await bundle.auth.get_user_info(auth_key) - is_valid = True - except Exception as e: - logger.error(f"Failed to validate auth key during catalog fetch: {e}") - pass - - # Try to refresh if invalid - if not is_valid and email and password: - try: - auth_key = await bundle.auth.login(email, password) - credentials["authKey"] = auth_key - # Update token store with refreshed credentials - await token_store.update_user_data(token, credentials) - except Exception as e: - logger.error(f"Failed to refresh auth key during catalog 
fetch: {e}") - - if not auth_key: - logger.error("No auth key found during catalog fetch") - raise HTTPException(status_code=401, detail="Stremio session expired. Please reconfigure.") - - return auth_key - def _extract_settings(self, credentials: dict) -> UserSettings: settings_dict = credentials.get("settings", {}) return UserSettings(**settings_dict) if settings_dict else get_default_settings() From 7034be94a756a9f877573a372bdb490131cafea1 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Fri, 20 Mar 2026 15:47:25 +0545 Subject: [PATCH 05/68] refactor: profile service merge with integration and changes --- app/services/catalog.py | 4 +- app/services/manifest.py | 4 +- app/services/profile/__init__.py | 3 +- app/services/profile/integration.py | 254 +----------------- app/services/profile/service.py | 184 +++++++++++++ .../recommendation/catalog_service.py | 6 +- 6 files changed, 198 insertions(+), 257 deletions(-) create mode 100644 app/services/profile/service.py diff --git a/app/services/catalog.py b/app/services/catalog.py index 7e4c804..498737a 100644 --- a/app/services/catalog.py +++ b/app/services/catalog.py @@ -8,7 +8,7 @@ from app.core.constants import DISCOVER_ONLY_EXTRA from app.core.settings import CatalogConfig, UserSettings from app.services.interest_summary import interest_summary_service -from app.services.profile.integration import ProfileIntegration +from app.services.profile.service import ProfileService from app.services.row_generator import RowGeneratorService from app.services.scoring import ScoringService from app.services.tmdb.service import get_tmdb_service @@ -86,7 +86,7 @@ class DynamicCatalogService: def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): self.tmdb_service = get_tmdb_service(language=language, api_key=tmdb_api_key) self.scoring_service = ScoringService() - self.profile_integration = ProfileIntegration(language=language, tmdb_api_key=tmdb_api_key) + self.profile_integration = 
ProfileService(language=language, tmdb_api_key=tmdb_api_key) self.row_generator = RowGeneratorService(tmdb_service=self.tmdb_service) self.PROFILE_MAX_ITEMS = 50 diff --git a/app/services/manifest.py b/app/services/manifest.py index d131b30..777f439 100644 --- a/app/services/manifest.py +++ b/app/services/manifest.py @@ -9,7 +9,7 @@ from app.core.version import __version__ from app.services.auth import auth_service from app.services.catalog import DynamicCatalogService, sort_catalogs -from app.services.profile.integration import ProfileIntegration +from app.services.profile.service import ProfileService from app.services.stremio.service import StremioBundle from app.services.token_store import token_store from app.services.translation import translation_service @@ -77,7 +77,7 @@ async def cache_library_and_profiles( # Build and cache profiles for both movie and series language = user_settings.language tmdb_key = resolve_tmdb_api_key(user_settings) - integration_service = ProfileIntegration(language=language, tmdb_api_key=tmdb_key) + integration_service = ProfileService(language=language, tmdb_api_key=tmdb_key) for content_type in ["movie", "series"]: try: diff --git a/app/services/profile/__init__.py b/app/services/profile/__init__.py index a9b1489..cf3beb5 100644 --- a/app/services/profile/__init__.py +++ b/app/services/profile/__init__.py @@ -7,9 +7,9 @@ from app.services.profile.builder import ProfileBuilder from app.services.profile.evidence import EvidenceCalculator -from app.services.profile.integration import ProfileIntegration from app.services.profile.sampling import SmartSampler from app.services.profile.scorer import ProfileScorer +from app.services.profile.service import ProfileIntegration, ProfileService from app.services.profile.vectorizer import ItemVectorizer __all__ = [ @@ -18,5 +18,6 @@ "EvidenceCalculator", "ItemVectorizer", "SmartSampler", + "ProfileService", "ProfileIntegration", ] diff --git a/app/services/profile/integration.py 
b/app/services/profile/integration.py index b5a37d9..054a1b4 100644 --- a/app/services/profile/integration.py +++ b/app/services/profile/integration.py @@ -1,252 +1,8 @@ -from typing import Any +"""Compatibility wrapper for the profile service. -from loguru import logger +New code should import from `app.services.profile.service`. +""" -from app.models.taste_profile import TasteProfile -from app.services.profile.builder import ProfileBuilder -from app.services.profile.constants import GENRE_WHITELIST_LIMIT -from app.services.profile.sampling import SmartSampler -from app.services.profile.vectorizer import ItemVectorizer -from app.services.recommendation.filtering import RecommendationFiltering -from app.services.scoring import ScoringService -from app.services.tmdb.service import get_tmdb_service -from app.services.user_cache import user_cache +from app.services.profile.service import ProfileIntegration, ProfileService - -class ProfileIntegration: - """ - Helper class to integrate taste profile services with existing systems. - """ - - def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): - self.scoring_service = ScoringService() - self.sampler = SmartSampler(self.scoring_service) - tmdb_service = get_tmdb_service(language=language, api_key=tmdb_api_key) - vectorizer = ItemVectorizer(tmdb_service) - self.builder = ProfileBuilder(vectorizer) - - async def build_profile_from_library( - self, - library_items: dict, - content_type: str, - stremio_service: Any = None, - auth_key: str | None = None, - ) -> tuple[TasteProfile | None, set[int], set[str]]: - """ - Build taste profile from library items and get watched sets. 
- - Args: - library_items: Library items dict from Stremio - content_type: Content type (movie/series) - stremio_service: Stremio service (optional, for watched sets) - auth_key: Auth key (optional, for watched sets) - - Returns: - Tuple of (profile, watched_tmdb, watched_imdb) - """ - # Get watched sets - watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( - stremio_service, library_items, auth_key - ) - - # Convert library items to ScoredItems - all_items = ( - library_items.get("loved", []) - + library_items.get("liked", []) - + library_items.get("watched", []) - + library_items.get("added", []) - ) - typed_items = [it for it in all_items if it.get("type") == content_type] - - if not typed_items: - return None, watched_tmdb, watched_imdb - - # Sample items using SmartSampler (it expects raw library items dict) - library_items_dict = { - "loved": [it for it in library_items.get("loved", []) if it.get("type") == content_type], - "liked": [it for it in library_items.get("liked", []) if it.get("type") == content_type], - "watched": [it for it in library_items.get("watched", []) if it.get("type") == content_type], - "added": [it for it in library_items.get("added", []) if it.get("type") == content_type], - } - sampled = self.sampler.sample_items(library_items_dict, content_type) - - # Build profile - profile = await self.builder.build_profile(sampled, content_type=content_type) - - return profile, watched_tmdb, watched_imdb - - async def build_profile_incremental( - self, - library_items: dict, - content_type: str, - token: str, - stremio_service: Any = None, - auth_key: str | None = None, - ) -> tuple[TasteProfile | None, set[int], set[str]]: - """ - Build profile incrementally if possible, fallback to full rebuild. 
- - Args: - library_items: Library items dict from Stremio - content_type: Content type (movie/series) - token: User token for change detection - stremio_service: Stremio service (optional, for watched sets) - auth_key: Auth key (optional, for watched sets) - - Returns: - Tuple of (profile, watched_tmdb, watched_imdb) - """ - # Get watched sets - watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( - stremio_service, library_items, auth_key - ) - - # Convert library items to ScoredItems for change detection - all_items = ( - library_items.get("loved", []) - + library_items.get("liked", []) - + library_items.get("watched", []) - + library_items.get("added", []) - ) - typed_items = [it for it in all_items if it.get("type") == content_type] - - if not typed_items: - return None, watched_tmdb, watched_imdb - - # Check if we can use incremental update - try: - # Check if library has changed - library_changed = await user_cache.has_library_changed(token, content_type, typed_items) - - if not library_changed: - # No changes - return existing profile - existing_profile = await user_cache.get_profile(token, content_type) - if existing_profile: - return existing_profile, watched_tmdb, watched_imdb - - # Try to get existing profile for incremental update - existing_profile = await user_cache.get_profile(token, content_type) - - if existing_profile: - # Check for removals or new items - processed_ids = existing_profile.processed_items - current_ids = {it.get("_id", it.get("id")) for it in typed_items if it.get("_id", it.get("id"))} - - # Check if this is a legacy profile (has scores but no processed_items) - is_legacy = not processed_ids and (existing_profile.genre_scores or existing_profile.director_scores) - - # If items were removed, or it's a legacy profile, we must do a full rebuild - if not processed_ids.issubset(current_ids) or is_legacy: - reason = "Legacy profile detected" if is_legacy else "Items removed from library" - 
logger.debug(f"[{token[:8]}...] {reason}, falling back to full rebuild") - # Fall through to full rebuild - else: - # Identify new items - new_item_ids = current_ids - processed_ids - - if not new_item_ids: - # No new items and no removals (maybe just metadata changed?) - # We can just return the existing profile - return existing_profile, watched_tmdb, watched_imdb - - logger.debug(f"[{token[:8]}...] Found {len(new_item_ids)} new items, using incremental update") - - # Filter library items to only new ones for sampling - new_library_items_dict = { - "loved": [ - it - for it in library_items.get("loved", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - "liked": [ - it - for it in library_items.get("liked", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - "watched": [ - it - for it in library_items.get("watched", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - "added": [ - it - for it in library_items.get("added", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - } - - # Sample only new items - sampled = self.sampler.sample_items(new_library_items_dict, content_type) - - if not sampled: - # Should not happen if new_item_ids is not empty, but just in case - return existing_profile, watched_tmdb, watched_imdb - - # Update existing profile incrementally - updated_profile = await self.builder.update_profile_incrementally( - existing_profile, sampled, content_type=content_type - ) - - # Update library hash to mark as processed - await user_cache.update_library_hash(token, content_type, typed_items) - - return updated_profile, watched_tmdb, watched_imdb - - except Exception as e: - logger.warning(f"[{token[:8]}...] Incremental update failed, falling back to full rebuild: {e}") - - # Fallback to full rebuild - logger.debug(f"[{token[:8]}...] 
Using full rebuild") - profile_tuple = await self.build_profile_from_library(library_items, content_type, stremio_service, auth_key) - profile, _, _ = profile_tuple - - # Update library hash after successful build - await user_cache.update_library_hash(token, content_type, typed_items) - - return profile, watched_tmdb, watched_imdb - - async def build_and_cache_profile( - self, - token: str, - content_type: str, - library_items: dict, - stremio_service: Any = None, - auth_key: str | None = None, - ) -> tuple[TasteProfile | None, set[int], set[str]]: - """Build profile data and cache the profile and watched sets.""" - profile, watched_tmdb, watched_imdb = await self.build_profile_incremental( - library_items, - content_type, - token, - stremio_service, - auth_key, - ) - await user_cache.set_profile_and_watched_sets(token, content_type, profile, watched_tmdb, watched_imdb) - return profile, watched_tmdb, watched_imdb - - async def get_genre_whitelist( - self, - profile: TasteProfile, - content_type: str, - ) -> set[int]: - """ - Get genre whitelist from user's top genres in profile. 
- - Args: - profile: Taste profile - content_type: Content type (movie/series) - - Returns: - Set of top genre IDs - """ - try: - if not profile: - whitelist = set() - else: - # Get top genres - top_genres = profile.get_top_genres(limit=GENRE_WHITELIST_LIMIT) - whitelist = {int(genre_id) for genre_id, _ in top_genres} - return whitelist - except Exception as e: - logger.warning(f"Failed to build genre whitelist for {content_type}: {e}") - return set() +__all__ = ["ProfileService", "ProfileIntegration"] diff --git a/app/services/profile/service.py b/app/services/profile/service.py new file mode 100644 index 0000000..f61e937 --- /dev/null +++ b/app/services/profile/service.py @@ -0,0 +1,184 @@ +from typing import Any + +from loguru import logger + +from app.models.taste_profile import TasteProfile +from app.services.profile.builder import ProfileBuilder +from app.services.profile.constants import GENRE_WHITELIST_LIMIT +from app.services.profile.sampling import SmartSampler +from app.services.profile.vectorizer import ItemVectorizer +from app.services.recommendation.filtering import RecommendationFiltering +from app.services.scoring import ScoringService +from app.services.tmdb.service import get_tmdb_service +from app.services.user_cache import user_cache + + +class ProfileService: + """Builds, updates, caches, and exposes user taste profiles.""" + + def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): + self.scoring_service = ScoringService() + self.sampler = SmartSampler(self.scoring_service) + tmdb_service = get_tmdb_service(language=language, api_key=tmdb_api_key) + vectorizer = ItemVectorizer(tmdb_service) + self.builder = ProfileBuilder(vectorizer) + + async def build_profile_from_library( + self, + library_items: dict, + content_type: str, + stremio_service: Any = None, + auth_key: str | None = None, + ) -> tuple[TasteProfile | None, set[int], set[str]]: + """Build taste profile from library items and get watched sets.""" + 
watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + stremio_service, library_items, auth_key + ) + + all_items = ( + library_items.get("loved", []) + + library_items.get("liked", []) + + library_items.get("watched", []) + + library_items.get("added", []) + ) + typed_items = [it for it in all_items if it.get("type") == content_type] + + if not typed_items: + return None, watched_tmdb, watched_imdb + + library_items_dict = { + "loved": [it for it in library_items.get("loved", []) if it.get("type") == content_type], + "liked": [it for it in library_items.get("liked", []) if it.get("type") == content_type], + "watched": [it for it in library_items.get("watched", []) if it.get("type") == content_type], + "added": [it for it in library_items.get("added", []) if it.get("type") == content_type], + } + sampled = self.sampler.sample_items(library_items_dict, content_type) + profile = await self.builder.build_profile(sampled, content_type=content_type) + return profile, watched_tmdb, watched_imdb + + async def build_profile_incremental( + self, + library_items: dict, + content_type: str, + token: str, + stremio_service: Any = None, + auth_key: str | None = None, + ) -> tuple[TasteProfile | None, set[int], set[str]]: + """Build profile incrementally if possible, fallback to full rebuild.""" + watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + stremio_service, library_items, auth_key + ) + + all_items = ( + library_items.get("loved", []) + + library_items.get("liked", []) + + library_items.get("watched", []) + + library_items.get("added", []) + ) + typed_items = [it for it in all_items if it.get("type") == content_type] + + if not typed_items: + return None, watched_tmdb, watched_imdb + + try: + library_changed = await user_cache.has_library_changed(token, content_type, typed_items) + + if not library_changed: + existing_profile = await user_cache.get_profile(token, content_type) + if existing_profile: + return 
existing_profile, watched_tmdb, watched_imdb + + existing_profile = await user_cache.get_profile(token, content_type) + + if existing_profile: + processed_ids = existing_profile.processed_items + current_ids = {it.get("_id", it.get("id")) for it in typed_items if it.get("_id", it.get("id"))} + is_legacy = not processed_ids and (existing_profile.genre_scores or existing_profile.director_scores) + + if not processed_ids.issubset(current_ids) or is_legacy: + reason = "Legacy profile detected" if is_legacy else "Items removed from library" + logger.debug(f"[{token[:8]}...] {reason}, falling back to full rebuild") + else: + new_item_ids = current_ids - processed_ids + + if not new_item_ids: + return existing_profile, watched_tmdb, watched_imdb + + logger.debug(f"[{token[:8]}...] Found {len(new_item_ids)} new items, using incremental update") + + new_library_items_dict = { + "loved": [ + it + for it in library_items.get("loved", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + "liked": [ + it + for it in library_items.get("liked", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + "watched": [ + it + for it in library_items.get("watched", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + "added": [ + it + for it in library_items.get("added", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + } + + sampled = self.sampler.sample_items(new_library_items_dict, content_type) + + if not sampled: + return existing_profile, watched_tmdb, watched_imdb + + updated_profile = await self.builder.update_profile_incrementally( + existing_profile, sampled, content_type=content_type + ) + + await user_cache.update_library_hash(token, content_type, typed_items) + return updated_profile, watched_tmdb, watched_imdb + + except Exception as e: + logger.warning(f"[{token[:8]}...] 
Incremental update failed, falling back to full rebuild: {e}") + + logger.debug(f"[{token[:8]}...] Using full rebuild") + profile, _, _ = await self.build_profile_from_library(library_items, content_type, stremio_service, auth_key) + await user_cache.update_library_hash(token, content_type, typed_items) + return profile, watched_tmdb, watched_imdb + + async def build_and_cache_profile( + self, + token: str, + content_type: str, + library_items: dict, + stremio_service: Any = None, + auth_key: str | None = None, + ) -> tuple[TasteProfile | None, set[int], set[str]]: + """Build profile data and cache the profile and watched sets.""" + profile, watched_tmdb, watched_imdb = await self.build_profile_incremental( + library_items, + content_type, + token, + stremio_service, + auth_key, + ) + await user_cache.set_profile_and_watched_sets(token, content_type, profile, watched_tmdb, watched_imdb) + return profile, watched_tmdb, watched_imdb + + async def get_genre_whitelist(self, profile: TasteProfile, content_type: str) -> set[int]: + """Get genre whitelist from the user's top genres in the profile.""" + try: + if not profile: + return set() + + top_genres = profile.get_top_genres(limit=GENRE_WHITELIST_LIMIT) + return {int(genre_id) for genre_id, _ in top_genres} + except Exception as e: + logger.warning(f"Failed to build genre whitelist for {content_type}: {e}") + return set() + + +ProfileIntegration = ProfileService diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index 73f690a..589f874 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -13,7 +13,7 @@ from app.models.taste_profile import TasteProfile from app.services.auth import auth_service from app.services.catalog_updater import catalog_updater -from app.services.profile.integration import ProfileIntegration +from app.services.profile.service import ProfileService from 
app.services.recommendation.all_based import AllBasedService from app.services.recommendation.creators import CreatorsService from app.services.recommendation.item_based import ItemBasedService @@ -172,7 +172,7 @@ async def get_catalog( await user_cache.set_library_items(token, library_items) services = self._initialize_services(language, user_settings) - integration_service: ProfileIntegration = services["integration"] + integration_service: ProfileService = services["integration"] # Try to get cached profile and watched sets cached_data = await user_cache.get_profile_and_watched_sets(token, content_type) @@ -327,7 +327,7 @@ def _initialize_services(self, language: str, user_settings: UserSettings) -> di tmdb_service = get_tmdb_service(language=language, api_key=tmdb_key) return { "tmdb": tmdb_service, - "integration": ProfileIntegration(language=language, tmdb_api_key=tmdb_key), + "integration": ProfileService(language=language, tmdb_api_key=tmdb_key), "item": ItemBasedService(tmdb_service, user_settings), "theme": ThemeBasedService(tmdb_service, user_settings), "top_picks": TopPicksService(tmdb_service, user_settings), From 74d9d6d266540288ba53276e423184e93699d689 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Sat, 28 Mar 2026 12:39:11 +0545 Subject: [PATCH 06/68] refactor: refactor profile section --- app/models/library.py | 39 + app/services/catalog.py | 443 --------- app/services/catalog_definitions.py | 341 +++++++ app/services/catalog_updater.py | 128 +-- app/services/manifest.py | 15 +- app/services/profile/__init__.py | 20 +- app/services/profile/integration.py | 8 - app/services/profile/sampling.py | 29 +- app/services/profile/service.py | 73 +- app/services/recommendation/all_based.py | 8 +- .../recommendation/catalog_service.py | 14 +- app/services/recommendation/filtering.py | 18 +- app/services/recommendation/top_picks.py | 7 +- app/services/row_generator.py | 842 ++++++------------ app/services/stremio/library.py | 19 +- 
app/services/user_cache.py | 32 +- 16 files changed, 762 insertions(+), 1274 deletions(-) create mode 100644 app/models/library.py delete mode 100644 app/services/catalog.py create mode 100644 app/services/catalog_definitions.py delete mode 100644 app/services/profile/integration.py diff --git a/app/models/library.py b/app/models/library.py new file mode 100644 index 0000000..920ecc5 --- /dev/null +++ b/app/models/library.py @@ -0,0 +1,39 @@ +from typing import Any + +from pydantic import BaseModel + + +class LibraryCollection(BaseModel): + """Typed container for categorized library items. + + This is the single shape that flows through the app. When Trakt/Simkl + history providers are added, they produce the same LibraryCollection + so the rest of the app doesn't care about the source. + """ + + loved: list[dict[str, Any]] = [] + liked: list[dict[str, Any]] = [] + watched: list[dict[str, Any]] = [] + added: list[dict[str, Any]] = [] + removed: list[dict[str, Any]] = [] + + def all_items(self) -> list[dict[str, Any]]: + return self.loved + self.liked + self.watched + self.added + + def all_items_with_removed(self) -> list[dict[str, Any]]: + return self.loved + self.liked + self.watched + self.added + self.removed + + def for_type(self, content_type: str) -> "LibraryCollection": + return LibraryCollection( + loved=[i for i in self.loved if i.get("type") == content_type], + liked=[i for i in self.liked if i.get("type") == content_type], + watched=[i for i in self.watched if i.get("type") == content_type], + added=[i for i in self.added if i.get("type") == content_type], + removed=[i for i in self.removed if i.get("type") == content_type], + ) + + def all_imdb_ids(self) -> set[str]: + return {i.get("_id", "") for i in self.all_items_with_removed() if i.get("_id", "").startswith("tt")} + + def is_empty(self) -> bool: + return not any([self.loved, self.liked, self.watched, self.added]) diff --git a/app/services/catalog.py b/app/services/catalog.py deleted file mode 
100644 index 498737a..0000000 --- a/app/services/catalog.py +++ /dev/null @@ -1,443 +0,0 @@ -import asyncio -import random -from datetime import datetime, timezone -from typing import Any - -from loguru import logger - -from app.core.constants import DISCOVER_ONLY_EXTRA -from app.core.settings import CatalogConfig, UserSettings -from app.services.interest_summary import interest_summary_service -from app.services.profile.service import ProfileService -from app.services.row_generator import RowGeneratorService -from app.services.scoring import ScoringService -from app.services.tmdb.service import get_tmdb_service -from app.services.user_cache import user_cache - - -def get_catalogs_from_config( - user_settings: UserSettings, - cat_id: str, - default_name: str, - default_movie: bool, - default_series: bool, -) -> list[dict[str, Any]]: - catalogs = [] - config = next((c for c in user_settings.catalogs if c.id == cat_id), None) - - if config and config.enabled: - name = config.name if config.name else default_name - enabled_movie = getattr(config, "enabled_movie", default_movie) - enabled_series = getattr(config, "enabled_series", default_series) - display_at_home = getattr(config, "display_at_home", True) - extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] - - if enabled_movie: - catalogs.append({"type": "movie", "id": cat_id, "name": name, "extra": extra}) - if enabled_series: - catalogs.append({"type": "series", "id": cat_id, "name": name, "extra": extra}) - - return catalogs - - -def get_config_id(catalog: dict[str, Any]) -> str | None: - catalog_id = catalog.get("id", "") - if catalog_id.startswith("watchly.theme."): - return "watchly.theme" - if catalog_id.startswith("watchly.loved."): - return "watchly.loved" - if catalog_id.startswith("watchly.watched."): - return "watchly.watched" - return catalog_id - - -def sort_catalogs(catalogs: list[dict[str, Any]], user_settings: UserSettings) -> list[dict[str, Any]]: - """Sort catalogs according to user 
settings and content-type order.""" - if not user_settings: - return catalogs - - order_map = {c.id: i for i, c in enumerate(user_settings.catalogs)} - - def get_setting_index(catalog: dict[str, Any]) -> int: - return order_map.get(get_config_id(catalog), 999) - - sorting_order = getattr(user_settings, "sorting_order", "default") - - if sorting_order == "movies_first": - return sorted( - catalogs, - key=lambda x: (0 if x.get("type") == "movie" else 1, get_setting_index(x)), - ) - - if sorting_order == "series_first": - return sorted( - catalogs, - key=lambda x: (0 if x.get("type") == "series" else 1, get_setting_index(x)), - ) - - return sorted(catalogs, key=get_setting_index) - - -class DynamicCatalogService: - """ - Generates dynamic catalog rows based on user library and preferences. - """ - - def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): - self.tmdb_service = get_tmdb_service(language=language, api_key=tmdb_api_key) - self.scoring_service = ScoringService() - self.profile_integration = ProfileService(language=language, tmdb_api_key=tmdb_api_key) - self.row_generator = RowGeneratorService(tmdb_service=self.tmdb_service) - self.PROFILE_MAX_ITEMS = 50 - - @staticmethod - def normalize_type(type_): - return "series" if type_ == "tv" else type_ - - def build_catalog_entry(self, item, label, config_id, display_at_home: bool = True): - item_id = item.get("_id", "") - # Use watchly.{config_id}.{item_id} format for better organization - if config_id in ["watchly.item", "watchly.loved", "watchly.watched"]: - # New Item-based catalog format - catalog_id = f"{config_id}.{item_id}" - elif item_id.startswith("tt") and config_id in [ - "watchly.loved", - "watchly.watched", - ]: - catalog_id = f"{config_id}.{item_id}" - else: - catalog_id = item_id - - name = item.get("name") - - extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] - - return { - "type": self.normalize_type(item.get("type")), - "id": catalog_id, - "name": f"{label} 
{name}", - "extra": extra, - } - - def _get_smart_scored_items(self, library_items: dict, content_type: str, max_items: int = 50) -> list: - """ - Get smart sampled items for profile building. - Always includes all loved/liked/added items, then top watched items by interest_score. - - Args: - library_items: Library items dict - content_type: Type of content (movie/series) - max_items: Maximum items to return (default: 50) - - Returns: - List of ScoredItem objects - """ - all_items = ( - library_items.get("loved", []) - + library_items.get("liked", []) - + library_items.get("watched", []) - + library_items.get("added", []) - ) - typed_items = [it for it in all_items if it.get("type") == content_type] - - if not typed_items: - return [] - - # Get added items (strong signal - user wants to watch these) - added_item_ids = {it.get("_id") for it in library_items.get("added", [])} - added_items = [it for it in typed_items if it.get("_id") in added_item_ids] - - # Separate loved/liked from watched items (excluding added) - loved_liked_items = [ - it - for it in typed_items - if (it.get("_is_loved") or it.get("_is_liked")) and it.get("_id") not in added_item_ids - ] - watched_items = [ - it - for it in typed_items - if not (it.get("_is_loved") or it.get("_is_liked") or it.get("_id") in added_item_ids) - ] - - # Always include all loved/liked/added items (score them) - # These are strong signals of user intent - strong_signal_items = loved_liked_items + added_items - strong_signal_scored = [self.scoring_service.process_item(it) for it in strong_signal_items] - - # For watched items, score them and sort by interest_score - watched_scored = [self.scoring_service.process_item(it) for it in watched_items] - watched_scored.sort(key=lambda x: x.score, reverse=True) - - # Combine: all loved/liked/added + top watched items by score - # Limit total to max_items - remaining_slots = max(0, max_items - len(strong_signal_scored)) - top_watched = watched_scored[:remaining_slots] - - 
return strong_signal_scored + top_watched - - async def get_theme_based_catalogs( - self, - library_items: dict, - user_settings: UserSettings | None = None, - enabled_movie: bool = True, - enabled_series: bool = True, - display_at_home: bool = True, - token: str | None = None, - ) -> list[dict]: - """Build thematic catalogs by profiling items using smart sampling.""" - # 1. Prepare Scored History using smart sampling (loved/liked + top watched by score) - # We'll get items per content type in the generation function - - # 2. Extract Genre Filters - excluded_movie_genres = [] - excluded_series_genres = [] - gemini_api_key = None - if user_settings: - excluded_movie_genres = [int(g) for g in user_settings.excluded_movie_genres] - excluded_series_genres = [int(g) for g in user_settings.excluded_series_genres] - gemini_api_key = user_settings.gemini_api_key - - logger.info( - f"[Theme Catalogs] gemini_api_key={'SET' if gemini_api_key else 'NONE'}," - f" token={'SET' if token else 'NONE'}" - ) - - # 3. Generate Rows - async def _generate_for_type(media_type: str, genres: list[int]): - logger.info(f"[Theme Catalogs] _generate_for_type called for {media_type}") - - # Build profile using new system - profile, _, _ = await self.profile_integration.build_profile_from_library( - library_items, media_type, None, None - ) - if not profile: - logger.warning(f"Failed to build profile for {media_type}") - return media_type, [] - - # Generate interest summary if API key is present. 
- if gemini_api_key and token: - try: - logger.info(f"Generating interest summary for {media_type}...") - summary = await interest_summary_service.generate_summary(profile, gemini_api_key) - if summary: - profile.interest_summary = summary - logger.info(f"Interest summary generated for {media_type}: {summary[:80]}...") - else: - logger.warning(f"Interest summary generation returned empty for {media_type}") - except Exception as e: - logger.warning(f"Failed to generate interest summary for {media_type}: {e}") - else: - logger.info( - f"[Theme Catalogs] Skipping summary: gemini_api_key={'SET' if gemini_api_key else 'NONE'}," - f" token={'SET' if token else 'NONE'}" - ) - - # Always save the updated profile (with or without summary) - if token: - try: - await user_cache.set_profile(token, media_type, profile) - logger.info(f"Saved profile for {media_type} (has_summary={profile.interest_summary is not None})") - except Exception as e: - logger.warning(f"Failed to save profile for {media_type}: {e}") - - try: - catalogs = await self.row_generator.generate_rows(profile, media_type, api_key=gemini_api_key) - return media_type, catalogs - except Exception as e: - logger.error(f"Failed to generate thematic rows for {media_type}: {e}") - raise e - - tasks = [] - if enabled_movie: - tasks.append(_generate_for_type("movie", excluded_movie_genres)) - if enabled_series: - tasks.append(_generate_for_type("series", excluded_series_genres)) - - results = await asyncio.gather(*tasks, return_exceptions=True) - - # 4. 
Assembly with error handling - catalogs = [] - - extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] - - for result in results: - if isinstance(result, Exception): - continue - media_type, rows = result - for row in rows: - catalogs.append( - { - "type": media_type, - "id": row.id, - "name": row.title, - "extra": extra, - } - ) - - return catalogs - - async def get_dynamic_catalogs( - self, - library_items: dict, - user_settings: UserSettings | None = None, - token: str | None = None, - ) -> list[dict]: - """Generate all dynamic catalog rows based on enabled configurations.""" - catalogs = [] - if not user_settings: - return catalogs - - # 1. Resolve Configs - theme_cfg, loved_cfg, watched_cfg = self._resolve_catalog_configs(user_settings) - - # 2. Add Thematic Catalogs - if theme_cfg and theme_cfg.enabled: - # Filter theme catalogs by enabled_movie/enabled_series - enabled_movie = getattr(theme_cfg, "enabled_movie", True) - enabled_series = getattr(theme_cfg, "enabled_series", True) - display_at_home = getattr(theme_cfg, "display_at_home", True) - theme_catalogs = await self.get_theme_based_catalogs( - library_items, - user_settings, - enabled_movie, - enabled_series, - display_at_home, - token, - ) - catalogs.extend(theme_catalogs) - - # 3. Add Item-Based Catalogs (Movies & Series) - for mtype in ["movie", "series"]: - await self._add_item_based_rows(catalogs, library_items, mtype, loved_cfg, watched_cfg) - - # 4. Add watchly.rec catalog - catalogs.extend(get_catalogs_from_config(user_settings, "watchly.rec", "Top Picks for You", True, True)) - - # 5. Add watchly.creators catalog - catalogs.extend( - get_catalogs_from_config( - user_settings, - "watchly.creators", - "From your favourite Creators", - False, - False, - ) - ) - - # 6. Add watchly.all.loved catalog - catalogs.extend( - get_catalogs_from_config( - user_settings, - "watchly.all.loved", - "Based on what you loved", - True, - True, - ) - ) - - # 7. 
Add watchly.liked.all catalog - catalogs.extend( - get_catalogs_from_config( - user_settings, - "watchly.liked.all", - "Based on what you liked", - True, - True, - ) - ) - - return catalogs - - def _resolve_catalog_configs(self, user_settings: UserSettings) -> tuple[Any, Any, Any]: - """Extract and fallback catalog configurations from user settings.""" - cfg_map = {c.id: c for c in user_settings.catalogs} - - theme = cfg_map.get("watchly.theme") - loved = cfg_map.get("watchly.loved") - watched = cfg_map.get("watchly.watched") - - # Fallback for old settings format (watchly.item) - if not loved and not watched: - old_item = cfg_map.get("watchly.item") - if old_item and old_item.enabled: - loved = CatalogConfig(id="watchly.loved", name=None, enabled=True) - watched = CatalogConfig(id="watchly.watched", name=None, enabled=True) - - return theme, loved, watched - - def _parse_item_last_watched(self, item: dict) -> datetime: - """Helper to extract and parse the most relevant activity date for an item.""" - val = item.get("state", {}).get("lastWatched") - if val: - try: - if isinstance(val, str): - return datetime.fromisoformat(val.replace("Z", "+00:00")) - return val - except (ValueError, TypeError): - pass - - # Fallback to mtime - val = item.get("_mtime") - if val: - try: - return datetime.fromisoformat(str(val).replace("Z", "+00:00")) - except (ValueError, TypeError): - pass - return datetime.min.replace(tzinfo=timezone.utc) - - async def _add_item_based_rows( - self, - catalogs: list, - library_items: dict, - content_type: str, - loved_config, - watched_config, - ): - # Check if this content type is enabled for the configs - def is_type_enabled(config, content_type: str) -> bool: - if not config: - return False - if content_type == "movie": - return getattr(config, "enabled_movie", True) - elif content_type == "series": - return getattr(config, "enabled_series", True) - return True - - # 1. 
More Like - last_loved = None # Initialize for the watched check - if loved_config and loved_config.enabled and is_type_enabled(loved_config, content_type): - loved = [i for i in library_items.get("loved", []) if i.get("type") == content_type] - loved.sort(key=self._parse_item_last_watched, reverse=True) - - # gather random last loved from last 3 items - last_loved = random.choice(loved[:3]) if loved else None - if last_loved: - label = loved_config.name if loved_config.name else "More like" - loved_config_display_at_home = getattr(loved_config, "display_at_home", True) - catalogs.append( - self.build_catalog_entry(last_loved, label, "watchly.loved", loved_config_display_at_home) - ) - - # 2. Because you watched - if watched_config and watched_config.enabled and is_type_enabled(watched_config, content_type): - watched = [i for i in library_items.get("watched", []) if i.get("type") == content_type] - watched.sort(key=self._parse_item_last_watched, reverse=True) - - # watched cannot be similar to loved - if last_loved: - watched = [i for i in watched if i.get("_id") != last_loved.get("_id")] - - # gather random last watched from last 3 items - last_watched = random.choice(watched[:3]) if watched else None - - if last_watched: - label = watched_config.name if watched_config.name else "Because you watched" - watched_config_display_at_home = getattr(watched_config, "display_at_home", True) - catalogs.append( - self.build_catalog_entry( - last_watched, - label, - "watchly.watched", - watched_config_display_at_home, - ) - ) diff --git a/app/services/catalog_definitions.py b/app/services/catalog_definitions.py new file mode 100644 index 0000000..2a03b4f --- /dev/null +++ b/app/services/catalog_definitions.py @@ -0,0 +1,341 @@ +import asyncio +import random +from datetime import datetime, timezone +from typing import Any, cast + +from loguru import logger + +from app.core.constants import DISCOVER_ONLY_EXTRA +from app.core.settings import CatalogConfig, UserSettings +from 
app.models.library import LibraryCollection +from app.services.interest_summary import interest_summary_service +from app.services.profile.service import ProfileService +from app.services.row_generator import RowGeneratorService +from app.services.tmdb.service import get_tmdb_service +from app.services.user_cache import user_cache + + +def get_catalogs_from_config( + user_settings: UserSettings, + cat_id: str, + default_name: str, + default_movie: bool, + default_series: bool, +) -> list[dict[str, Any]]: + catalogs = [] + config = next((c for c in user_settings.catalogs if c.id == cat_id), None) + + if config and config.enabled: + name = config.name if config.name else default_name + enabled_movie = getattr(config, "enabled_movie", default_movie) + enabled_series = getattr(config, "enabled_series", default_series) + display_at_home = getattr(config, "display_at_home", True) + extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] + + if enabled_movie: + catalogs.append({"type": "movie", "id": cat_id, "name": name, "extra": extra}) + if enabled_series: + catalogs.append({"type": "series", "id": cat_id, "name": name, "extra": extra}) + + return catalogs + + +def get_config_id(catalog: dict[str, Any]) -> str | None: + catalog_id = catalog.get("id", "") + if catalog_id.startswith("watchly.theme."): + return "watchly.theme" + if catalog_id.startswith("watchly.loved."): + return "watchly.loved" + if catalog_id.startswith("watchly.watched."): + return "watchly.watched" + return catalog_id + + +def sort_catalogs(catalogs: list[dict[str, Any]], user_settings: UserSettings) -> list[dict[str, Any]]: + """Sort catalogs according to user settings and content-type order.""" + if not user_settings: + return catalogs + + order_map = {c.id: i for i, c in enumerate(user_settings.catalogs)} + + def get_setting_index(catalog: dict[str, Any]) -> int: + config_id = get_config_id(catalog) + if config_id is None: + return 999 + return order_map.get(config_id, 999) + + sorting_order = 
getattr(user_settings, "sorting_order", "default") + + if sorting_order == "movies_first": + return sorted( + catalogs, + key=lambda x: ( + 0 if x.get("type") == "movie" else 1, + get_setting_index(x), + ), + ) + + if sorting_order == "series_first": + return sorted( + catalogs, + key=lambda x: ( + 0 if x.get("type") == "series" else 1, + get_setting_index(x), + ), + ) + + return sorted(catalogs, key=get_setting_index) + + +class DynamicCatalogService: + """Generates catalog definitions from user history and settings.""" + + def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): + self.language = language + self.tmdb_api_key = tmdb_api_key + tmdb_service = get_tmdb_service(language=language, api_key=tmdb_api_key) + self.profile_service = ProfileService(language=language, tmdb_api_key=tmdb_api_key) + self.row_generator = RowGeneratorService(tmdb_service=tmdb_service) + + @staticmethod + def normalize_type(type_: str) -> str: + return "series" if type_ == "tv" else type_ + + def build_catalog_entry( + self, + item: dict[str, Any], + label: str, + config_id: str, + display_at_home: bool = True, + ) -> dict[str, Any]: + item_id = item.get("_id", "") + if config_id in ["watchly.item", "watchly.loved", "watchly.watched"]: + catalog_id = f"{config_id}.{item_id}" + elif item_id.startswith("tt") and config_id in [ + "watchly.loved", + "watchly.watched", + ]: + catalog_id = f"{config_id}.{item_id}" + else: + catalog_id = item_id + + extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] + return { + "type": self.normalize_type(item.get("type")), + "id": catalog_id, + "name": f"{label} {item.get('name')}", + "extra": extra, + } + + async def get_dynamic_catalogs( + self, + library_items: LibraryCollection, + user_settings: UserSettings | None = None, + token: str | None = None, + ) -> list[dict[str, Any]]: + """Generate all dynamic catalog rows based on enabled configurations.""" + catalogs: list[dict[str, Any]] = [] + if not user_settings: + return 
catalogs + + theme_cfg, loved_cfg, watched_cfg = self._resolve_catalog_configs(user_settings) + + if theme_cfg and theme_cfg.enabled: + enabled_movie = getattr(theme_cfg, "enabled_movie", True) + enabled_series = getattr(theme_cfg, "enabled_series", True) + display_at_home = getattr(theme_cfg, "display_at_home", True) + theme_catalogs = await self._build_theme_catalogs( + library_items, + user_settings, + enabled_movie, + enabled_series, + display_at_home, + token, + ) + catalogs.extend(theme_catalogs) + + for mtype in ["movie", "series"]: + await self._add_item_based_rows(catalogs, library_items, mtype, loved_cfg, watched_cfg) + + catalogs.extend(get_catalogs_from_config(user_settings, "watchly.rec", "Top Picks for You", True, True)) + catalogs.extend( + get_catalogs_from_config( + user_settings, + "watchly.creators", + "From your favourite Creators", + False, + False, + ) + ) + catalogs.extend( + get_catalogs_from_config( + user_settings, + "watchly.all.loved", + "Based on what you loved", + True, + True, + ) + ) + catalogs.extend( + get_catalogs_from_config( + user_settings, + "watchly.liked.all", + "Based on what you liked", + True, + True, + ) + ) + + return catalogs + + # --- Theme catalog building (was ThemeCatalogService) --- + + async def _build_theme_catalogs( + self, + library_items: LibraryCollection, + user_settings: UserSettings | None, + enabled_movie: bool, + enabled_series: bool, + display_at_home: bool, + token: str | None, + ) -> list[dict[str, Any]]: + gemini_api_key = user_settings.gemini_api_key if user_settings else None + + tasks = [] + if enabled_movie: + tasks.append(self._build_theme_rows_for_type(library_items, "movie", gemini_api_key, token)) + if enabled_series: + tasks.append(self._build_theme_rows_for_type(library_items, "series", gemini_api_key, token)) + + results = await asyncio.gather(*tasks, return_exceptions=True) + catalogs: list[dict[str, Any]] = [] + extra = DISCOVER_ONLY_EXTRA if not display_at_home else [] + + for result 
in results: + if not isinstance(result, tuple): + continue + media_type, rows = cast(tuple[str, list[Any]], result) + for row in rows: + catalogs.append( + { + "type": media_type, + "id": row.id, + "name": row.title, + "extra": extra, + } + ) + + return catalogs + + async def _build_theme_rows_for_type( + self, + library_items: LibraryCollection, + media_type: str, + gemini_api_key: str | None, + token: str | None, + ) -> tuple[str, list[Any]]: + logger.info(f"[Theme Catalogs] Building rows for {media_type}") + + profile, _, _ = await self.profile_service.build_profile_from_library(library_items, media_type, None, None) + if not profile: + logger.warning(f"Failed to build profile for {media_type}") + return media_type, [] + + if gemini_api_key and token: + try: + summary = await interest_summary_service.generate_summary(profile, gemini_api_key) + if summary: + profile.interest_summary = summary + logger.info(f"Interest summary generated for {media_type}: " f"{summary[:80]}...") + except Exception as e: + logger.warning(f"Failed to generate interest summary for {media_type}: {e}") + + if token: + try: + await user_cache.set_profile(token, media_type, profile) + except Exception as e: + logger.warning(f"Failed to save profile for {media_type}: {e}") + + rows = await self.row_generator.generate_rows(profile, media_type, api_key=gemini_api_key) + return media_type, rows + + # --- Item-based rows --- + + def _resolve_catalog_configs(self, user_settings: UserSettings) -> tuple[Any, Any, Any]: + cfg_map = {c.id: c for c in user_settings.catalogs} + theme = cfg_map.get("watchly.theme") + loved = cfg_map.get("watchly.loved") + watched = cfg_map.get("watchly.watched") + + if not loved and not watched: + old_item = cfg_map.get("watchly.item") + if old_item and old_item.enabled: + loved = CatalogConfig(id="watchly.loved", name=None, enabled=True) + watched = CatalogConfig(id="watchly.watched", name=None, enabled=True) + + return theme, loved, watched + + def 
_parse_item_last_watched(self, item: dict[str, Any]) -> datetime: + val = item.get("state", {}).get("lastWatched") + if val: + try: + if isinstance(val, str): + return datetime.fromisoformat(val.replace("Z", "+00:00")) + return val + except (ValueError, TypeError): + pass + + val = item.get("_mtime") + if val: + try: + return datetime.fromisoformat(str(val).replace("Z", "+00:00")) + except (ValueError, TypeError): + pass + return datetime.min.replace(tzinfo=timezone.utc) + + async def _add_item_based_rows( + self, + catalogs: list[dict[str, Any]], + library_items: LibraryCollection, + content_type: str, + loved_config: Any, + watched_config: Any, + ) -> None: + def is_type_enabled(config: Any, ct: str) -> bool: + if not config: + return False + if ct == "movie": + return getattr(config, "enabled_movie", True) + if ct == "series": + return getattr(config, "enabled_series", True) + return True + + last_loved = None + if loved_config and loved_config.enabled and is_type_enabled(loved_config, content_type): + loved = [i for i in library_items.loved if i.get("type") == content_type] + loved.sort(key=self._parse_item_last_watched, reverse=True) + last_loved = random.choice(loved[:3]) if loved else None + if last_loved: + label = loved_config.name if loved_config.name else "More like" + display_at_home = getattr(loved_config, "display_at_home", True) + catalogs.append(self.build_catalog_entry(last_loved, label, "watchly.loved", display_at_home)) + + if watched_config and watched_config.enabled and is_type_enabled(watched_config, content_type): + watched = [i for i in library_items.watched if i.get("type") == content_type] + watched.sort(key=self._parse_item_last_watched, reverse=True) + + if last_loved: + watched = [i for i in watched if i.get("_id") != last_loved.get("_id")] + + last_watched = random.choice(watched[:3]) if watched else None + if last_watched: + label = watched_config.name if watched_config.name else "Because you watched" + display_at_home = 
getattr(watched_config, "display_at_home", True) + catalogs.append( + self.build_catalog_entry( + last_watched, + label, + "watchly.watched", + display_at_home, + ) + ) diff --git a/app/services/catalog_updater.py b/app/services/catalog_updater.py index 10cc599..7aaa82f 100644 --- a/app/services/catalog_updater.py +++ b/app/services/catalog_updater.py @@ -1,29 +1,25 @@ import asyncio from datetime import datetime, timezone -from typing import Any, cast +from typing import Any from fastapi import HTTPException from loguru import logger from app.core.config import settings from app.core.security import redact_token -from app.core.settings import UserSettings from app.services.auth import auth_service -from app.services.catalog import DynamicCatalogService, sort_catalogs -from app.services.manifest import manifest_service from app.services.stremio.service import StremioBundle from app.services.token_store import token_store -from app.services.translation import translation_service class CatalogUpdater: """ - Catalog updater that triggers updates on-demand when users request catalogs. + Triggers on-demand catalog updates by building a fresh manifest + and pushing the catalogs to Stremio's addon collection. Uses in-memory locking to prevent duplicate concurrent updates. 
""" def __init__(self): - # In-memory lock to prevent duplicate updates for the same token self._updating_tokens: set[str] = set() def _needs_update(self, credentials: dict[str, Any]) -> bool: @@ -33,23 +29,19 @@ def _needs_update(self, credentials: dict[str, Any]) -> bool: last_updated = credentials.get("last_updated") if not last_updated: - # No timestamp means never updated, needs update return True try: - # Parse ISO format timestamp if isinstance(last_updated, str): last_update_time = datetime.fromisoformat(last_updated.replace("Z", "+00:00")) else: last_update_time = last_updated - # Check if more than 11 hours have passed (update if less than 1 hour remaining) now = datetime.now(timezone.utc) if last_update_time.tzinfo is None: last_update_time = last_update_time.replace(tzinfo=timezone.utc) time_since_update = (now - last_update_time).total_seconds() - # Update if less than 1 hour remaining until next update return time_since_update >= (settings.CATALOG_REFRESH_INTERVAL_SECONDS - 3600) except (ValueError, TypeError, AttributeError) as e: logger.warning(f"Failed to parse last_updated timestamp: {e}. Treating as needs update.") @@ -58,17 +50,7 @@ def _needs_update(self, credentials: dict[str, Any]) -> bool: async def refresh_catalogs_for_credentials( self, token: str, credentials: dict[str, Any], update_timestamp: bool = True ) -> bool: - """ - Refresh catalogs for a user's credentials. - - Args: - token: User token - credentials: User credentials dict - update_timestamp: Whether to update last_updated timestamp on success - - Returns: - True if update was successful, False otherwise - """ + """Build a fresh manifest and push the catalogs to Stremio.""" if not credentials: logger.warning(f"[{redact_token(token)}] Attempted to refresh catalogs with no credentials.") raise HTTPException( @@ -82,119 +64,64 @@ async def refresh_catalogs_for_credentials( if not auth_key: return True - resolved_auth_key = cast(str, auth_key) - - # 1. 
Check if addon is still installed + # Check if addon is still installed try: - addon_installed = await bundle.addons.is_addon_installed(auth_key) - if not addon_installed: - logger.info(f"[{redact_token(token)}] User has not installed addon. Removing token from redis") + if not await bundle.addons.is_addon_installed(auth_key): + logger.info(f"[{redact_token(token)}] Addon not installed, skipping update") return True except Exception as e: - logger.exception(f"[{redact_token(token)}] Failed to check if addon is installed: {e}") + logger.exception(f"[{redact_token(token)}] Failed to check addon install status: {e}") return False - # 2. Extract settings and refresh - user_settings = None - if credentials.get("settings"): - try: - user_settings = UserSettings(**credentials["settings"]) - except Exception as e: - logger.exception(f"[{redact_token(token)}] Failed to parse user settings: {e}") - # if user doesn't have setting, we can't update the catalogs. - # so no need to try again. - return True - - if not user_settings: - return True - - resolved_settings = cast(UserSettings, user_settings) + # Reuse ManifestService to build catalogs + # (handles library caching, profile building, catalog definitions, + # translation, and sorting — no need to reimplement here) + from app.services.manifest import manifest_service - library_items = await manifest_service.cache_library_and_profiles( - bundle, resolved_auth_key, resolved_settings, token - ) - language = resolved_settings.language - - from app.core.settings import resolve_tmdb_api_key + manifest = await manifest_service.get_manifest_for_token(token) + catalogs = manifest.get("catalogs", []) - tmdb_key = resolve_tmdb_api_key(resolved_settings) - dynamic_catalog_service = DynamicCatalogService( - language=language, - tmdb_api_key=tmdb_key, - ) - - catalogs = await dynamic_catalog_service.get_dynamic_catalogs( - library_items=library_items, - user_settings=resolved_settings, - token=token, - ) + success = await 
bundle.addons.update_catalogs(auth_key, catalogs) - # Translate catalogs - if resolved_settings.language: - for cat in catalogs: - if name := cat.get("name"): - try: - cat["name"] = await translation_service.translate(name, resolved_settings.language) - except Exception as e: - logger.warning(f"Failed to translate catalog name '{name}': {e}") - continue - - # sort catalogs by order in user settings - catalogs = sort_catalogs(catalogs, resolved_settings) - - success = await bundle.addons.update_catalogs(resolved_auth_key, catalogs) - - # Update timestamp and invalidate cache only on success if success and update_timestamp: try: - # Update last_updated timestamp to current time - # This represents when the update completed successfully now = datetime.now(timezone.utc) - last_updated_str = now.replace(microsecond=0).isoformat() - credentials["last_updated"] = last_updated_str + credentials["last_updated"] = now.replace(microsecond=0).isoformat() await token_store.update_user_data(token, credentials) - logger.debug(f"[{redact_token(token)}] Updated last_updated timestamp to {last_updated_str}") + logger.debug(f"[{redact_token(token)}] Updated last_updated timestamp") except Exception as e: - logger.warning(f"[{redact_token(token)}] Failed to update last_updated timestamp: {e}") + logger.warning(f"[{redact_token(token)}] Failed to update timestamp: {e}") return success except Exception as e: logger.exception(f"[{redact_token(token)}] Failed to update catalogs in background: {e}") try: - error_msg = f"Failed to update catalogs: {str(e)}" - description = ( - f"Movie and series recommendations based on your Stremio library.\n\n⚠️ Status: Error\n{error_msg}" - ) - if isinstance(auth_key, str) and auth_key: - await bundle.addons.update_description(auth_key, description) + error_auth_key = credentials.get("authKey") + if isinstance(error_auth_key, str) and error_auth_key: + description = ( + "Movie and series recommendations based on your Stremio library.\n\n" + f"⚠️ Status: 
Error\nFailed to update catalogs: {e}" + ) + await bundle.addons.update_description(error_auth_key, description) except Exception as update_err: - logger.warning(f"[{redact_token(token)}] Failed to update addon description with error: {update_err}") + logger.warning(f"[{redact_token(token)}] Failed to update addon description: {update_err}") return False finally: await bundle.close() async def trigger_update(self, token: str, credentials: dict[str, Any]) -> None: - """ - Trigger a catalog update if needed. - This function checks if update is needed and fires a background task. - Uses in-memory lock to prevent duplicate updates. - """ - # Check if already updating + """Fire a background catalog update if needed. In-memory lock prevents duplicates.""" if token in self._updating_tokens: logger.debug(f"[{redact_token(token)}] Update already in progress, skipping") return - # Check if update is needed if not self._needs_update(credentials): logger.debug(f"[{redact_token(token)}] Catalog update not needed yet") return - # Add to lock and fire background update self._updating_tokens.add(token) logger.info(f"[{redact_token(token)}] Triggering catalog update") - - # Fire and forget background task asyncio.create_task(self._update_task(token, credentials)) async def _update_task(self, token: str, credentials: dict[str, Any]) -> None: @@ -208,7 +135,6 @@ async def _update_task(self, token: str, credentials: dict[str, Any]) -> None: except Exception as e: logger.exception(f"[{redact_token(token)}] Catalog update task failed: {e}") finally: - # Always remove from lock self._updating_tokens.discard(token) diff --git a/app/services/manifest.py b/app/services/manifest.py index 777f439..6346334 100644 --- a/app/services/manifest.py +++ b/app/services/manifest.py @@ -7,8 +7,9 @@ from app.core.security import redact_token from app.core.settings import UserSettings, resolve_tmdb_api_key from app.core.version import __version__ +from app.models.library import LibraryCollection from 
app.services.auth import auth_service -from app.services.catalog import DynamicCatalogService, sort_catalogs +from app.services.catalog_definitions import DynamicCatalogService, sort_catalogs from app.services.profile.service import ProfileService from app.services.stremio.service import StremioBundle from app.services.token_store import token_store @@ -50,7 +51,7 @@ async def cache_library_and_profiles( auth_key: str, user_settings: UserSettings, token: str, - ) -> dict[str, Any]: + ) -> LibraryCollection: """ Fetch and cache library items and profiles for a user. @@ -95,16 +96,14 @@ async def _ensure_library_and_profiles_cached( auth_key: str, user_settings: UserSettings, token: str, - ) -> dict[str, Any]: + ) -> LibraryCollection: """Ensure library items and profiles are cached, fetching and building if needed.""" - # Try to get cached library items first library_items = await user_cache.get_library_items(token) if library_items: logger.debug(f"[{redact_token(token)}] Using cached library items for manifest") return library_items - # If not cached, fetch and cache logger.info(f"[{redact_token(token)}] Library items not cached, fetching from Stremio for manifest") return await self.cache_library_and_profiles(bundle, auth_key, user_settings, token) @@ -121,11 +120,7 @@ async def _build_dynamic_catalogs( settings_for_user = user_settings - # check if cached, if not, fetch and cache - library_items = await user_cache.get_library_items(token) - if not library_items: - library_items = await self._ensure_library_and_profiles_cached(bundle, auth_key, settings_for_user, token) - await user_cache.set_library_items(token, library_items) + library_items = await self._ensure_library_and_profiles_cached(bundle, auth_key, settings_for_user, token) tmdb_key = resolve_tmdb_api_key(settings_for_user) dynamic_catalog_service = DynamicCatalogService(language=settings_for_user.language, tmdb_api_key=tmdb_key) diff --git a/app/services/profile/__init__.py 
b/app/services/profile/__init__.py index cf3beb5..9bb00ee 100644 --- a/app/services/profile/__init__.py +++ b/app/services/profile/__init__.py @@ -1,23 +1,7 @@ -""" -Profile System - Additive, Transparent Design. +"""Profile service exports.""" -This package implements a transparent, additive user profile system. -No hidden interactions, easy to debug, powerful enough for all row types. -""" - -from app.services.profile.builder import ProfileBuilder -from app.services.profile.evidence import EvidenceCalculator -from app.services.profile.sampling import SmartSampler -from app.services.profile.scorer import ProfileScorer -from app.services.profile.service import ProfileIntegration, ProfileService -from app.services.profile.vectorizer import ItemVectorizer +from app.services.profile.service import ProfileService __all__ = [ - "ProfileBuilder", - "ProfileScorer", - "EvidenceCalculator", - "ItemVectorizer", - "SmartSampler", "ProfileService", - "ProfileIntegration", ] diff --git a/app/services/profile/integration.py b/app/services/profile/integration.py deleted file mode 100644 index 054a1b4..0000000 --- a/app/services/profile/integration.py +++ /dev/null @@ -1,8 +0,0 @@ -"""Compatibility wrapper for the profile service. - -New code should import from `app.services.profile.service`. 
-""" - -from app.services.profile.service import ProfileIntegration, ProfileService - -__all__ = ["ProfileService", "ProfileIntegration"] diff --git a/app/services/profile/sampling.py b/app/services/profile/sampling.py index ea444e9..2e2b3fc 100644 --- a/app/services/profile/sampling.py +++ b/app/services/profile/sampling.py @@ -1,5 +1,4 @@ -from typing import Any - +from app.models.library import LibraryCollection from app.models.scoring import ScoredItem from app.services.profile.constants import SMART_SAMPLING_MAX_ITEMS from app.services.scoring import ScoringService @@ -26,29 +25,12 @@ def __init__(self, scoring_service: ScoringService): def sample_items( self, - library_items: dict[str, list[dict[str, Any]]], + library_items: LibraryCollection, content_type: str, max_items: int = SMART_SAMPLING_MAX_ITEMS, ) -> list[ScoredItem]: - """ - Sample items for profile building. - - Args: - library_items: Library items dict with 'loved', 'liked', 'watched', 'added' keys - content_type: Content type to filter (movie/series) - max_items: Maximum items to return - - Returns: - List of ScoredItem objects - """ - # Get all items of the requested type - all_items = ( - library_items.get("loved", []) - + library_items.get("liked", []) - + library_items.get("watched", []) - + library_items.get("added", []) - ) - typed_items = [it for it in all_items if it.get("type") == content_type] + """Sample items for profile building with quota-based selection.""" + typed_items = [it for it in library_items.all_items() if it.get("type") == content_type] if not typed_items: return [] @@ -68,8 +50,7 @@ def sample_items( if len(unique_items) <= max_items: return [self.scoring_service.process_item(it) for it in unique_items.values()] - # Get set of added item IDs for classification - added_item_ids = {it.get("_id") for it in library_items.get("added", [])} + added_item_ids = {it.get("_id") for it in library_items.added} # Separate items into pools and score them loved_liked_pool = [] diff 
--git a/app/services/profile/service.py b/app/services/profile/service.py index f61e937..8ebba84 100644 --- a/app/services/profile/service.py +++ b/app/services/profile/service.py @@ -2,6 +2,7 @@ from loguru import logger +from app.models.library import LibraryCollection from app.models.taste_profile import TasteProfile from app.services.profile.builder import ProfileBuilder from app.services.profile.constants import GENRE_WHITELIST_LIMIT @@ -25,7 +26,7 @@ def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): async def build_profile_from_library( self, - library_items: dict, + library_items: LibraryCollection, content_type: str, stremio_service: Any = None, auth_key: str | None = None, @@ -35,30 +36,17 @@ async def build_profile_from_library( stremio_service, library_items, auth_key ) - all_items = ( - library_items.get("loved", []) - + library_items.get("liked", []) - + library_items.get("watched", []) - + library_items.get("added", []) - ) - typed_items = [it for it in all_items if it.get("type") == content_type] - - if not typed_items: + typed = library_items.for_type(content_type) + if typed.is_empty(): return None, watched_tmdb, watched_imdb - library_items_dict = { - "loved": [it for it in library_items.get("loved", []) if it.get("type") == content_type], - "liked": [it for it in library_items.get("liked", []) if it.get("type") == content_type], - "watched": [it for it in library_items.get("watched", []) if it.get("type") == content_type], - "added": [it for it in library_items.get("added", []) if it.get("type") == content_type], - } - sampled = self.sampler.sample_items(library_items_dict, content_type) + sampled = self.sampler.sample_items(typed, content_type) profile = await self.builder.build_profile(sampled, content_type=content_type) return profile, watched_tmdb, watched_imdb async def build_profile_incremental( self, - library_items: dict, + library_items: LibraryCollection, content_type: str, token: str, stremio_service: Any = 
None, @@ -69,13 +57,8 @@ async def build_profile_incremental( stremio_service, library_items, auth_key ) - all_items = ( - library_items.get("loved", []) - + library_items.get("liked", []) - + library_items.get("watched", []) - + library_items.get("added", []) - ) - typed_items = [it for it in all_items if it.get("type") == content_type] + typed = library_items.for_type(content_type) + typed_items = typed.all_items() if not typed_items: return None, watched_tmdb, watched_imdb @@ -106,30 +89,17 @@ async def build_profile_incremental( logger.debug(f"[{token[:8]}...] Found {len(new_item_ids)} new items, using incremental update") - new_library_items_dict = { - "loved": [ - it - for it in library_items.get("loved", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - "liked": [ - it - for it in library_items.get("liked", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - "watched": [ - it - for it in library_items.get("watched", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - "added": [ - it - for it in library_items.get("added", []) - if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids - ], - } - - sampled = self.sampler.sample_items(new_library_items_dict, content_type) + def _is_new(it: dict) -> bool: + return (it.get("_id") or it.get("id")) in new_item_ids + + new_library = LibraryCollection( + loved=[it for it in typed.loved if _is_new(it)], + liked=[it for it in typed.liked if _is_new(it)], + watched=[it for it in typed.watched if _is_new(it)], + added=[it for it in typed.added if _is_new(it)], + ) + + sampled = self.sampler.sample_items(new_library, content_type) if not sampled: return existing_profile, watched_tmdb, watched_imdb @@ -153,7 +123,7 @@ async def build_and_cache_profile( self, token: str, content_type: str, - library_items: dict, + library_items: LibraryCollection, 
stremio_service: Any = None, auth_key: str | None = None, ) -> tuple[TasteProfile | None, set[int], set[str]]: @@ -179,6 +149,3 @@ async def get_genre_whitelist(self, profile: TasteProfile, content_type: str) -> except Exception as e: logger.warning(f"Failed to build genre whitelist for {content_type}: {e}") return set() - - -ProfileIntegration = ProfileService diff --git a/app/services/recommendation/all_based.py b/app/services/recommendation/all_based.py index bb6910b..9c01fe9 100644 --- a/app/services/recommendation/all_based.py +++ b/app/services/recommendation/all_based.py @@ -4,6 +4,7 @@ from loguru import logger from app.core.settings import UserSettings +from app.models.library import LibraryCollection from app.models.taste_profile import TasteProfile from app.services.profile.scorer import ProfileScorer from app.services.recommendation.filtering import RecommendationFiltering @@ -34,13 +35,13 @@ def __init__(self, tmdb_service: TMDBService, user_settings: UserSettings | None async def get_recommendations_from_all_items( self, - library_items: dict[str, list[dict[str, Any]]], + library_items: LibraryCollection, content_type: str, watched_tmdb: set[int], watched_imdb: set[str], whitelist: set[int] | None = None, limit: int = 20, - item_type: str = "loved", # "loved" or "liked" + item_type: str = "loved", profile: TasteProfile | None = None, ) -> list[dict[str, Any]]: """ @@ -66,8 +67,7 @@ async def get_recommendations_from_all_items( Returns: List of recommended items """ - # Get all loved or liked items for the content type - items = library_items.get(item_type, []) + items = getattr(library_items, item_type, []) typed_items = [it for it in items if it.get("type") == content_type] diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index 589f874..5386740 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -10,6 +10,7 @@ from 
app.core.constants import DEFAULT_CATALOG_LIMIT, DEFAULT_MIN_ITEMS from app.core.security import redact_token from app.core.settings import UserSettings, get_default_settings, resolve_tmdb_api_key +from app.models.library import LibraryCollection from app.models.taste_profile import TasteProfile from app.services.auth import auth_service from app.services.catalog_updater import catalog_updater @@ -159,20 +160,17 @@ async def get_catalog( language = user_settings.language if user_settings else "en-US" - # Try to get cached library items first library_items = await user_cache.get_library_items(token) if library_items: logger.debug(f"[{redact_token(token)}...] Using cached library items") else: - # Fetch library if not cached logger.info(f"[{redact_token(token)}...] Library items not cached, fetching from Stremio") library_items = await bundle.library.get_library_items(auth_key) - # Cache it for future use await user_cache.set_library_items(token, library_items) services = self._initialize_services(language, user_settings) - integration_service: ProfileService = services["integration"] + profile_service: ProfileService = services["profile"] # Try to get cached profile and watched sets cached_data = await user_cache.get_profile_and_watched_sets(token, content_type) @@ -188,7 +186,7 @@ async def get_catalog( profile, watched_tmdb, watched_imdb, - ) = await integration_service.build_and_cache_profile( + ) = await profile_service.build_and_cache_profile( token, content_type, library_items, @@ -196,7 +194,7 @@ async def get_catalog( auth_key, ) - whitelist = await integration_service.get_genre_whitelist(profile, content_type) if profile else set() + whitelist = await profile_service.get_genre_whitelist(profile, content_type) if profile else set() # Route to appropriate recommendation service recommendations = await self._get_recommendations( @@ -327,7 +325,7 @@ def _initialize_services(self, language: str, user_settings: UserSettings) -> di tmdb_service = 
get_tmdb_service(language=language, api_key=tmdb_key) return { "tmdb": tmdb_service, - "integration": ProfileService(language=language, tmdb_api_key=tmdb_key), + "profile": ProfileService(language=language, tmdb_api_key=tmdb_key), "item": ItemBasedService(tmdb_service, user_settings), "theme": ThemeBasedService(tmdb_service, user_settings), "top_picks": TopPicksService(tmdb_service, user_settings), @@ -344,7 +342,7 @@ async def _get_recommendations( watched_tmdb: set[int], watched_imdb: set[str], whitelist: set[int], - library_items: dict, + library_items: LibraryCollection, limit: int, user_settings: UserSettings | None = None, ) -> list[dict[str, Any]]: diff --git a/app/services/recommendation/filtering.py b/app/services/recommendation/filtering.py index f3f9794..4caed98 100644 --- a/app/services/recommendation/filtering.py +++ b/app/services/recommendation/filtering.py @@ -1,6 +1,8 @@ from typing import Any from urllib.parse import unquote +from app.models.library import LibraryCollection + def parse_identifier(identifier: str) -> tuple[str | None, int | None]: """Parse Stremio identifier to extract IMDB ID and TMDB ID.""" @@ -36,25 +38,19 @@ class RecommendationFiltering: @staticmethod async def get_exclusion_sets( stremio_service: Any, - library_data: dict | None = None, + library_data: LibraryCollection | None = None, auth_key: str | None = None, ) -> tuple[set[str], set[int]]: - """ - Fetch library items and build exclusion sets for watched/loved content. 
- """ + """Build exclusion sets for watched/loved content.""" if library_data is None: if not auth_key: return set(), set() library_data = await stremio_service.library.get_library_items(auth_key) - library_data = library_data or {} + if library_data is None: + return set(), set() - all_items = ( - library_data.get("loved", []) - + library_data.get("watched", []) - + library_data.get("removed", []) - + library_data.get("liked", []) - ) + all_items = library_data.all_items_with_removed() imdb_ids = set() tmdb_ids = set() diff --git a/app/services/recommendation/top_picks.py b/app/services/recommendation/top_picks.py index e245598..ccc5b2f 100644 --- a/app/services/recommendation/top_picks.py +++ b/app/services/recommendation/top_picks.py @@ -8,6 +8,7 @@ from app.core.constants import DEFAULT_CATALOG_LIMIT, MAX_CATALOG_ITEMS from app.core.settings import UserSettings +from app.models.library import LibraryCollection from app.models.taste_profile import TasteProfile from app.services.profile.constants import TOP_PICKS_CREATOR_CAP, TOP_PICKS_GENRE_CAP from app.services.profile.sampling import SmartSampler @@ -44,7 +45,7 @@ async def get_top_picks( self, profile: TasteProfile, content_type: str, - library_items: dict[str, list[dict[str, Any]]], + library_items: LibraryCollection, watched_tmdb: set[int], watched_imdb: set[str], limit: int = DEFAULT_CATALOG_LIMIT, @@ -166,7 +167,7 @@ async def get_top_picks( async def _fetch_recommendations_from_top_items( self, - library_items: dict[str, list[dict[str, Any]]], + library_items: LibraryCollection, content_type: str, mtype: str, ) -> list[dict[str, Any]]: @@ -222,7 +223,7 @@ async def _fetch_recommendations_from_top_items( async def _fetch_simkl_recommendations( self, - library_items: dict[str, list[dict[str, Any]]], + library_items: LibraryCollection, content_type: str, mtype: str, ) -> list[dict[str, Any]]: diff --git a/app/services/row_generator.py b/app/services/row_generator.py index b503774..758a2ce 100644 --- 
a/app/services/row_generator.py +++ b/app/services/row_generator.py @@ -1,15 +1,14 @@ """ Dynamic Row Generator Service. -Generates 3 personalized catalog rows using a tiered sampling system: -- Row 1 (The Core): User's strongest preferences (Gold tier: Top 1-3) -- Row 2 (The Blend): Mixed preferences with higher complexity (Gold+Silver: Top 1-8) -- Row 3 (The Rising Star): Emerging interests (Silver tier: Rank 4-10) +Generates 3 personalized catalog rows from a user's taste profile: +- Row 1 (Core): Strongest preferences +- Row 2 (Blend): Mixed preferences with variety +- Row 3 (Rising Star): Emerging/exploratory interests """ import asyncio import random -from enum import Enum from typing import Any from loguru import logger @@ -21,531 +20,350 @@ from app.services.tmdb.genre import movie_genres, series_genres from app.services.tmdb.service import TMDBService, get_tmdb_service -GOLD_TIER_LIMIT = 3 # Top 1-3 items -SILVER_TIER_START = 3 # Rank 4+ -SILVER_TIER_END = 10 # Up to Rank 10 +GOLD_END = 3 +SILVER_START = 3 +SILVER_END = 10 -# Available axes for row generation -AXIS_GENRE = "genre" -AXIS_KEYWORD = "keyword" -AXIS_COUNTRY = "country" -AXIS_RUNTIME = "runtime" -AXIS_CREATOR = "creator" +ROLE_ANCHOR = "a" +ROLE_FLAVOR = "f" +ROLE_FALLBACK = "b" +AXIS_GENRE = "g" +AXIS_KEYWORD = "k" +AXIS_COUNTRY = "ct" +AXIS_RUNTIME = "r" +AXIS_CREATOR = "cr" -class AxisRole(str, Enum): - ANCHOR = "anchor" # strong signal, near-required - FLAVOR = "flavor" # boosts relevance, optional - FALLBACK = "fallback" # ranking only, never filtering - - -class RowAxis(BaseModel): - name: str - value: Any - role: AxisRole - weight: float = 1.0 - - -def normalize_keyword(kw: str) -> str: - """Normalize keyword for display.""" - return kw.strip().replace("-", " ").replace("_", " ").title() - - -def get_genre_name(genre_id: int, content_type: str) -> str: - """Get genre name from ID.""" - genre_map = movie_genres if content_type == "movie" else series_genres - return genre_map.get(genre_id, 
"Movies" if content_type == "movie" else "Series") - - -def get_country_adjective(country_code: str) -> str | None: - """Get country adjective (e.g., 'US' -> 'American').""" - adjectives = COUNTRY_ADJECTIVES.get(country_code, []) - return random.choice(adjectives) if adjectives else None - - -def runtime_to_modifier(bucket: str) -> str | None: - """Get display modifier for runtime bucket.""" - modifiers = { - "short": "Short & Sweet", - "medium": None, # No modifier for medium - "long": "Epic", - } - return modifiers.get(bucket) +class RowDefinition(BaseModel): + """A dynamic catalog row with an ID (encodes TMDB params) and a display title.""" -def sample_from_tier(items: list[tuple[Any, float]], start: int, end: int, count: int = 1) -> list[tuple[Any, float]]: - """Sample random items from a specific tier range.""" - tier_items = items[start:end] - if not tier_items: - return [] - return random.sample(tier_items, min(count, len(tier_items))) + title: str + id: str -def sample_from_gold(items: list[tuple[Any, float]], count: int = 1) -> list[tuple[Any, float]]: - """Sample from Gold tier (Top 1-3).""" - return sample_from_tier(items, 0, GOLD_TIER_LIMIT, count) +class LLMRowTheme(BaseModel): + """Schema for Gemini structured output — a single themed catalog row.""" + title: str = Field(description="Creative, short title for the collection (2-5 words)") + genres: list[int] = Field(description="List of valid TMDB genre IDs") + keywords: list[str] = Field(default_factory=list, description="Specific TMDB keyword names") + country: str | None = Field(default=None, description="ISO 3166-1 country code or null") -def sample_from_silver(items: list[tuple[Any, float]], count: int = 1) -> list[tuple[Any, float]]: - """Sample from Silver tier (Rank 4-10).""" - return sample_from_tier(items, SILVER_TIER_START, SILVER_TIER_END, count) +# --- ID building (format must match theme_based.py parser) --- -def sample_from_gold_silver(items: list[tuple[Any, float]], count: int = 1) -> 
list[tuple[Any, float]]: - """Sample from combined Gold+Silver tier (Rank 1-10).""" - return sample_from_tier(items, 0, SILVER_TIER_END, count) +def build_row_id(axes: list[tuple[str, str, Any]]) -> str: + """Build row ID from axes. Each axis is (role, axis_type, value). -def build_row_id(axes: list[RowAxis]) -> str: - """Build a unique row ID from axes and their roles.""" + Example output: watchly.theme.a:g28.f:k1234.b:rshort + """ parts = ["watchly.theme"] + sorted_axes = sorted(axes, key=lambda x: (x[0], x[1], str(x[2]))) + for role, axis_type, value in sorted_axes: + if isinstance(value, (list, tuple)): + value = "-".join(str(v) for v in value) + parts.append(f"{role}:{axis_type}{value}") + return ".".join(parts) - role_map = { - AxisRole.ANCHOR: "a", - AxisRole.FLAVOR: "f", - AxisRole.FALLBACK: "b", - } - # Sort axes for consistent IDs - sorted_axes = sorted(axes, key=lambda x: (x.role, x.name, str(x.value))) +# --- Display helpers --- - for axis in sorted_axes: - role_pfx = role_map.get(axis.role, "f") - axis_pfx = { - AXIS_GENRE: "g", - AXIS_KEYWORD: "k", - AXIS_COUNTRY: "ct", - AXIS_RUNTIME: "r", - AXIS_CREATOR: "cr", - }.get(axis.name, "x") - # Handle value formatting - val = axis.value - if isinstance(val, (list, tuple)): - val = "-".join(str(v) for v in val) +def _genre_name(genre_id: int, content_type: str) -> str: + genre_map = movie_genres if content_type == "movie" else series_genres + return genre_map.get(genre_id, "Movies" if content_type == "movie" else "Series") - parts.append(f"{role_pfx}:{axis_pfx}{val}") - return ".".join(parts) +def _country_adjective(code: str) -> str | None: + adjs = COUNTRY_ADJECTIVES.get(code, []) + return random.choice(adjs) if adjs else None -class RowDefinition(BaseModel): - """Defines a dynamic catalog row.""" +def _keyword_display(name: str) -> str: + return name.strip().replace("-", " ").replace("_", " ").title() - title: str - id: str - axes: list[RowAxis] = [] - explanation: str | None = None - expansion_strategy: 
str | None = None - @property - def is_valid(self) -> bool: - return len(self.axes) > 0 +def _runtime_modifier(bucket: str) -> str | None: + return {"short": "Short & Sweet", "long": "Epic"}.get(bucket) -class LLMRowTheme(BaseModel): - """Schema for structured LLM output - a single themed catalog row.""" +def _pick(items: list, start: int, end: int, exclude: set | None = None) -> Any | None: + """Pick a random item from items[start:end], excluding IDs in `exclude`.""" + pool = items[start:end] + if exclude: + pool = [x for x in pool if x[0] not in exclude] + if not pool: + pool = items[start:end] + return random.choice(pool) if pool else None - title: str = Field(description="Creative, short title for the collection (2-5 words)") - genres: list[int] = Field(description="List of valid TMDB genre IDs") - keywords: list[str] = Field(default_factory=list, description="Specific TMDB keyword names") - country: str | None = Field(default=None, description="ISO 3166-1 country code or null") +class RowGeneratorService: + """Generates dynamic, personalized row definitions from a taste profile.""" -class RowComponents(BaseModel): - """Internal structure for building a row.""" - - axes: list[RowAxis] = [] - explanation: str | None = None - - # For title generation - prompt_parts: list[str] = [] - fallback_parts: list[str] = [] + def __init__(self, tmdb_service: TMDBService | None = None): + self.tmdb_service = tmdb_service or get_tmdb_service() - def build_prompt(self) -> str: - """Build Gemini prompt from parts.""" - return " + ".join(self.prompt_parts) + async def generate_rows( + self, + profile: TasteProfile, + content_type: str = "movie", + api_key: str | None = None, + ) -> list[RowDefinition]: + """Generate up to 3 personalized catalog rows.""" + genres = profile.get_top_genres(limit=5) + keywords = profile.get_top_keywords(limit=10) + countries = profile.get_top_countries(limit=2) + runtimes = sorted(profile.runtime_bucket_scores.items(), key=lambda x: x[1], 
reverse=True) - def build_fallback(self) -> str: - """Build fallback title from parts.""" - return " ".join(self.fallback_parts) + keyword_names = await self._resolve_keyword_names([kid for kid, _ in keywords]) - def to_dict(self) -> dict[str, Any]: - """Convert to dict for row building.""" - return { - "axes": self.axes, - "explanation": self.explanation, - } + if api_key: + try: + llm_rows = await self._generate_with_llm( + profile, genres, keywords, keyword_names, content_type, api_key + ) + if llm_rows: + logger.info(f"Generated {len(llm_rows)} LLM-driven rows for {content_type}") + return llm_rows + except Exception as e: + logger.warning(f"LLM row generation failed, using fallback: {e}") + rows = self._build_rows_fallback(genres, keywords, countries, runtimes, keyword_names, content_type) + titled = await self._generate_titles(rows) + logger.info(f"Generated {len(titled)} rows (fallback) for {content_type}") + return titled -class ExtractedFeatures: - """Container for all extracted profile features with keyword names resolved.""" + # --- Fallback row building (non-LLM) --- - def __init__( + def _build_rows_fallback( self, genres: list[tuple[int, float]], keywords: list[tuple[int, float]], countries: list[tuple[str, float]], runtimes: list[tuple[str, float]], - creators: list[tuple[int, float]], keyword_names: dict[int, str], content_type: str, - ): - self.genres = genres - self.keywords = keywords - self.countries = countries - self.runtimes = runtimes - self.creators = creators - self.keyword_names = keyword_names - self.content_type = content_type - - def get_keyword_name(self, keyword_id: int) -> str | None: - return self.keyword_names.get(keyword_id) - - def get_genre_name(self, genre_id: int) -> str: - return get_genre_name(genre_id, self.content_type) - - -class RowBuilder: - """Builds a single row by sampling from axes with specific roles.""" - - def __init__(self, features: ExtractedFeatures): - self.features = features - self.components = 
RowComponents() - self.used_axes: set[str] = set() - - def add_axis(self, name: str, value: Any, role: AxisRole, weight: float = 1.0) -> "RowBuilder": - """Add an axis with a specific role and weight.""" - axis = RowAxis(name=name, value=value, role=role, weight=weight) - self.components.axes.append(axis) - - # Build prompt and fallback title parts - display_val = self._get_display_value(name, value) - if display_val: - prefix = "" - if role == AxisRole.ANCHOR: - prefix = "Anchor: " - elif role == AxisRole.FLAVOR: - prefix = "Flavor: " - - self.components.prompt_parts.append(f"{prefix}{name.title()}: {display_val}") - - # For fallback title, we prioritize Anchor and Flavor - if role in (AxisRole.ANCHOR, AxisRole.FLAVOR): - if name == AXIS_COUNTRY: - self.components.fallback_parts.insert(0, display_val) - else: - self.components.fallback_parts.append(display_val) - - self.used_axes.add(f"{name}:{value}") - return self - - def _get_display_value(self, name: str, value: Any) -> str | None: - """Get human-readable value for an axis.""" - if name == AXIS_GENRE: - return self.features.get_genre_name(value) - if name == AXIS_KEYWORD: - return normalize_keyword(self.features.get_keyword_name(value) or "") - if name == AXIS_COUNTRY: - return get_country_adjective(value) - if name == AXIS_RUNTIME: - return runtime_to_modifier(value) - return str(value) - - def build(self) -> RowComponents | None: - """Build and return the row components if valid (has at least one anchor).""" - has_anchor = any(a.role == AxisRole.ANCHOR for a in self.components.axes) - if has_anchor: - return self.components - return None + ) -> list[tuple[list[tuple[str, str, Any]], str]]: + """Build up to 3 rows as (axes, fallback_title) tuples.""" + rows = [] + used_genres: set[int] = set() + used_keywords: set[int] = set() + + # Row 1: Core — top genre + top keywords + r1 = self._build_core(genres, keywords, runtimes, keyword_names, content_type, used_genres, used_keywords) + if r1: + rows.append(r1) + + 
# Row 2: Blend — genre + country or secondary genre + r2 = self._build_blend(genres, countries, content_type, used_genres) + if r2: + rows.append(r2) + + # Row 3: Rising Star — emerging keyword + secondary genre + country + r3 = self._build_rising(genres, keywords, countries, keyword_names, content_type, used_genres, used_keywords) + if r3: + rows.append(r3) + + return rows[:3] + + def _build_core(self, genres, keywords, runtimes, keyword_names, content_type, used_genres, used_keywords): + axes = [] + title_parts = [] + + g = _pick(genres, 0, GOLD_END, used_genres) + if not g: + return None + axes.append((ROLE_ANCHOR, AXIS_GENRE, g[0])) + title_parts.append(_genre_name(g[0], content_type)) + used_genres.add(g[0]) + + for _ in range(random.randint(1, 2)): + k = _pick(keywords, 0, GOLD_END, used_keywords) + if k and k[0] in keyword_names: + axes.append((ROLE_FLAVOR, AXIS_KEYWORD, k[0])) + title_parts.append(_keyword_display(keyword_names[k[0]])) + used_keywords.add(k[0]) + + if runtimes: + rt = random.choice(runtimes[:2]) + axes.append((ROLE_FALLBACK, AXIS_RUNTIME, rt[0])) + mod = _runtime_modifier(rt[0]) + if mod: + title_parts.insert(0, mod) + + return (axes, " ".join(title_parts)) + + def _build_blend(self, genres, countries, content_type, used_genres): + axes = [] + title_parts = [] + + g = _pick(genres, 0, GOLD_END, used_genres) + if not g: + return None + axes.append((ROLE_ANCHOR, AXIS_GENRE, g[0])) + title_parts.append(_genre_name(g[0], content_type)) + used_genres.add(g[0]) + + use_country = random.choice([True, False]) + if use_country and countries: + c = _pick(countries, 0, SILVER_END) + if c: + axes.append((ROLE_FLAVOR, AXIS_COUNTRY, c[0])) + adj = _country_adjective(c[0]) + if adj: + title_parts.insert(0, adj) + else: + other = [gx for gx in genres if gx[0] != g[0]] + sg = _pick(other, 0, SILVER_END) if other else None + if sg: + axes.append((ROLE_FLAVOR, AXIS_GENRE, sg[0])) + title_parts.append(_genre_name(sg[0], content_type)) + + return (axes, " 
".join(title_parts)) + + def _build_rising(self, genres, keywords, countries, keyword_names, content_type, used_genres, used_keywords): + axes = [] + title_parts = [] + + k = _pick(keywords, SILVER_START, SILVER_END, used_keywords) + if not k or k[0] not in keyword_names: + return None + axes.append((ROLE_ANCHOR, AXIS_KEYWORD, k[0])) + title_parts.append(_keyword_display(keyword_names[k[0]])) + used_keywords.add(k[0]) + g = _pick(genres, SILVER_START, SILVER_END, used_genres) + if g: + axes.append((ROLE_FLAVOR, AXIS_GENRE, g[0])) + title_parts.append(_genre_name(g[0], content_type)) -class RowGeneratorService: - """Generates dynamic, personalized row definitions from a User Taste Profile.""" + if countries: + c = _pick(countries, 0, SILVER_END) + if c: + axes.append((ROLE_FALLBACK, AXIS_COUNTRY, c[0])) - def __init__(self, tmdb_service: TMDBService | None = None): - self.tmdb_service = tmdb_service or get_tmdb_service() + return (axes, " ".join(title_parts)) - async def generate_rows( - self, profile: TasteProfile, content_type: str = "movie", api_key: str | None = None - ) -> list[RowDefinition]: - """ - Generate exactly 3 personalized catalog rows. - If api_key is provided, uses LLM to generate creative themes. - Otherwise uses tiered sampling system. - - Returns: - List of RowDefinition - """ - # 1. Extract all features from profile - features = await self._extract_features(profile, content_type) - - # 2. Try LLM generation if key is present - if api_key: - try: - llm_rows = await self._generate_rows_with_llm(profile, features, content_type, api_key) - if llm_rows: - logger.info(f"Generated {len(llm_rows)} LLM-driven rows for {content_type}") - return llm_rows - except Exception as e: - logger.warning(f"LLM row generation failed, falling back to tiered sampling: {e}") - - # 3. 
Fallback to Tiered Sampling - rows_data = [] - used_genres = set() - used_keywords = set() - - # Row 1: The Core (Strongest matches) - core_row = self._build_core_row(features, exclude_genres=used_genres, exclude_keywords=used_keywords) - if core_row: - rows_data.append(core_row) - self._update_used_axes(core_row, used_genres, used_keywords) - - # Row 2: The Blend (Mixing themes) - blend_row = self._build_blend_row(features, exclude_genres=used_genres, exclude_keywords=used_keywords) - if blend_row: - rows_data.append(blend_row) - self._update_used_axes(blend_row, used_genres, used_keywords) - - # Row 3: The Rising Star (Exploration) - rising_row = self._build_rising_star_row(features, exclude_genres=used_genres, exclude_keywords=used_keywords) - if rising_row: - rows_data.append(rising_row) - - # 4. Generate titles via server's default Gemini model (gemma) - final_rows = await self._generate_titles(rows_data[:3]) - - logger.info(f"Generated {len(final_rows)} dynamic rows (Tiered Sampling) for {content_type}") - return final_rows - - def _update_used_axes(self, row: RowComponents, used_genres: set, used_keywords: set): - """Track used genres and keywords to ensure row diversity.""" - for axis in row.axes: - if axis.name == AXIS_GENRE: - used_genres.add(axis.value) - elif axis.name == AXIS_KEYWORD: - used_keywords.add(axis.value) - - async def _extract_features(self, profile: TasteProfile, content_type: str) -> ExtractedFeatures: - """Extract all features from profile and resolve keyword names.""" - # Get raw features - genres = profile.get_top_genres(limit=5) - keywords = profile.get_top_keywords(limit=10) - countries = profile.get_top_countries(limit=2) - runtimes = sorted(profile.runtime_bucket_scores.items(), key=lambda x: x[1], reverse=True) - creators = profile.get_top_creators(limit=5) + # --- Title generation via Gemini --- - # Fetch keyword names in parallel - keyword_ids = [k_id for k_id, _ in keywords] - keyword_names_raw = await asyncio.gather( - 
*[self._get_keyword_name(kid) for kid in keyword_ids], + async def _generate_titles(self, rows: list[tuple[list[tuple[str, str, Any]], str]]) -> list[RowDefinition]: + if not rows: + return [] + + prompts = [fallback for _, fallback in rows] + results = await asyncio.gather( + *[gemini_service.generate_content_async(p) for p in prompts], return_exceptions=True, ) - keyword_names = { - kid: name for kid, name in zip(keyword_ids, keyword_names_raw) if name and not isinstance(name, Exception) - } - - return ExtractedFeatures( - genres=genres, - keywords=keywords, - countries=countries, - runtimes=runtimes, - creators=creators, - keyword_names=keyword_names, - content_type=content_type, - ) - async def _get_keyword_name(self, keyword_id: int) -> str | None: - """Fetch keyword name from TMDB.""" - try: - data = await self.tmdb_service.get_keyword_details(keyword_id) - return data.get("name") - except Exception: - return None + final = [] + for i, (axes, fallback) in enumerate(rows): + result = results[i] + title = result.strip() if isinstance(result, str) else fallback + final.append(RowDefinition(title=title, id=build_row_id(axes))) + return final + + # --- LLM-based generation --- - def _build_core_row( + async def _generate_with_llm( self, - features: ExtractedFeatures, - exclude_genres: set[int] | None = None, - exclude_keywords: set[int] | None = None, - ) -> RowComponents | None: - """ - Build 'The Core' row: - Anchor: GENRE (Gold) - Flavor: 1-2 KEYWORDS (Gold) - Fallback: RUNTIME (Gold/Silver) - """ - exclude_genres = exclude_genres or set() - exclude_keywords = exclude_keywords or set() - builder = RowBuilder(features) - - # 1. 
Anchor: Genre - available_genres = [g for g in features.genres if g[0] not in exclude_genres] - genres = sample_from_gold(available_genres, 1) if available_genres else sample_from_gold(features.genres, 1) - if not genres: - return None - builder.add_axis(AXIS_GENRE, genres[0][0], AxisRole.ANCHOR, 1.0) + profile: TasteProfile, + genres: list[tuple[int, float]], + keywords: list[tuple[int, float]], + keyword_names: dict[int, str], + content_type: str, + api_key: str, + ) -> list[RowDefinition] | None: + summary = profile.interest_summary or "No summary available." + genre_map = movie_genres if content_type == "movie" else series_genres + valid_genres = ", ".join(f"{name} (ID: {gid})" for gid, name in genre_map.items()) + + profile_keywords = [name for kid, _ in keywords[:12] if (name := keyword_names.get(kid))] + kw_list = f"Themes they already like: {', '.join(profile_keywords)}. " if profile_keywords else "" + keyword_hint = kw_list + "You can also suggest new themes for discovery." + + prompt = ( + "Using the user's interest summary below, generate exactly 3 streaming " + f"collections for {content_type}. " + "Use genres (required), keywords, and country when relevant.\n\n" + f"Interest Summary:\n{summary}\n\n" + "Generate 3 rows:\n" + "1. THE CORE — strongest match to their taste\n" + "2. MIXED PREFERENCES — blend with variety\n" + "3. RISING STAR — discovery, adjacent to their taste\n\n" + f"Genres: use ONLY these TMDB Genre IDs: {valid_genres}\n" + f"Keywords: {keyword_hint}\n" + "Country: ISO 3166-1 code or null.\n" + "Each row: title (2-5 words), genres (list of IDs), " + "keywords (list of strings), country (string or null).\n" + "Output a JSON array of 3 objects." + ) - # 2. 
Flavor: 1-2 Keywords - available_keywords = [k for k in features.keywords if k[0] not in exclude_keywords] - keywords = sample_from_gold(available_keywords, random.randint(1, 2)) if available_keywords else [] - for k_id, _ in keywords: - builder.add_axis(AXIS_KEYWORD, k_id, AxisRole.FLAVOR, 0.7) + data = await gemini_service.generate_structured_async( + prompt=prompt, + response_schema=list[LLMRowTheme], + system_instruction=( + "You are a creative film curator. Design 3 catalog rows from the user's interest summary. " + "Row 1: strong match. Row 2: blend + variety. Row 3: discovery. " + "Use genres, keywords, and country. Output valid JSON only." + ), + api_key=api_key, + ) - # 3. Fallback: Runtime - if features.runtimes: - runtime = random.choice(features.runtimes[:2]) - builder.add_axis(AXIS_RUNTIME, runtime[0], AxisRole.FALLBACK, 0.3) + if not data or not isinstance(data, list): + return None - row = builder.build() - if row: - row.explanation = "The Core: Based on your absolute favorite genres and recurring themes." - return row + profile_kw_map = {name.lower(): kid for kid, name in keyword_names.items()} + final = [] - def _build_blend_row( - self, - features: ExtractedFeatures, - exclude_genres: set[int] | None = None, - exclude_keywords: set[int] | None = None, - ) -> RowComponents | None: - """ - Build 'The Blend' row: - Anchor: GENRE (Gold) - Flavor: COUNTRY or secondary GENRE (Gold/Silver) - """ - exclude_genres = exclude_genres or set() - builder = RowBuilder(features) - - # 1. Anchor: Genre - available_genres = [g for g in features.genres if g[0] not in exclude_genres] - genres = sample_from_gold(available_genres, 1) if available_genres else sample_from_gold(features.genres, 1) - if not genres: - return None - builder.add_axis(AXIS_GENRE, genres[0][0], AxisRole.ANCHOR, 1.0) - - # 2. 
Flavor: Country or Secondary Genre - flavor_type = random.choice([AXIS_COUNTRY, AXIS_GENRE]) - - if flavor_type == AXIS_COUNTRY and features.countries: - country = sample_from_gold_silver(features.countries, 1) - builder.add_axis(AXIS_COUNTRY, country[0][0], AxisRole.FLAVOR, 0.7) - elif flavor_type == AXIS_GENRE: - other_genres = [g for g in features.genres if g[0] != genres[0][0]] - if other_genres: - sec_genre = sample_from_gold_silver(other_genres, 1) - builder.add_axis(AXIS_GENRE, sec_genre[0][0], AxisRole.FLAVOR, 0.7) - - row = builder.build() - if row: - row.explanation = "The Blend: Mixing your top genres with international flavor or secondary interests." - return row - - def _build_rising_star_row( - self, - features: ExtractedFeatures, - exclude_genres: set[int] | None = None, - exclude_keywords: set[int] | None = None, - ) -> RowComponents | None: - """ - Build 'The Rising Star' row: - Anchor: recent KEYWORD (Silver) - Flavor: GENRE (Silver) - Fallback: COUNTRY (Gold/Silver) - """ - exclude_genres = exclude_genres or set() - exclude_keywords = exclude_keywords or set() - builder = RowBuilder(features) - - # 1. Anchor: Recent Keyword (Sampling from Silver to promote exploration) - available_keywords = [k for k in features.keywords if k[0] not in exclude_keywords] - keywords = sample_from_silver(available_keywords, 1) if available_keywords else [] - if keywords: - builder.add_axis(AXIS_KEYWORD, keywords[0][0], AxisRole.ANCHOR, 1.0) - - # If we couldn't add an anchor, this row fails - if not builder.components.axes: - return None + for item in data: + if isinstance(item, dict): + title, genre_ids, kw_names, country = ( + item.get("title", "Recommended"), + item.get("genres", []), + item.get("keywords", []), + item.get("country"), + ) + else: + title, genre_ids, kw_names, country = item.title, item.genres, item.keywords, item.country - # 2. 
Flavor: Genre (Silver) - available_genres = [g for g in features.genres if g[0] not in exclude_genres] - genres = sample_from_silver(available_genres, 1) if available_genres else [] - if genres: - builder.add_axis(AXIS_GENRE, genres[0][0], AxisRole.FLAVOR, 0.7) - - # 3. Fallback: Country - if features.countries: - country = sample_from_gold_silver(features.countries, 1) - builder.add_axis(AXIS_COUNTRY, country[0][0], AxisRole.FALLBACK, 0.3) - - row = builder.build() - if row: - row.explanation = "The Rising Star: Exploring emerging interests and newer themes in your history." - return row - - def _build_signature_rows(self, features: ExtractedFeatures) -> list[RowComponents]: - """Generate dynamic signature recipes from user history.""" - signature_rows = [] - - # 1. Top genre × dominant keyword - if features.genres and features.keywords: - builder = RowBuilder(features) - builder.add_axis(AXIS_GENRE, features.genres[0][0], AxisRole.ANCHOR, 1.0) - builder.add_axis(AXIS_KEYWORD, features.keywords[0][0], AxisRole.FLAVOR, 0.7) - row = builder.build() - if row: - row.explanation = "Signature: Your #1 genre paired with your most frequent theme." - signature_rows.append(row) - - # 2. Top genre × preferred runtime - if features.genres and features.runtimes: - builder = RowBuilder(features) - builder.add_axis(AXIS_GENRE, features.genres[0][0], AxisRole.ANCHOR, 1.0) - builder.add_axis(AXIS_RUNTIME, features.runtimes[0][0], AxisRole.FLAVOR, 0.7) - row = builder.build() - if row: - row.explanation = "Signature: Favorite genre fit for your preferred watch duration." 
- signature_rows.append(row) - - return signature_rows - - async def _generate_titles(self, rows_data: list[RowComponents]) -> list[RowDefinition]: - """Generate titles for tiered sampling rows via server's default Gemini model.""" - if not rows_data: - return [] + axes: list[tuple[str, str, Any]] = [] + for gid in genre_ids: + if int(gid) in genre_map: + axes.append((ROLE_ANCHOR, AXIS_GENRE, int(gid))) - # Build prompts and fire Gemini requests (uses server key + default model) - prompts = [row.build_prompt() for row in rows_data] - gemini_tasks = [gemini_service.generate_content_async(p) for p in prompts] - results = await asyncio.gather(*gemini_tasks, return_exceptions=True) + for kw_name in kw_names: + kid = await self._resolve_keyword_to_id(kw_name, profile_kw_map) + if kid is not None: + axes.append((ROLE_FLAVOR, AXIS_KEYWORD, kid)) - final_rows = [] - for i, row in enumerate(rows_data): - result = results[i] + if country: + axes.append((ROLE_FLAVOR, AXIS_COUNTRY, country)) - # Determine title - if isinstance(result, Exception): - logger.warning(f"Gemini failed for row {i}: {result}") - title = row.build_fallback() - elif result: - title = result.strip() - else: - title = row.build_fallback() + if axes: + final.append(RowDefinition(title=title, id=build_row_id(axes))) - # Build the row ID - row_id = build_row_id(row.axes) + return final if final else None - final_rows.append( - RowDefinition( - title=title, - id=row_id, - **row.to_dict(), - ) - ) + # --- Helpers --- - return final_rows + async def _resolve_keyword_names(self, keyword_ids: list[int]) -> dict[int, str]: + results = await asyncio.gather( + *[self._get_keyword_name(kid) for kid in keyword_ids], + return_exceptions=True, + ) + return {kid: name for kid, name in zip(keyword_ids, results) if isinstance(name, str) and name} + + async def _get_keyword_name(self, keyword_id: int) -> str | None: + try: + data = await self.tmdb_service.get_keyword_details(keyword_id) + return data.get("name") + except 
Exception: + return None async def _resolve_keyword_to_id(self, kw_name: str, profile_kw_map: dict[str, int]) -> int | None: - """Resolve a keyword name to TMDB ID: profile first, then TMDB search (for discovery).""" kw_lower = str(kw_name).strip().lower() if not kw_lower: return None @@ -562,97 +380,3 @@ async def _resolve_keyword_to_id(self, kw_name: str, profile_kw_map: dict[str, i except Exception: pass return None - - async def _generate_rows_with_llm( - self, - profile: TasteProfile, - features: ExtractedFeatures, - content_type: str, - api_key: str, - ) -> list[RowDefinition] | None: - """Generate rows from the user's interest summary; balance personalization with discovery.""" - try: - summary = profile.interest_summary or "No summary available." - - current_genre_map = movie_genres if content_type == "movie" else series_genres - valid_genre_list = ", ".join([f"{name} (ID: {gid})" for gid, name in current_genre_map.items()]) - - profile_keywords = [name for k_id, _ in features.keywords[:12] if (name := features.get_keyword_name(k_id))] - keyword_hint = ( - ( - f"Themes they already like (you can use these): {', '.join(profile_keywords)}. " - if profile_keywords - else "" - ) - + "You can also suggest new themes for discovery—especially for Rising Star—" - "e.g. adjacent genres or topics they might not have tried yet. We will resolve keywords." - ) - - prompt = ( - "Using only the user's interest summary below, generate exactly 3 streaming collections for" - f" {content_type}. Use genres (required), keywords, and country when relevant.\n\nInterest" - f" Summary:\n{summary}\n\nGenerate 3 rows in this order:\n1. THE CORE — What they will love" - " most: strongest match to their taste (genres + keywords + country if relevant).\n2. MIXED" - " PREFERENCES — Blend of their tastes with more variety (genres + keywords + country if" - " relevant).\n3. 
RISING STAR — Discovery: suggest themes they might not have explored yet but" - " would likely enjoy (adjacent to their taste, or natural next step). Use genres + keywords +" - " country; openness to new content here.\n\nRules:\n- Genres: use ONLY these TMDB Genre IDs:" - f" {valid_genre_list}\n- Keywords: {keyword_hint}\n- Country: ISO 3166-1 code (e.g. US, KR, JP)" - " or null when relevant.\n- Each row: title (2-5 words), genres (list of IDs), keywords (list" - " of strings), country (string or null).\n- Output a JSON array of 3 objects." - ) - - data = await gemini_service.generate_structured_async( - prompt=prompt, - response_schema=list[LLMRowTheme], - system_instruction=( - "You are a creative film curator. Design 3 catalog rows from the user's interest summary." - " Row 1 (The Core): strong match. Row 2 (Mixed): blend + variety. Row 3 (Rising Star):" - " discovery—suggest new content they would enjoy, not just more of the same. Use genres," - " keywords, and country. Output valid JSON only." 
- ), - api_key=api_key, - ) - - if not data or not isinstance(data, list): - return None - - final_rows = [] - profile_kw_map = {name.lower(): kid for kid, name in features.keyword_names.items()} - - for item in data: - if isinstance(item, dict): - title = item.get("title", "Recommended") - genre_ids = item.get("genres", []) - kw_names = item.get("keywords", []) - country = item.get("country") - else: - title = item.title - genre_ids = item.genres - kw_names = item.keywords - country = item.country - - builder = RowBuilder(features) - - for gid in genre_ids: - if int(gid) in current_genre_map: - builder.add_axis(AXIS_GENRE, int(gid), AxisRole.ANCHOR) - - for kw_name in kw_names: - kid = await self._resolve_keyword_to_id(kw_name, profile_kw_map) - if kid is not None: - builder.add_axis(AXIS_KEYWORD, kid, AxisRole.FLAVOR) - - if country: - builder.add_axis(AXIS_COUNTRY, country, AxisRole.FLAVOR) - - row_comp = builder.build() - if row_comp and row_comp.axes: - row_id = build_row_id(row_comp.axes) - final_rows.append(RowDefinition(title=title, id=row_id, axes=row_comp.axes)) - - return final_rows if final_rows else None - - except Exception as e: - logger.warning(f"Error in _generate_rows_with_llm: {e}") - return None diff --git a/app/services/stremio/library.py b/app/services/stremio/library.py index f17c890..06a2683 100644 --- a/app/services/stremio/library.py +++ b/app/services/stremio/library.py @@ -4,6 +4,7 @@ from async_lru import alru_cache from loguru import logger +from app.models.library import LibraryCollection from app.services.stremio.client import StremioClient, StremioLikesClient @@ -33,7 +34,7 @@ async def get_likes_by_type(self, auth_token: str, media_type: str, status: str logger.exception(f"Failed to fetch {status} {media_type} items: {e}") return [] - async def get_library_items(self, auth_key: str) -> dict[str, list[dict[str, Any]]]: + async def get_library_items(self, auth_key: str) -> LibraryCollection: """ Fetch all library items and categorize 
them (watched, loved, added, removed). """ @@ -185,13 +186,13 @@ def sort_by_recency(x: dict): f" {len(removed)} removed items" ) - return { - "watched": watched, - "loved": loved, - "liked": liked, - "added": added, - "removed": removed, - } + return LibraryCollection( + watched=watched, + loved=loved, + liked=liked, + added=added, + removed=removed, + ) except Exception as e: logger.exception(f"Error processing library items: {e}") - return {"watched": [], "loved": [], "liked": [], "added": [], "removed": []} + return LibraryCollection() diff --git a/app/services/user_cache.py b/app/services/user_cache.py index d1ace43..b694bed 100644 --- a/app/services/user_cache.py +++ b/app/services/user_cache.py @@ -7,6 +7,7 @@ from app.core.constants import CATALOG_KEY, LIBRARY_ITEMS_KEY, PROFILE_KEY, WATCHED_SETS_KEY from app.core.security import redact_token +from app.models.library import LibraryCollection from app.models.taste_profile import TasteProfile from app.services.redis_service import redis_service @@ -39,42 +40,27 @@ def _last_profile_build_key(token: str, content_type: str) -> str: # Library Items Methods - async def get_library_items(self, token: str) -> dict[str, Any] | None: - """ - Get cached library items for a user. - - Args: - token: User token - - Returns: - Library items dictionary, or None if not cached - """ + async def get_library_items(self, token: str) -> LibraryCollection | None: + """Get cached library items for a user.""" key = self._library_items_key(token) cached = await redis_service.get(key) if cached: try: - return json.loads(cached) - except json.JSONDecodeError as e: + data = json.loads(cached) + return LibraryCollection(**data) + except (json.JSONDecodeError, Exception) as e: logger.warning(f"Failed to decode cached library items for {redact_token(token)}...: {e}") return None return None - async def set_library_items(self, token: str, library_items: dict[str, Any]) -> None: - """ - Cache library items for a user. 
- - Args: - token: User token - library_items: Library items dictionary to cache - """ + async def set_library_items(self, token: str, library_items: LibraryCollection) -> None: + """Cache library items for a user.""" key = self._library_items_key(token) - await redis_service.set(key, json.dumps(library_items)) + await redis_service.set(key, library_items.model_dump_json()) logger.debug(f"[{redact_token(token)}...] Cached library items") - # Invalidate all catalog caches when library items are updated - # This ensures catalogs are regenerated with fresh library data await self.invalidate_all_catalogs(token) async def invalidate_library_items(self, token: str) -> None: From 4df4d70cca4e6970f7c2e60b67d4731147cba570 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Sat, 28 Mar 2026 13:10:02 +0545 Subject: [PATCH 07/68] feat: implement user context service for managing user data and settings --- app/services/context.py | 101 ++++++++ app/services/manifest.py | 160 +++---------- app/services/profile/sampling.py | 2 +- app/services/{ => profile}/scoring.py | 25 -- app/services/profile/service.py | 2 +- .../recommendation/catalog_service.py | 226 ++++++------------ app/services/recommendation/top_picks.py | 2 +- 7 files changed, 219 insertions(+), 299 deletions(-) create mode 100644 app/services/context.py rename app/services/{ => profile}/scoring.py (88%) diff --git a/app/services/context.py b/app/services/context.py new file mode 100644 index 0000000..a08bece --- /dev/null +++ b/app/services/context.py @@ -0,0 +1,101 @@ +from dataclasses import dataclass +from typing import Any + +from fastapi import HTTPException +from loguru import logger + +from app.core.security import redact_token +from app.core.settings import UserSettings, get_default_settings +from app.models.library import LibraryCollection +from app.services.auth import auth_service +from app.services.stremio.service import StremioBundle +from app.services.token_store import token_store +from 
app.services.user_cache import user_cache + + +@dataclass +class UserContext: + """Everything a request handler needs about a user. + + The caller MUST call close() when done (or use as async context manager). + """ + + token: str + credentials: dict[str, Any] + user_settings: UserSettings + auth_key: str | None + library: LibraryCollection + bundle: StremioBundle + + async def close(self): + await self.bundle.close() + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + await self.close() + + +def extract_settings(credentials: dict[str, Any]) -> UserSettings: + """Parse UserSettings from credentials, falling back to defaults.""" + settings_dict = credentials.get("settings", {}) + return UserSettings(**settings_dict) if settings_dict else get_default_settings() + + +async def load_user_context( + token: str, + *, + require_auth: bool = True, +) -> UserContext: + """Load credentials, settings, auth key, and library for a token. + + Args: + token: User token + require_auth: If True, raises 401 on auth failure. If False, auth_key may be None. + + Returns: + UserContext with all resolved data. Caller must call .close(). + """ + if not token: + raise HTTPException( + status_code=401, + detail="Missing token. Please reconfigure the addon.", + ) + + credentials = await token_store.get_user_data(token) + if not credentials: + raise HTTPException( + status_code=401, + detail="Token not found. 
Please reconfigure the addon.", + ) + + user_settings = extract_settings(credentials) + bundle = StremioBundle() + + try: + if require_auth: + auth_key = await auth_service.require_auth_key(bundle, credentials, token) + else: + auth_key = await auth_service.resolve_auth_key_with_bundle(bundle, credentials, token) + + library = await user_cache.get_library_items(token) + if not library and auth_key: + logger.info(f"[{redact_token(token)}] Library not cached, fetching from Stremio") + library = await bundle.library.get_library_items(auth_key) + await user_cache.set_library_items(token, library) + + if not library: + library = LibraryCollection() + + return UserContext( + token=token, + credentials=credentials, + user_settings=user_settings, + auth_key=auth_key, + library=library, + bundle=bundle, + ) + except Exception: + await bundle.close() + raise diff --git a/app/services/manifest.py b/app/services/manifest.py index 6346334..d0384cd 100644 --- a/app/services/manifest.py +++ b/app/services/manifest.py @@ -1,6 +1,5 @@ from typing import Any -from fastapi import HTTPException from loguru import logger from app.core.config import settings @@ -8,11 +7,10 @@ from app.core.settings import UserSettings, resolve_tmdb_api_key from app.core.version import __version__ from app.models.library import LibraryCollection -from app.services.auth import auth_service from app.services.catalog_definitions import DynamicCatalogService, sort_catalogs +from app.services.context import load_user_context from app.services.profile.service import ProfileService from app.services.stremio.service import StremioBundle -from app.services.token_store import token_store from app.services.translation import translation_service from app.services.user_cache import user_cache @@ -28,9 +26,9 @@ def get_base_manifest() -> dict[str, Any]: "version": __version__, "name": settings.ADDON_NAME, "description": "Movie and series recommendations based on your Stremio library.", - "logo": 
("https://raw.githubusercontent.com/TimilsinaBimal/Watchly/refs/heads/main/app/static/logo.png"), + "logo": ("https://raw.githubusercontent.com/TimilsinaBimal/Watchly" "/refs/heads/main/app/static/logo.png"), "background": ( - "https://raw.githubusercontent.com/TimilsinaBimal/Watchly/refs/heads/main/app/static/cover.png" + "https://raw.githubusercontent.com/TimilsinaBimal/Watchly" "/refs/heads/main/app/static/cover.png" ), "resources": ["catalog"], "types": ["movie", "series"], @@ -40,7 +38,11 @@ def get_base_manifest() -> dict[str, Any]: "stremioAddonsConfig": { "issuer": "https://stremio-addons.net", "signature": ( - "eyJhbGciOiJkaXIiLCJlbmMiOiJBMTI4Q0JDLUhTMjU2In0..WSrhzzlj1TuDycD6QoVLuA.Dzmxzr4y83uqQF15r4tC1bB9-vtZRh1Rvy4BqgDYxu91c2esiJuov9KnnI_cboQCgZS7hjwnIqRSlQ-jEyGwXHHRerh9QklyfdxpXqNUyBgTWFzDOVdVvDYJeM_tGMmR.sezAChlWGV7lNS-t9HWB6A" # noqa + "eyJhbGciOiJkaXIiLCJlbmMiOiJBMTI4Q0JDLUhTMjU2In0" + "..WSrhzzlj1TuDycD6QoVLuA" + ".Dzmxzr4y83uqQF15r4tC1bB9-vtZRh1Rvy4BqgDYxu91c2esiJuov9KnnI_cboQC" + "gZS7hjwnIqRSlQ-jEyGwXHHRerh9QklyfdxpXqNUyBgTWFzDOVdVvDYJeM_tGMmR" + ".sezAChlWGV7lNS-t9HWB6A" # noqa ), }, } @@ -52,79 +54,58 @@ async def cache_library_and_profiles( user_settings: UserSettings, token: str, ) -> LibraryCollection: - """ - Fetch and cache library items and profiles for a user. - - This should be called during token creation to pre-cache data - so manifest generation is fast. + """Fetch and cache library items and profiles for a user. - Args: - bundle: StremioBundle instance - auth_key: Stremio auth key - user_settings: User settings - token: User token - - Returns: - Library items dictionary + Called during token creation to pre-cache data so manifest generation is fast. 
""" - # Fetch library items logger.info(f"[{redact_token(token)}] Fetching library items for caching") library_items = await bundle.library.get_library_items(auth_key) - - # Cache library items using centralized cache service await user_cache.set_library_items(token, library_items) logger.debug(f"[{redact_token(token)}] Cached library items") - # Build and cache profiles for both movie and series language = user_settings.language tmdb_key = resolve_tmdb_api_key(user_settings) - integration_service = ProfileService(language=language, tmdb_api_key=tmdb_key) + profile_service = ProfileService(language=language, tmdb_api_key=tmdb_key) for content_type in ["movie", "series"]: try: logger.info(f"[{redact_token(token)}] Building and caching profile for {content_type}") - await integration_service.build_and_cache_profile(token, content_type, library_items, bundle, auth_key) + await profile_service.build_and_cache_profile(token, content_type, library_items, bundle, auth_key) logger.debug(f"[{redact_token(token)}] Cached profile and watched sets for {content_type}") except Exception as e: logger.warning(f"[{redact_token(token)}] Failed to build/cache profile for {content_type}: {e}") return library_items - async def _ensure_library_and_profiles_cached( - self, - bundle: StremioBundle, - auth_key: str, - user_settings: UserSettings, - token: str, - ) -> LibraryCollection: - """Ensure library items and profiles are cached, fetching and building if needed.""" - library_items = await user_cache.get_library_items(token) - - if library_items: - logger.debug(f"[{redact_token(token)}] Using cached library items for manifest") - return library_items + async def get_manifest_for_token(self, token: str) -> dict[str, Any]: + """Generate manifest for a given token.""" + base_manifest = self.get_base_manifest() - logger.info(f"[{redact_token(token)}] Library items not cached, fetching from Stremio for manifest") - return await self.cache_library_and_profiles(bundle, auth_key, 
user_settings, token) + ctx = await load_user_context(token, require_auth=False) + fetched_catalogs: list[dict[str, Any]] = [] + try: + if ctx.auth_key: + tmdb_key = resolve_tmdb_api_key(ctx.user_settings) + catalog_def_service = DynamicCatalogService(language=ctx.user_settings.language, tmdb_api_key=tmdb_key) + fetched_catalogs = await catalog_def_service.get_dynamic_catalogs( + ctx.library, ctx.user_settings, token=token + ) + except Exception as e: + logger.exception(f"[{redact_token(token)}] Dynamic catalog build failed: {e}") + fetched_catalogs = [] + finally: + await ctx.close() - async def _build_dynamic_catalogs( - self, - bundle: StremioBundle, - auth_key: str, - user_settings: UserSettings | None, - token: str, - ) -> list[dict[str, Any]]: - """Build dynamic catalogs for the manifest.""" - if not user_settings: - return [] + all_catalogs = [c.copy() for c in base_manifest["catalogs"]] + [c.copy() for c in fetched_catalogs] - settings_for_user = user_settings + language = ctx.user_settings.language + translated = await self._translate_catalogs(all_catalogs, language) + sorted_catalogs = sort_catalogs(translated, ctx.user_settings) - library_items = await self._ensure_library_and_profiles_cached(bundle, auth_key, settings_for_user, token) + if sorted_catalogs: + base_manifest["catalogs"] = sorted_catalogs - tmdb_key = resolve_tmdb_api_key(settings_for_user) - dynamic_catalog_service = DynamicCatalogService(language=settings_for_user.language, tmdb_api_key=tmdb_key) - return await dynamic_catalog_service.get_dynamic_catalogs(library_items, settings_for_user, token=token) + return base_manifest async def _translate_catalogs(self, catalogs: list[dict[str, Any]], language: str | None) -> list[dict[str, Any]]: """Translate catalog names to target language.""" @@ -142,74 +123,5 @@ async def _translate_catalogs(self, catalogs: list[dict[str, Any]], language: st return translated_catalogs - def _sort_catalogs( - self, catalogs: list[dict[str, Any]], user_settings: 
UserSettings | None - ) -> list[dict[str, Any]]: - """Sort catalogs according to user settings order.""" - if not user_settings: - return catalogs - - return sort_catalogs(catalogs, user_settings) - - async def get_manifest_for_token(self, token: str) -> dict[str, Any]: - """ - Generate manifest for a given token. - - Args: - token: User token - - Returns: - Complete manifest dictionary - - Raises: - HTTPException: If token is invalid or credentials are missing - """ - if not token: - raise HTTPException(status_code=401, detail="Missing token. Please reconfigure the addon.") - - # Load user credentials and settings - creds = await token_store.get_user_data(token) - if not creds: - raise HTTPException(status_code=401, detail="Token not found. Please reconfigure the addon.") - - user_settings = None - try: - if creds.get("settings"): - user_settings = UserSettings(**creds["settings"]) - except Exception as e: - logger.error(f"[{redact_token(token)}] Error loading user data from token store: {e}") - raise HTTPException(status_code=401, detail="Invalid token session. 
Please reconfigure.") - - base_manifest = self.get_base_manifest() - - bundle = StremioBundle() - fetched_catalogs = [] - try: - # Resolve auth key - auth_key = await auth_service.resolve_auth_key_with_bundle(bundle, creds, token) - - if auth_key and user_settings: - fetched_catalogs = await self._build_dynamic_catalogs(bundle, auth_key, user_settings, token) - except Exception as e: - logger.exception(f"[{redact_token(token)}] Dynamic catalog build failed: {e}") - fetched_catalogs = [] - finally: - await bundle.close() - - # Combine base catalogs with fetched catalogs - all_catalogs = [c.copy() for c in base_manifest["catalogs"]] + [c.copy() for c in fetched_catalogs] - - # Translate catalogs - language = user_settings.language if user_settings else None - translated_catalogs = await self._translate_catalogs(all_catalogs, language) - - # Sort catalogs - sorted_catalogs = self._sort_catalogs(translated_catalogs, user_settings) - - if sorted_catalogs: - base_manifest["catalogs"] = sorted_catalogs - - return base_manifest - manifest_service = ManifestService() diff --git a/app/services/profile/sampling.py b/app/services/profile/sampling.py index 2e2b3fc..7da1c28 100644 --- a/app/services/profile/sampling.py +++ b/app/services/profile/sampling.py @@ -1,7 +1,7 @@ from app.models.library import LibraryCollection from app.models.scoring import ScoredItem from app.services.profile.constants import SMART_SAMPLING_MAX_ITEMS -from app.services.scoring import ScoringService +from app.services.profile.scoring import ScoringService class SmartSampler: diff --git a/app/services/scoring.py b/app/services/profile/scoring.py similarity index 88% rename from app/services/scoring.py rename to app/services/profile/scoring.py index 7cfdc6c..b5f8b50 100644 --- a/app/services/scoring.py +++ b/app/services/profile/scoring.py @@ -40,28 +40,6 @@ def process_item(self, raw_item: dict) -> ScoredItem: source_type="loved" if item.is_loved else ("liked" if item.is_liked else "watched"), ) - def 
calculate_score( - self, - item: dict | StremioLibraryItem, - is_loved: bool = False, - is_liked: bool = False, - ) -> float: - """ - Backwards compatible method to just get the float score. - Accepts either a raw dict or a StremioLibraryItem. - """ - if isinstance(item, dict): - # Temporarily inject flags if passed separately (legacy support) - if "_is_loved" not in item: - item["_is_loved"] = is_loved - if "_is_liked" not in item: - item["_is_liked"] = is_liked - model_item = StremioLibraryItem(**item) - else: - model_item = item - - return self._calculate_score_components(model_item)["final_score"] - def _calculate_score_components(self, item: StremioLibraryItem) -> dict: """Internal logic to calculate score components.""" state = item.state @@ -160,9 +138,6 @@ def _calculate_score_components(self, item: StremioLibraryItem) -> dict: added_to_library_score = 0.0 if not item.temp and not item.removed: added_to_library_score = 100.0 - # if item.removed: - # # should we penalize for removed items? 
- # added_to_library_score = -50.0 # Calculate Final Score final_score = ( diff --git a/app/services/profile/service.py b/app/services/profile/service.py index 8ebba84..1334db8 100644 --- a/app/services/profile/service.py +++ b/app/services/profile/service.py @@ -7,9 +7,9 @@ from app.services.profile.builder import ProfileBuilder from app.services.profile.constants import GENRE_WHITELIST_LIMIT from app.services.profile.sampling import SmartSampler +from app.services.profile.scoring import ScoringService from app.services.profile.vectorizer import ItemVectorizer from app.services.recommendation.filtering import RecommendationFiltering -from app.services.scoring import ScoringService from app.services.tmdb.service import get_tmdb_service from app.services.user_cache import user_cache diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index 5386740..fbdf892 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -9,11 +9,11 @@ from app.core.config import settings from app.core.constants import DEFAULT_CATALOG_LIMIT, DEFAULT_MIN_ITEMS from app.core.security import redact_token -from app.core.settings import UserSettings, get_default_settings, resolve_tmdb_api_key +from app.core.settings import UserSettings, resolve_tmdb_api_key from app.models.library import LibraryCollection from app.models.taste_profile import TasteProfile -from app.services.auth import auth_service from app.services.catalog_updater import catalog_updater +from app.services.context import UserContext, extract_settings, load_user_context from app.services.profile.service import ProfileService from app.services.recommendation.all_based import AllBasedService from app.services.recommendation.creators import CreatorsService @@ -21,7 +21,6 @@ from app.services.recommendation.theme_based import ThemeBasedService from app.services.recommendation.top_picks import TopPicksService from 
app.services.recommendation.utils import pad_to_min -from app.services.stremio.service import StremioBundle from app.services.tmdb.service import get_tmdb_service from app.services.token_store import token_store from app.services.user_cache import user_cache @@ -41,10 +40,7 @@ def shuffle_data_if_needed( def _clean_meta(meta: dict) -> dict | None: - """Return a sanitized Stremio meta object without internal fields. - - Keeps only public keys and drops internal scoring/IDs/keywords/cast, etc. - """ + """Return a sanitized Stremio meta object without internal fields.""" allowed = { "id", "type", @@ -58,10 +54,8 @@ def _clean_meta(meta: dict) -> dict | None: "runtime", } cleaned = {k: v for k, v in meta.items() if k in allowed} - # Drop empty values cleaned = {k: v for k, v in cleaned.items() if v not in (None, "", [], {}, ())} - # if id does not start with tt, return None if not cleaned.get("id", "").startswith("tt"): return None return cleaned @@ -74,22 +68,9 @@ def __init__(self): async def get_catalog( self, token: str, content_type: str, catalog_id: str ) -> tuple[dict[str, Any], dict[str, Any]]: - """ - Get catalog recommendations. - - Args: - token: User token - content_type: Content type (movie/series) - catalog_id: Catalog ID (watchly.rec, watchly.creators, watchly.theme.*, etc.) - - Returns: - Tuple of (recommendations dict, response headers dict) - """ - # Validate inputs + """Get catalog recommendations.""" self._validate_inputs(token, content_type, catalog_id) - # Prepare response headers - headers: dict[str, Any] = { "Access-Control-Allow-Origin": "*", "Access-Control-Allow-Headers": "*", @@ -99,9 +80,9 @@ async def get_catalog( ), } - logger.info(f"[{redact_token(token)}...] 
Fetching catalog for {content_type} with id {catalog_id}") + logger.info(f"[{redact_token(token)}] Fetching catalog for {content_type} with id {catalog_id}") - # Get credentials + # Load credentials (needed for cache check + shuffle settings) credentials = await token_store.get_user_data(token) if not credentials: logger.error("No credentials found for token") @@ -112,91 +93,73 @@ async def get_catalog( # Trigger lazy update if needed if settings.AUTO_UPDATE_CATALOGS: - logger.info(f"[{redact_token(token)}...] Triggering auto update for token") try: await catalog_updater.trigger_update(token, credentials) except Exception as e: - logger.error(f"[{redact_token(token)}...] Failed to trigger auto update: {e}") - # continue with the request even if the auto update fails - pass + logger.error(f"[{redact_token(token)}] Failed to trigger auto update: {e}") - bundle = StremioBundle() - user_settings = None + # Check cache first — avoids auth/library/profile loading on cache hit stale_data = None + cached_result = await user_cache.get_catalog(token, content_type, catalog_id) + + if cached_result: + data, created_at = cached_result + age = int(time.time()) - created_at + + if age < settings.CATALOG_REFRESH_INTERVAL_SECONDS: + logger.debug(f"[{redact_token(token)}] Using cached catalog for {content_type}/{catalog_id}") + user_settings = extract_settings(credentials) + data["metas"] = shuffle_data_if_needed(user_settings, catalog_id, data["metas"]) + return data, headers + + stale_data = data + logger.info( + f"[{redact_token(token)}] Catalog stale (age: {age}s) for " + f"{content_type}/{catalog_id}, refreshing..." 
+ ) + else: + logger.info( + f"[{redact_token(token)}] Catalog not cached for " f"{content_type}/{catalog_id}, building from scratch" + ) + # Cache miss — load full user context + ctx = await load_user_context(token) try: - # get cached catalog - cached_result = await user_cache.get_catalog(token, content_type, catalog_id) - - if cached_result: - data, created_at = cached_result - age = int(time.time()) - created_at - - # If data is fresh enough (within refresh interval), return it - if age < settings.CATALOG_REFRESH_INTERVAL_SECONDS: - logger.debug(f"[{redact_token(token)}...] Using cached catalog for {content_type}/{catalog_id}") - # Try to extract settings from credentials for shuffling, even on cached path - user_settings = self._extract_settings(credentials) - meta_data = data["metas"] - meta_data = shuffle_data_if_needed(user_settings, catalog_id, meta_data) - data["metas"] = meta_data - return data, headers - - # If data is stale, keep it for fallback - stale_data = data - logger.info( - f"[{redact_token(token)}...] Catalog is stale (age: {age}s) for {content_type}/{catalog_id}," - "refreshing..." - ) - else: - logger.info( - f"[{redact_token(token)}...] Catalog not cached for {content_type}/{catalog_id}, building from" - " scratch" - ) - - # Resolve auth and settings - auth_key = await auth_service.require_auth_key(bundle, credentials, token) - user_settings = self._extract_settings(credentials) - - language = user_settings.language if user_settings else "en-US" - - library_items = await user_cache.get_library_items(token) - - if library_items: - logger.debug(f"[{redact_token(token)}...] Using cached library items") - else: - logger.info(f"[{redact_token(token)}...] 
Library items not cached, fetching from Stremio") - library_items = await bundle.library.get_library_items(auth_key) - await user_cache.set_library_items(token, library_items) + return await self._build_catalog(ctx, content_type, catalog_id, headers, stale_data) + finally: + await ctx.close() - services = self._initialize_services(language, user_settings) + async def _build_catalog( + self, + ctx: UserContext, + content_type: str, + catalog_id: str, + headers: dict[str, Any], + stale_data: dict[str, Any] | None, + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Build fresh catalog content using the loaded user context.""" + try: + services = self._initialize_services(ctx.user_settings) profile_service: ProfileService = services["profile"] - # Try to get cached profile and watched sets - cached_data = await user_cache.get_profile_and_watched_sets(token, content_type) + # Load profile (cached or build fresh) + cached_data = await user_cache.get_profile_and_watched_sets(ctx.token, content_type) if cached_data: - # Use cached profile and watched sets profile, watched_tmdb, watched_imdb = cached_data - logger.debug(f"[{redact_token(token)}...] Using cached profile and watched sets for {content_type}") + logger.debug(f"[{redact_token(ctx.token)}] Using cached profile for {content_type}") else: - # Build profile if not cached - logger.info(f"[{redact_token(token)}...] 
Profile not cached for {content_type}, building from library") - ( - profile, - watched_tmdb, - watched_imdb, - ) = await profile_service.build_and_cache_profile( - token, + logger.info(f"[{redact_token(ctx.token)}] Profile not cached for {content_type}, building") + profile, watched_tmdb, watched_imdb = await profile_service.build_and_cache_profile( + ctx.token, content_type, - library_items, - bundle, - auth_key, + ctx.library, + ctx.bundle, + ctx.auth_key, ) whitelist = await profile_service.get_genre_whitelist(profile, content_type) if profile else set() - # Route to appropriate recommendation service recommendations = await self._get_recommendations( catalog_id=catalog_id, content_type=content_type, @@ -205,61 +168,49 @@ async def get_catalog( watched_tmdb=watched_tmdb, watched_imdb=watched_imdb, whitelist=whitelist, - library_items=library_items, + library_items=ctx.library, limit=DEFAULT_CATALOG_LIMIT, - user_settings=user_settings, + user_settings=ctx.user_settings, ) - # Pad if needed to meet minimum of 8 items - # # TODO: This is risky because it can fetch too many unrelated items. 
+ # Pad if needed to meet minimum items if recommendations and len(recommendations) < DEFAULT_MIN_ITEMS: recommendations = await pad_to_min( content_type, recommendations, DEFAULT_MIN_ITEMS, services["tmdb"], - user_settings, + ctx.user_settings, watched_tmdb, watched_imdb, ) logger.info(f"Returning {len(recommendations)} items for {content_type}") - # Clean and format metadata - cleaned = [_clean_meta(m) for m in recommendations] - cleaned = [m for m in cleaned if m is not None] - - cleaned = shuffle_data_if_needed(user_settings, catalog_id, cleaned) + cleaned = [m for m in (_clean_meta(m) for m in recommendations) if m is not None] + cleaned = shuffle_data_if_needed(ctx.user_settings, catalog_id, cleaned) data = {"metas": cleaned} - # if catalog data is not empty, set the cache with STALE_TTL (7 days) - # This ensures we have fallback data available if the next refresh fails if cleaned: - await user_cache.set_catalog(token, content_type, catalog_id, data, settings.CATALOG_STALE_TTL) + await user_cache.set_catalog(ctx.token, content_type, catalog_id, data, settings.CATALOG_STALE_TTL) return data, headers except Exception as e: - logger.error(f"[{redact_token(token)}...] Failed to generate catalog: {e}") + logger.error(f"[{redact_token(ctx.token)}] Failed to generate catalog: {e}") - # Fallback 1: Return Stale Data if available if stale_data: logger.warning( - f"[{redact_token(token)}...] 
Serving stale content for {content_type}/{catalog_id} due to error" + f"[{redact_token(ctx.token)}] Serving stale content for " + f"{content_type}/{catalog_id} due to error" ) - # Shuffle stale data too if needed - user_settings = user_settings or self._extract_settings(credentials) meta_data = stale_data.get("metas", []) - meta_data = shuffle_data_if_needed(user_settings, catalog_id, meta_data) + meta_data = shuffle_data_if_needed(ctx.user_settings, catalog_id, meta_data) stale_data["metas"] = meta_data return stale_data, headers - # Fallback 2: Return Empty (prevents 500 error) return {"metas": []}, headers - finally: - await bundle.close() - def _validate_inputs(self, token: str, content_type: str, catalog_id: str) -> None: if not token: raise HTTPException( @@ -271,7 +222,6 @@ def _validate_inputs(self, token: str, content_type: str, catalog_id: str) -> No logger.warning(f"Invalid type: {content_type}") raise HTTPException(status_code=400, detail="Invalid type. Use 'movie' or 'series'") - # Supported IDs supported_base = [ "watchly.rec", "watchly.creators", @@ -289,9 +239,19 @@ def _validate_inputs(self, token: str, content_type: str, catalog_id: str) -> No ), ) - def _extract_settings(self, credentials: dict) -> UserSettings: - settings_dict = credentials.get("settings", {}) - return UserSettings(**settings_dict) if settings_dict else get_default_settings() + def _initialize_services(self, user_settings: UserSettings) -> dict[str, Any]: + tmdb_key = resolve_tmdb_api_key(user_settings) + language = user_settings.language + tmdb_service = get_tmdb_service(language=language, api_key=tmdb_key) + return { + "tmdb": tmdb_service, + "profile": ProfileService(language=language, tmdb_api_key=tmdb_key), + "item": ItemBasedService(tmdb_service, user_settings), + "theme": ThemeBasedService(tmdb_service, user_settings), + "top_picks": TopPicksService(tmdb_service, user_settings), + "creators": CreatorsService(tmdb_service, user_settings), + "all_based": 
AllBasedService(tmdb_service, user_settings), + } async def _get_trending_fallback( self, @@ -308,11 +268,9 @@ async def _get_trending_fallback( tmdb_service = get_tmdb_service(language=language, api_key=tmdb_key) try: - # Fetch trending week trending = await tmdb_service.get_trending(mtype, "week") items = trending.get("results", []) - # Enrich metadata from app.services.recommendation.metadata import RecommendationMetadata return await RecommendationMetadata.fetch_batch(tmdb_service, items, content_type, user_settings=None) @@ -320,19 +278,6 @@ async def _get_trending_fallback( logger.warning(f"Failed to fetch trending items: {e}") return [] - def _initialize_services(self, language: str, user_settings: UserSettings) -> dict[str, Any]: - tmdb_key = resolve_tmdb_api_key(user_settings) - tmdb_service = get_tmdb_service(language=language, api_key=tmdb_key) - return { - "tmdb": tmdb_service, - "profile": ProfileService(language=language, tmdb_api_key=tmdb_key), - "item": ItemBasedService(tmdb_service, user_settings), - "theme": ThemeBasedService(tmdb_service, user_settings), - "top_picks": TopPicksService(tmdb_service, user_settings), - "creators": CreatorsService(tmdb_service, user_settings), - "all_based": AllBasedService(tmdb_service, user_settings), - } - async def _get_recommendations( self, catalog_id: str, @@ -347,17 +292,8 @@ async def _get_recommendations( user_settings: UserSettings | None = None, ) -> list[dict[str, Any]]: """Route to appropriate recommendation service based on catalog ID.""" - # Item-based recommendations - if any( - catalog_id.startswith(p) - for p in ( - "watchly.loved.", - "watchly.watched.", - ) - ): - # Extract item ID + if any(catalog_id.startswith(p) for p in ("watchly.loved.", "watchly.watched.")): item_id = re.sub(r"^watchly\.(loved|watched)\.", "", catalog_id) - item_service: ItemBasedService = services["item"] recommendations = await item_service.get_recommendations_for_item( @@ -370,7 +306,6 @@ async def _get_recommendations( 
) logger.info(f"Found {len(recommendations)} recommendations for item {item_id}") - # Theme-based recommendations elif catalog_id.startswith("watchly.theme."): theme_service: ThemeBasedService = services["theme"] @@ -385,7 +320,6 @@ async def _get_recommendations( ) logger.info(f"Found {len(recommendations)} recommendations for theme {catalog_id}") - # Creators-based recommendations elif catalog_id == "watchly.creators": creators_service: CreatorsService = services["creators"] @@ -402,7 +336,6 @@ async def _get_recommendations( recommendations = await self._get_trending_fallback(content_type, limit, user_settings) logger.info(f"Found {len(recommendations)} recommendations from creators") - # Top picks elif catalog_id == "watchly.rec": if profile: top_picks_service: TopPicksService = services["top_picks"] @@ -420,7 +353,6 @@ async def _get_recommendations( recommendations = await self._get_trending_fallback(content_type, limit, user_settings) logger.info(f"Found {len(recommendations)} top picks for {content_type}") - # Based on what you loved elif catalog_id in ("watchly.all.loved", "watchly.liked.all"): item_type = "loved" if catalog_id == "watchly.all.loved" else "liked" all_based_service: AllBasedService = services["all_based"] diff --git a/app/services/recommendation/top_picks.py b/app/services/recommendation/top_picks.py index ccc5b2f..2648d70 100644 --- a/app/services/recommendation/top_picks.py +++ b/app/services/recommendation/top_picks.py @@ -13,6 +13,7 @@ from app.services.profile.constants import TOP_PICKS_CREATOR_CAP, TOP_PICKS_GENRE_CAP from app.services.profile.sampling import SmartSampler from app.services.profile.scorer import ProfileScorer +from app.services.profile.scoring import ScoringService from app.services.recommendation.filtering import RecommendationFiltering from app.services.recommendation.metadata import RecommendationMetadata from app.services.recommendation.rotation import DailyRotation @@ -24,7 +25,6 @@ filter_watched_by_imdb, 
resolve_tmdb_id, ) -from app.services.scoring import ScoringService from app.services.simkl import simkl_service from app.services.tmdb.service import TMDBService From 354433676202cb00f28f7185450fad778ea03661 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Sat, 28 Mar 2026 13:23:50 +0545 Subject: [PATCH 08/68] feat: introduce new profile and scoring models for improved data handling --- app/models/library.py | 51 ++++++++++++++- app/models/{taste_profile.py => profile.py} | 31 +++++---- app/models/scoring.py | 65 ------------------- app/services/interest_summary.py | 2 +- app/services/profile/builder.py | 3 +- app/services/profile/evidence.py | 2 +- app/services/profile/sampling.py | 2 +- app/services/profile/scorer.py | 2 +- app/services/profile/scoring.py | 3 +- app/services/profile/service.py | 2 +- app/services/profile/vectorizer.py | 2 +- app/services/recommendation/all_based.py | 2 +- .../recommendation/catalog_service.py | 2 +- app/services/recommendation/creators.py | 2 +- app/services/recommendation/theme_based.py | 2 +- app/services/recommendation/top_picks.py | 2 +- app/services/row_generator.py | 2 +- app/services/user_cache.py | 2 +- 18 files changed, 85 insertions(+), 94 deletions(-) rename app/models/{taste_profile.py => profile.py} (88%) delete mode 100644 app/models/scoring.py diff --git a/app/models/library.py b/app/models/library.py index 920ecc5..ae2eacc 100644 --- a/app/models/library.py +++ b/app/models/library.py @@ -1,6 +1,55 @@ +from datetime import datetime from typing import Any -from pydantic import BaseModel +from pydantic import BaseModel, Field, field_validator + + +class StremioState(BaseModel): + """Represents the user state for a library item.""" + + lastWatched: datetime | None = None + timeWatched: int = 0 + timeOffset: int = 0 + overallTimeWatched: int = 0 + timesWatched: int = 0 + flaggedWatched: int = 0 + duration: int = 0 + video_id: str | None = None + watched: str | None = None + noNotif: bool = False + season: int = 0 
+ episode: int = 0 + + @field_validator("lastWatched", mode="before") + @classmethod + def parse_last_watched(cls, v): + if isinstance(v, str): + try: + return datetime.fromisoformat(v.replace("Z", "+00:00")) + except ValueError: + return None + return v + + +class StremioLibraryItem(BaseModel): + """Represents a raw item from Stremio library.""" + + id: str = Field(..., alias="_id") + type: str + name: str + state: StremioState = Field(default_factory=StremioState) + mtime: str = Field(default="", alias="_mtime") + poster: str | None = None + temp: bool + removed: bool + + # Enriched fields (not in raw Stremio JSON, added by our service) + is_loved: bool = Field(default=False, alias="_is_loved") + is_liked: bool = Field(default=False, alias="_is_liked") + interest_score: float = Field(default=0.0, alias="_interest_score") + + class Config: + populate_by_name = True class LibraryCollection(BaseModel): diff --git a/app/models/taste_profile.py b/app/models/profile.py similarity index 88% rename from app/models/taste_profile.py rename to app/models/profile.py index e144b67..75d835f 100644 --- a/app/models/taste_profile.py +++ b/app/models/profile.py @@ -3,10 +3,25 @@ from pydantic import BaseModel, Field +from app.models.library import StremioLibraryItem -class TasteProfile(BaseModel): + +class ScoredItem(BaseModel): + """A processed library item with calculated interest scores. + + Output of the ScoringService — used by the profile builder and sampler. """ - Transparent, additive taste profile. + + item: StremioLibraryItem + score: float + completion_rate: float + is_rewatched: bool + is_recent: bool + source_type: str # 'loved' | 'watched' | 'liked' + + +class TasteProfile(BaseModel): + """Transparent, additive taste profile. Answers one question: "Which item is more likely to be liked by this user?" 
@@ -41,8 +56,6 @@ class TasteProfile(BaseModel): interest_summary: str | None = Field(default=None, description="LLM-generated description of user interests") class Config: - """Pydantic configuration.""" - json_encoders = {datetime: lambda v: v.isoformat()} def get_top_genres(self, limit: int = 5) -> list[tuple[int, float]]: @@ -70,18 +83,12 @@ def get_top_cast(self, limit: int = 5) -> list[tuple[int, float]]: return sorted(self.cast_scores.items(), key=lambda x: x[1], reverse=True)[:limit] def get_top_creators(self, limit: int = 5) -> list[tuple[int, float]]: - """ - Get top N creators (directors + cast merged) by score. - - Runtime merge for convenience. Profile stores them separately. - """ - # Merge directors and cast for combined ranking + """Get top N creators (directors + cast merged) by score.""" all_creators = {**self.director_scores, **self.cast_scores} return sorted(all_creators.items(), key=lambda x: x[1], reverse=True)[:limit] def normalize_for_ranking(self) -> dict[str, dict[Any, float]]: - """ - Normalize scores for ranking (read-time only). + """Normalize scores for ranking (read-time only). Returns normalized scores (0-1 range) for each feature type. Used only when generating recommendations, never during profile updates. 
diff --git a/app/models/scoring.py b/app/models/scoring.py deleted file mode 100644 index 6cc007e..0000000 --- a/app/models/scoring.py +++ /dev/null @@ -1,65 +0,0 @@ -from datetime import datetime - -from pydantic import BaseModel, Field, field_validator - - -class StremioState(BaseModel): - """Represents the user state for a library item.""" - - lastWatched: datetime | None = None - timeWatched: int = 0 - timeOffset: int = 0 - overallTimeWatched: int = 0 - timesWatched: int = 0 - flaggedWatched: int = 0 - duration: int = 0 - video_id: str | None = None - watched: str | None = None - noNotif: bool = False - season: int = 0 - episode: int = 0 - - @field_validator("lastWatched", mode="before") - @classmethod - def parse_last_watched(cls, v): - if isinstance(v, str): - try: - return datetime.fromisoformat(v.replace("Z", "+00:00")) - except ValueError: - return None - return v - - -class StremioLibraryItem(BaseModel): - """Represents a raw item from Stremio library.""" - - id: str = Field(..., alias="_id") - type: str - name: str - state: StremioState = Field(default_factory=StremioState) - mtime: str = Field(default="", alias="_mtime") - poster: str | None = None - temp: bool - removed: bool - - # Enriched fields (not in raw Stremio JSON, added by our service) - is_loved: bool = Field(default=False, alias="_is_loved") - is_liked: bool = Field(default=False, alias="_is_liked") - interest_score: float = Field(default=0.0, alias="_interest_score") - - class Config: - populate_by_name = True - - -class ScoredItem(BaseModel): - """ - A processed item with calculated scores. - This is the output of the ScoringService. 
- """ - - item: StremioLibraryItem - score: float - completion_rate: float - is_rewatched: bool - is_recent: bool - source_type: str # 'loved' | 'watched' | 'liked' diff --git a/app/services/interest_summary.py b/app/services/interest_summary.py index ddafe9c..99d7e78 100644 --- a/app/services/interest_summary.py +++ b/app/services/interest_summary.py @@ -1,6 +1,6 @@ from loguru import logger -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.gemini import gemini_service from app.services.profile.constants import GENRE_MAP diff --git a/app/services/profile/builder.py b/app/services/profile/builder.py index 4face31..8b6ca25 100644 --- a/app/services/profile/builder.py +++ b/app/services/profile/builder.py @@ -5,8 +5,7 @@ from loguru import logger -from app.models.scoring import ScoredItem -from app.models.taste_profile import TasteProfile +from app.models.profile import ScoredItem, TasteProfile from app.services.profile.constants import ( CAP_CAST, CAP_COUNTRY, diff --git a/app/services/profile/evidence.py b/app/services/profile/evidence.py index 1e0bd9d..def65c5 100644 --- a/app/services/profile/evidence.py +++ b/app/services/profile/evidence.py @@ -2,7 +2,7 @@ from datetime import datetime, timezone from typing import Literal -from app.models.scoring import ScoredItem +from app.models.profile import ScoredItem from app.services.profile.constants import ( EVIDENCE_WEIGHT_ADDED, EVIDENCE_WEIGHT_LIKED, diff --git a/app/services/profile/sampling.py b/app/services/profile/sampling.py index 7da1c28..0f64c19 100644 --- a/app/services/profile/sampling.py +++ b/app/services/profile/sampling.py @@ -1,5 +1,5 @@ from app.models.library import LibraryCollection -from app.models.scoring import ScoredItem +from app.models.profile import ScoredItem from app.services.profile.constants import SMART_SAMPLING_MAX_ITEMS from app.services.profile.scoring import ScoringService diff --git a/app/services/profile/scorer.py 
b/app/services/profile/scorer.py index e67efe8..fc906ff 100644 --- a/app/services/profile/scorer.py +++ b/app/services/profile/scorer.py @@ -1,6 +1,6 @@ from typing import Any -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.profile.constants import ( FEATURE_WEIGHT_COUNTRY, FEATURE_WEIGHT_CREATOR, diff --git a/app/services/profile/scoring.py b/app/services/profile/scoring.py index b5f8b50..95a3c51 100644 --- a/app/services/profile/scoring.py +++ b/app/services/profile/scoring.py @@ -3,7 +3,8 @@ from loguru import logger -from app.models.scoring import ScoredItem, StremioLibraryItem +from app.models.library import StremioLibraryItem +from app.models.profile import ScoredItem class ScoringService: diff --git a/app/services/profile/service.py b/app/services/profile/service.py index 1334db8..22ae4f6 100644 --- a/app/services/profile/service.py +++ b/app/services/profile/service.py @@ -3,7 +3,7 @@ from loguru import logger from app.models.library import LibraryCollection -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.profile.builder import ProfileBuilder from app.services.profile.constants import GENRE_WHITELIST_LIMIT from app.services.profile.sampling import SmartSampler diff --git a/app/services/profile/vectorizer.py b/app/services/profile/vectorizer.py index 83fd541..44f1a4a 100644 --- a/app/services/profile/vectorizer.py +++ b/app/services/profile/vectorizer.py @@ -2,7 +2,7 @@ import httpx -from app.models.scoring import ScoredItem +from app.models.profile import ScoredItem from app.services.cinemeta_service import CinemetaService, cinemeta_service from app.services.profile.constants import ( CAST_POSITION_LEAD, diff --git a/app/services/recommendation/all_based.py b/app/services/recommendation/all_based.py index 9c01fe9..794758a 100644 --- a/app/services/recommendation/all_based.py +++ b/app/services/recommendation/all_based.py @@ 
-5,7 +5,7 @@ from app.core.settings import UserSettings from app.models.library import LibraryCollection -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.profile.scorer import ProfileScorer from app.services.recommendation.filtering import RecommendationFiltering from app.services.recommendation.metadata import RecommendationMetadata diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index fbdf892..d836cc6 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -11,7 +11,7 @@ from app.core.security import redact_token from app.core.settings import UserSettings, resolve_tmdb_api_key from app.models.library import LibraryCollection -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.catalog_updater import catalog_updater from app.services.context import UserContext, extract_settings, load_user_context from app.services.profile.service import ProfileService diff --git a/app/services/recommendation/creators.py b/app/services/recommendation/creators.py index a8544f8..3e82652 100644 --- a/app/services/recommendation/creators.py +++ b/app/services/recommendation/creators.py @@ -5,7 +5,7 @@ from loguru import logger from app.core.settings import UserSettings -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.recommendation.filtering import RecommendationFiltering from app.services.recommendation.metadata import RecommendationMetadata from app.services.recommendation.utils import content_type_to_mtype, filter_watched_by_imdb diff --git a/app/services/recommendation/theme_based.py b/app/services/recommendation/theme_based.py index 2838b94..7623e46 100644 --- a/app/services/recommendation/theme_based.py +++ b/app/services/recommendation/theme_based.py @@ -3,7 +3,7 @@ from 
loguru import logger -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.profile.constants import ( RUNTIME_BUCKET_MEDIUM_MAX_MOVIE, RUNTIME_BUCKET_MEDIUM_MAX_SERIES, diff --git a/app/services/recommendation/top_picks.py b/app/services/recommendation/top_picks.py index 2648d70..ce383d4 100644 --- a/app/services/recommendation/top_picks.py +++ b/app/services/recommendation/top_picks.py @@ -9,7 +9,7 @@ from app.core.constants import DEFAULT_CATALOG_LIMIT, MAX_CATALOG_ITEMS from app.core.settings import UserSettings from app.models.library import LibraryCollection -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.profile.constants import TOP_PICKS_CREATOR_CAP, TOP_PICKS_GENRE_CAP from app.services.profile.sampling import SmartSampler from app.services.profile.scorer import ProfileScorer diff --git a/app/services/row_generator.py b/app/services/row_generator.py index 758a2ce..682a038 100644 --- a/app/services/row_generator.py +++ b/app/services/row_generator.py @@ -14,7 +14,7 @@ from loguru import logger from pydantic import BaseModel, Field -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.gemini import gemini_service from app.services.tmdb.countries import COUNTRY_ADJECTIVES from app.services.tmdb.genre import movie_genres, series_genres diff --git a/app/services/user_cache.py b/app/services/user_cache.py index b694bed..cc7dbea 100644 --- a/app/services/user_cache.py +++ b/app/services/user_cache.py @@ -8,7 +8,7 @@ from app.core.constants import CATALOG_KEY, LIBRARY_ITEMS_KEY, PROFILE_KEY, WATCHED_SETS_KEY from app.core.security import redact_token from app.models.library import LibraryCollection -from app.models.taste_profile import TasteProfile +from app.models.profile import TasteProfile from app.services.redis_service import redis_service From 
4f7cd43c64fdb227bf70033923f48616741c67e1 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Sat, 28 Mar 2026 13:35:51 +0545 Subject: [PATCH 09/68] refactor: replace SmartSampler class with standalone sample_items function for improved clarity and maintainability --- app/services/profile/sampling.py | 167 +++++++-------- app/services/profile/service.py | 7 +- app/services/recommendation/all_based.py | 11 +- app/services/recommendation/creators.py | 4 +- app/services/recommendation/filtering.py | 130 ++++++++++++ app/services/recommendation/item_based.py | 9 +- app/services/recommendation/rotation.py | 31 --- app/services/recommendation/scoring.py | 13 ++ app/services/recommendation/theme_based.py | 10 +- app/services/recommendation/top_picks.py | 19 +- app/services/recommendation/utils.py | 233 +-------------------- 11 files changed, 252 insertions(+), 382 deletions(-) delete mode 100644 app/services/recommendation/rotation.py diff --git a/app/services/profile/sampling.py b/app/services/profile/sampling.py index 0f64c19..e1584a9 100644 --- a/app/services/profile/sampling.py +++ b/app/services/profile/sampling.py @@ -4,105 +4,80 @@ from app.services.profile.scoring import ScoringService -class SmartSampler: - """ - Smart sampling for profile building. +def sample_items( + library_items: LibraryCollection, + content_type: str, + scoring_service: ScoringService, + max_items: int = SMART_SAMPLING_MAX_ITEMS, +) -> list[ScoredItem]: + """Sample items for profile building with quota-based selection. Strategy: 1. Always include all loved/liked/added items (strong signals) 2. Fill remaining slots with top watched items by score 3. Limit total to prevent excessive API calls """ - - def __init__(self, scoring_service: ScoringService): - """ - Initialize smart sampler. 
- - Args: - scoring_service: Service for scoring items - """ - self.scoring_service = scoring_service - - def sample_items( - self, - library_items: LibraryCollection, - content_type: str, - max_items: int = SMART_SAMPLING_MAX_ITEMS, - ) -> list[ScoredItem]: - """Sample items for profile building with quota-based selection.""" - typed_items = [it for it in library_items.all_items() if it.get("type") == content_type] - - if not typed_items: - return [] - - if len(typed_items) <= max_items: - # score all typed items and return - return [self.scoring_service.process_item(it) for it in typed_items] - - # De-duplicate by ID - unique_items = {} - for it in typed_items: - item_id = it.get("_id") - if item_id: - unique_items[item_id] = it - - # If still within limit after de-duplication - if len(unique_items) <= max_items: - return [self.scoring_service.process_item(it) for it in unique_items.values()] - - added_item_ids = {it.get("_id") for it in library_items.added} - - # Separate items into pools and score them - loved_liked_pool = [] - added_pool = [] - watched_pool = [] - - for it in unique_items.values(): - scored = self.scoring_service.process_item(it) - if scored.source_type in ["loved", "liked"]: - loved_liked_pool.append(scored) - elif it.get("_id") in added_item_ids: - added_pool.append(scored) - else: - watched_pool.append(scored) - - # Sort pools by score to ensure we take the most relevant items first - # If we sort this, we will get high scoring items, but if we don't sort this, - # we will get recent items. Maybe recent is good? I think yeah. Lets do that... - # it will likely by almost similar but not confirmed. 
- # loved_liked_pool.sort(key=lambda x: x.score, reverse=True) - # added_pool.sort(key=lambda x: x.score, reverse=True) - # watched_pool.sort(key=lambda x: x.score, reverse=True) - - # Step 1: Fill quotas - final_scored_items: list[ScoredItem] = [] - used_ids: set[str] = set() - - loved_quota = int(max_items * 0.40) - added_quota = int(max_items * 0.20) - watched_quota = max_items - loved_quota - added_quota - - # Add initial quotas - for pool, quota in [ - (loved_liked_pool, loved_quota), - (added_pool, added_quota), - (watched_pool, watched_quota), - ]: - for scored in pool[:quota]: - final_scored_items.append(scored) - used_ids.add(scored.item.id) - - # Step 2: Backfill if we have remaining slots - remaining_slots = max_items - len(final_scored_items) - if remaining_slots > 0: - # Priority for backfill: Loved > Added > Watched - for pool in [loved_liked_pool, added_pool, watched_pool]: - for scored in pool: - if remaining_slots <= 0: - break - if scored.item.id not in used_ids: - final_scored_items.append(scored) - used_ids.add(scored.item.id) - remaining_slots -= 1 - - return final_scored_items + typed_items = [it for it in library_items.all_items() if it.get("type") == content_type] + + if not typed_items: + return [] + + if len(typed_items) <= max_items: + return [scoring_service.process_item(it) for it in typed_items] + + # De-duplicate by ID + unique_items: dict[str, dict] = {} + for it in typed_items: + item_id = it.get("_id") + if item_id: + unique_items[item_id] = it + + if len(unique_items) <= max_items: + return [scoring_service.process_item(it) for it in unique_items.values()] + + added_item_ids = {it.get("_id") for it in library_items.added} + + # Separate into pools and score + loved_liked_pool: list[ScoredItem] = [] + added_pool: list[ScoredItem] = [] + watched_pool: list[ScoredItem] = [] + + for it in unique_items.values(): + scored = scoring_service.process_item(it) + if scored.source_type in ["loved", "liked"]: + loved_liked_pool.append(scored) 
+ elif it.get("_id") in added_item_ids: + added_pool.append(scored) + else: + watched_pool.append(scored) + + # Fill quotas + final: list[ScoredItem] = [] + used_ids: set[str] = set() + + loved_quota = int(max_items * 0.40) + added_quota = int(max_items * 0.20) + watched_quota = max_items - loved_quota - added_quota + + for pool, quota in [ + (loved_liked_pool, loved_quota), + (added_pool, added_quota), + (watched_pool, watched_quota), + ]: + for scored in pool[:quota]: + final.append(scored) + used_ids.add(scored.item.id) + + # Backfill remaining slots (priority: Loved > Added > Watched) + remaining = max_items - len(final) + if remaining > 0: + for pool in [loved_liked_pool, added_pool, watched_pool]: + for scored in pool: + if remaining <= 0: + break + if scored.item.id not in used_ids: + final.append(scored) + used_ids.add(scored.item.id) + remaining -= 1 + + return final diff --git a/app/services/profile/service.py b/app/services/profile/service.py index 22ae4f6..1729a45 100644 --- a/app/services/profile/service.py +++ b/app/services/profile/service.py @@ -6,7 +6,7 @@ from app.models.profile import TasteProfile from app.services.profile.builder import ProfileBuilder from app.services.profile.constants import GENRE_WHITELIST_LIMIT -from app.services.profile.sampling import SmartSampler +from app.services.profile.sampling import sample_items from app.services.profile.scoring import ScoringService from app.services.profile.vectorizer import ItemVectorizer from app.services.recommendation.filtering import RecommendationFiltering @@ -19,7 +19,6 @@ class ProfileService: def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): self.scoring_service = ScoringService() - self.sampler = SmartSampler(self.scoring_service) tmdb_service = get_tmdb_service(language=language, api_key=tmdb_api_key) vectorizer = ItemVectorizer(tmdb_service) self.builder = ProfileBuilder(vectorizer) @@ -40,7 +39,7 @@ async def build_profile_from_library( if typed.is_empty(): 
return None, watched_tmdb, watched_imdb - sampled = self.sampler.sample_items(typed, content_type) + sampled = sample_items(typed, content_type, self.scoring_service) profile = await self.builder.build_profile(sampled, content_type=content_type) return profile, watched_tmdb, watched_imdb @@ -99,7 +98,7 @@ def _is_new(it: dict) -> bool: added=[it for it in typed.added if _is_new(it)], ) - sampled = self.sampler.sample_items(new_library, content_type) + sampled = sample_items(new_library, content_type, self.scoring_service) if not sampled: return existing_profile, watched_tmdb, watched_imdb diff --git a/app/services/recommendation/all_based.py b/app/services/recommendation/all_based.py index 794758a..05ef7b6 100644 --- a/app/services/recommendation/all_based.py +++ b/app/services/recommendation/all_based.py @@ -7,16 +7,15 @@ from app.models.library import LibraryCollection from app.models.profile import TasteProfile from app.services.profile.scorer import ProfileScorer -from app.services.recommendation.filtering import RecommendationFiltering -from app.services.recommendation.metadata import RecommendationMetadata -from app.services.recommendation.scoring import RecommendationScoring -from app.services.recommendation.utils import ( - content_type_to_mtype, +from app.services.recommendation.filtering import ( + RecommendationFiltering, filter_by_genres, filter_items_by_settings, filter_watched_by_imdb, - resolve_tmdb_id, ) +from app.services.recommendation.metadata import RecommendationMetadata +from app.services.recommendation.scoring import RecommendationScoring +from app.services.recommendation.utils import content_type_to_mtype, resolve_tmdb_id from app.services.simkl import simkl_service from app.services.tmdb.service import TMDBService diff --git a/app/services/recommendation/creators.py b/app/services/recommendation/creators.py index 3e82652..f1e6b20 100644 --- a/app/services/recommendation/creators.py +++ b/app/services/recommendation/creators.py @@ -6,9 +6,9 
@@ from app.core.settings import UserSettings from app.models.profile import TasteProfile -from app.services.recommendation.filtering import RecommendationFiltering +from app.services.recommendation.filtering import RecommendationFiltering, filter_watched_by_imdb from app.services.recommendation.metadata import RecommendationMetadata -from app.services.recommendation.utils import content_type_to_mtype, filter_watched_by_imdb +from app.services.recommendation.utils import content_type_to_mtype from app.services.tmdb.service import TMDBService diff --git a/app/services/recommendation/filtering.py b/app/services/recommendation/filtering.py index 4caed98..d826465 100644 --- a/app/services/recommendation/filtering.py +++ b/app/services/recommendation/filtering.py @@ -1,6 +1,8 @@ +from datetime import datetime from typing import Any from urllib.parse import unquote +from app.core.constants import DISCOVERY_SETTINGS from app.models.library import LibraryCollection @@ -195,3 +197,131 @@ def passes_top_genre_whitelist(genre_ids: list[int] | None, whitelist: set[int]) if not gids: return True return True + + +# --- Standalone filtering functions (moved from utils.py) --- + + +def filter_watched_by_imdb(enriched: list[dict[str, Any]], watched_imdb: set[str]) -> list[dict[str, Any]]: + """Filter enriched items by watched IMDB IDs.""" + final = [] + for item in enriched: + if item.get("id") in watched_imdb: + continue + if item.get("_external_ids", {}).get("imdb_id") in watched_imdb: + continue + final.append(item) + return final + + +def filter_by_genres( + items: list[dict[str, Any]], + watched_tmdb: set[int], + whitelist: set[int] | None = None, + excluded_ids: list[int] | None = None, +) -> list[dict[str, Any]]: + """Filter items by watched set and excluded genres.""" + whitelist = whitelist or set() + excluded_ids = excluded_ids or [] + filtered = [] + + for item in items: + item_id = item.get("id") + if not item_id or item_id in watched_tmdb: + continue + genre_ids = 
item.get("genre_ids", []) + if excluded_ids and any(gid in excluded_ids for gid in genre_ids): + continue + filtered.append(item) + + return filtered + + +def build_discover_params(user_settings: Any) -> dict[str, Any]: + """Build TMDB discover API parameters based on user settings.""" + params: dict[str, Any] = {} + if not user_settings: + return params + + current_date = datetime.now() + current_year = current_date.year + + year_min = getattr(user_settings, "year_min", 1970) + year_max = getattr(user_settings, "year_max", current_year) + + for prefix in ["primary_release_date", "first_air_date"]: + params[f"{prefix}.gte"] = f"{year_min}-01-01" + if year_max >= current_year: + params[f"{prefix}.lte"] = current_date.strftime("%Y-%m-%d") + else: + params[f"{prefix}.lte"] = f"{year_max}-12-31" + + return params + + +def apply_discover_filters(params: dict[str, Any], user_settings: Any) -> dict[str, Any]: + """Merge discover params with global user settings (years, popularity).""" + if not user_settings: + return params + + global_params = build_discover_params(user_settings) + params = {**global_params, **params} + + min_rating, min_votes = RecommendationFiltering.get_quality_thresholds(user_settings) + + if "vote_count.gte" not in params: + params["vote_count.gte"] = min_votes + if "vote_average.gte" not in params: + params["vote_average.gte"] = min_rating + + return params + + +def filter_items_by_settings( + items: list[dict[str, Any]], user_settings: Any, simkl: bool = False +) -> list[dict[str, Any]]: + """Filter items post-fetch based on user settings (years, popularity).""" + if not user_settings: + return items + + year_min = getattr(user_settings, "year_min", 1970) + year_max = getattr(user_settings, "year_max", 2026) + pop_pref = getattr(user_settings, "popularity", "balanced") + + filtered = [] + for item in items: + release_date = item.get("release_date") or item.get("first_air_date") or item.get("released") + if release_date: + try: + year = 
int(release_date.split("-")[0]) + if year < year_min or year > year_max: + continue + except (ValueError, IndexError): + pass + + params = DISCOVERY_SETTINGS.get(pop_pref, {}) + if not params: + continue + + ops = { + "gte": lambda x, y: x >= y, + "lte": lambda x, y: x <= y, + } + + passes_all = True + for param in params: + t_param, param_ops = param.split(".") + param_operator = ops.get(param_ops) + if not param_operator: + continue + if simkl and t_param == "popularity": + continue + item_value = item.get(t_param) + if item_value is None or not param_operator(item_value, params[param]): + passes_all = False + break + + if passes_all: + filtered.append(item) + + return filtered diff --git a/app/services/recommendation/item_based.py b/app/services/recommendation/item_based.py index 85bee27..14754f2 100644 --- a/app/services/recommendation/item_based.py +++ b/app/services/recommendation/item_based.py @@ -3,15 +3,14 @@ from loguru import logger -from app.services.recommendation.filtering import RecommendationFiltering -from app.services.recommendation.metadata import RecommendationMetadata -from app.services.recommendation.utils import ( - content_type_to_mtype, +from app.services.recommendation.filtering import ( + RecommendationFiltering, filter_by_genres, filter_items_by_settings, filter_watched_by_imdb, - resolve_tmdb_id, ) +from app.services.recommendation.metadata import RecommendationMetadata +from app.services.recommendation.utils import content_type_to_mtype, resolve_tmdb_id from app.services.simkl import simkl_service from app.services.tmdb.service import TMDBService diff --git a/app/services/recommendation/rotation.py b/app/services/recommendation/rotation.py deleted file mode 100644 index 4f36ee0..0000000 --- a/app/services/recommendation/rotation.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Daily rotation utilities for fresh recommendations.""" - -import random - - -class DailyRotation: - """Utilities for rotating recommendations daily while maintaining 
quality.""" - - @staticmethod - def rotate_items(items: list, seed: str) -> list: - """ - Rotate the items daily. - - This provides freshness while maintaining quality: - - shuffled deterministically based on daily seed - - User sees different content every day without sacrificing quality - - Args: - items: List of items - seed: Daily rotation seed (changes daily) - - Returns: - Rotated list with items shuffled deterministically - """ - - # Deterministically shuffle items based on daily seed - rng = random.Random(seed) - shuffled_items = items.copy() - rng.shuffle(shuffled_items) - - return shuffled_items diff --git a/app/services/recommendation/scoring.py b/app/services/recommendation/scoring.py index 57ed57b..aca30f8 100644 --- a/app/services/recommendation/scoring.py +++ b/app/services/recommendation/scoring.py @@ -1,5 +1,6 @@ import hashlib import math +import random from collections.abc import Callable from typing import Any @@ -176,3 +177,15 @@ def calculate_final_score( final_score += epsilon return final_score + + +class DailyRotation: + """Utilities for rotating recommendations daily while maintaining quality.""" + + @staticmethod + def rotate_items(items: list, seed: str) -> list: + """Deterministically shuffle items based on daily seed for freshness.""" + rng = random.Random(seed) + shuffled = items.copy() + rng.shuffle(shuffled) + return shuffled diff --git a/app/services/recommendation/theme_based.py b/app/services/recommendation/theme_based.py index 7623e46..29a9602 100644 --- a/app/services/recommendation/theme_based.py +++ b/app/services/recommendation/theme_based.py @@ -11,15 +11,15 @@ RUNTIME_BUCKET_SHORT_MAX_SERIES, ) from app.services.profile.scorer import ProfileScorer -from app.services.recommendation.filtering import RecommendationFiltering -from app.services.recommendation.metadata import RecommendationMetadata -from app.services.recommendation.scoring import RecommendationScoring -from app.services.recommendation.utils import ( +from 
app.services.recommendation.filtering import ( + RecommendationFiltering, apply_discover_filters, - content_type_to_mtype, filter_by_genres, filter_watched_by_imdb, ) +from app.services.recommendation.metadata import RecommendationMetadata +from app.services.recommendation.scoring import RecommendationScoring +from app.services.recommendation.utils import content_type_to_mtype from app.services.tmdb.service import TMDBService diff --git a/app/services/recommendation/top_picks.py b/app/services/recommendation/top_picks.py index ce383d4..a6a9076 100644 --- a/app/services/recommendation/top_picks.py +++ b/app/services/recommendation/top_picks.py @@ -11,20 +11,18 @@ from app.models.library import LibraryCollection from app.models.profile import TasteProfile from app.services.profile.constants import TOP_PICKS_CREATOR_CAP, TOP_PICKS_GENRE_CAP -from app.services.profile.sampling import SmartSampler +from app.services.profile.sampling import sample_items from app.services.profile.scorer import ProfileScorer from app.services.profile.scoring import ScoringService -from app.services.recommendation.filtering import RecommendationFiltering -from app.services.recommendation.metadata import RecommendationMetadata -from app.services.recommendation.rotation import DailyRotation -from app.services.recommendation.scoring import RecommendationScoring -from app.services.recommendation.utils import ( +from app.services.recommendation.filtering import ( + RecommendationFiltering, apply_discover_filters, - content_type_to_mtype, filter_items_by_settings, filter_watched_by_imdb, - resolve_tmdb_id, ) +from app.services.recommendation.metadata import RecommendationMetadata +from app.services.recommendation.scoring import DailyRotation, RecommendationScoring +from app.services.recommendation.utils import content_type_to_mtype, resolve_tmdb_id from app.services.simkl import simkl_service from app.services.tmdb.service import TMDBService @@ -39,7 +37,6 @@ def __init__(self, tmdb_service: 
TMDBService, user_settings: UserSettings | None self.user_settings: UserSettings | None = user_settings self.scorer: ProfileScorer = ProfileScorer() self.scoring_service = ScoringService() - self.smart_sampler = SmartSampler(self.scoring_service) async def get_top_picks( self, @@ -183,7 +180,7 @@ async def _fetch_recommendations_from_top_items( List of candidate items """ # Get top items (loved first, then liked, then added, then top watched) - top_items = self.smart_sampler.sample_items(library_items, content_type, max_items=15) + top_items = sample_items(library_items, content_type, self.scoring_service, max_items=15) candidates = [] tasks = [] @@ -244,7 +241,7 @@ async def _fetch_simkl_recommendations( return [] # Sample top items (same as TMDB flow - 15 items) - top_items = self.smart_sampler.sample_items(library_items, content_type, max_items=15) + top_items = sample_items(library_items, content_type, self.scoring_service, max_items=15) # Extract IMDB IDs imdb_ids = [] diff --git a/app/services/recommendation/utils.py b/app/services/recommendation/utils.py index ab6ea90..9d844d6 100644 --- a/app/services/recommendation/utils.py +++ b/app/services/recommendation/utils.py @@ -2,8 +2,7 @@ from loguru import logger -from app.core.constants import DISCOVERY_SETTINGS -from app.services.recommendation.filtering import RecommendationFiltering +from app.services.recommendation.filtering import RecommendationFiltering, filter_items_by_settings from app.services.recommendation.metadata import RecommendationMetadata @@ -12,17 +11,9 @@ def content_type_to_mtype(content_type: str) -> str: async def resolve_tmdb_id(item_id: str, tmdb_service: Any) -> int | None: - """ - Resolve item ID to TMDB ID. - - Handles various formats: tmdb:123, tt123456, or plain integer. - - Args: - item_id: Item ID in various formats - tmdb_service: TMDB service instance for IMDB lookups + """Resolve item ID to TMDB ID. 
- Returns: - TMDB ID or None + Handles formats: tmdb:123, tt123456, or plain integer. """ if item_id.startswith("tmdb:"): try: @@ -39,67 +30,6 @@ async def resolve_tmdb_id(item_id: str, tmdb_service: Any) -> int | None: return None -def filter_watched_by_imdb(enriched: list[dict[str, Any]], watched_imdb: set[str]) -> list[dict[str, Any]]: - """ - Filter enriched items by watched IMDB IDs. - - Checks both the item's 'id' field and '_external_ids.imdb_id' field. - - Args: - enriched: List of enriched metadata items - watched_imdb: Set of watched IMDB IDs - - Returns: - Filtered list excluding watched items - """ - final = [] - for item in enriched: - if item.get("id") in watched_imdb: - continue - if item.get("_external_ids", {}).get("imdb_id") in watched_imdb: - continue - final.append(item) - return final - - -def filter_by_genres( - items: list[dict[str, Any]], - watched_tmdb: set[int], - whitelist: set[int] | None = None, - excluded_ids: list[int] | None = None, -) -> list[dict[str, Any]]: - """ - Filter items by genre whitelist and excluded genres. - - Args: - items: List of candidate items - watched_tmdb: Set of watched TMDB IDs to exclude - whitelist: Optional genre whitelist - excluded_ids: Optional list of excluded genre IDs - - Returns: - Filtered list of items - """ - whitelist = whitelist or set() - excluded_ids = excluded_ids or [] - filtered = [] - - for item in items: - item_id = item.get("id") - if not item_id or item_id in watched_tmdb: - continue - - genre_ids = item.get("genre_ids", []) - - # Excluded genres check - if excluded_ids and any(gid in excluded_ids for gid in genre_ids): - continue - - filtered.append(item) - - return filtered - - async def pad_to_min( content_type: str, existing: list[dict], @@ -109,32 +39,17 @@ async def pad_to_min( watched_tmdb: set[int] | None = None, watched_imdb: set[str] | None = None, ) -> list[dict]: - """ - Pad recommendations to meet minimum item count by fetching trending/popular items. 
- - Args: - content_type: Content type (movie/series) - existing: Existing recommendations - min_items: Minimum number of items required - tmdb_service: TMDB service instance - user_settings: User settings (optional) - watched_tmdb: Set of watched TMDB IDs (optional) - watched_imdb: Set of watched IMDB IDs (optional) - - Returns: - List of recommendations padded to min_items - """ + """Pad recommendations to meet minimum item count with trending/popular items.""" need = max(0, int(min_items) - len(existing)) if need <= 0: return existing - # Use provided watched sets (or empty sets if not provided) watched_tmdb = watched_tmdb or set() watched_imdb = watched_imdb or set() excluded_ids = set(RecommendationFiltering.get_excluded_genre_ids(user_settings, content_type)) mtype = content_type_to_mtype(content_type) - pool = [] + pool: list[dict] = [] try: tr = await tmdb_service.get_trending(mtype, time_window="week") @@ -145,11 +60,9 @@ async def pad_to_min( logger.debug(f"Error fetching trending/top-rated for padding: {e}") return existing - # Filter pool by user settings (years, popularity) pool = filter_items_by_settings(pool, user_settings) - # Get existing TMDB IDs - existing_tmdb = set() + existing_tmdb: set[int] = set() for it in existing: tid = it.get("_tmdb_id") or it.get("tmdb_id") or it.get("id") try: @@ -159,8 +72,7 @@ async def pad_to_min( except Exception: pass - # Filter pool - dedup = {} + dedup: dict[int, dict] = {} for it in pool: tid = it.get("id") if not tid or tid in existing_tmdb or tid in watched_tmdb: @@ -168,9 +80,8 @@ async def pad_to_min( gids = it.get("genre_ids") or [] if excluded_ids.intersection(gids): continue - - # Quality threshold - va, vc = float(it.get("vote_average") or 0.0), int(it.get("vote_count") or 0) + va = float(it.get("vote_average") or 0.0) + vc = int(it.get("vote_count") or 0) if vc < 200 or va < 6.0: continue dedup[tid] = it @@ -180,7 +91,6 @@ async def pad_to_min( if not dedup: return existing - # Enrich metadata meta = 
await RecommendationMetadata.fetch_batch( tmdb_service, list(dedup.values()), @@ -188,139 +98,18 @@ async def pad_to_min( user_settings=user_settings, ) - # Add to existing, filtering watched items - extra = [] + extra: list[dict] = [] for it in meta: if it.get("id") in watched_imdb: continue if it.get("_external_ids", {}).get("imdb_id") in watched_imdb: continue - - # Final check against existing - is_dup = False - for e in existing: - if e.get("id") == it.get("id"): - is_dup = True - break + is_dup = any(e.get("id") == it.get("id") for e in existing) if is_dup: continue - it.pop("_external_ids", None) extra.append(it) if len(extra) >= need: break return existing + extra - - -def build_discover_params(user_settings: Any) -> dict[str, Any]: - """ - Build TMDB discover API parameters based on user settings. - """ - params = {} - if not user_settings: - return params - - from datetime import datetime - - current_date = datetime.now() - current_year = current_date.year - - # 1. Year Range - year_min = getattr(user_settings, "year_min", 1970) - year_max = getattr(user_settings, "year_max", current_year) - - # Apply to both movie and tv date fields for convenience in merging - for prefix in ["primary_release_date", "first_air_date"]: - params[f"{prefix}.gte"] = f"{year_min}-01-01" - - # If year_max is current year or greater, use today's date for 'lte' - # relative to the current time. - if year_max >= current_year: - params[f"{prefix}.lte"] = current_date.strftime("%Y-%m-%d") - else: - params[f"{prefix}.lte"] = f"{year_max}-12-31" - - return params - - -def apply_discover_filters(params: dict[str, Any], user_settings: Any) -> dict[str, Any]: - """ - Merge specific discover params with global user settings (years, popularity). 
- """ - if not user_settings: - return params - - global_params = build_discover_params(user_settings) - - params = {**global_params, **params} - - min_rating, min_votes = RecommendationFiltering.get_quality_thresholds(user_settings) - - # Apply dynamic thresholds if not overridden by stricter local params - if "vote_count.gte" not in params: - params["vote_count.gte"] = min_votes - - if "vote_average.gte" not in params: - params["vote_average.gte"] = min_rating - - return params - - -def filter_items_by_settings( - items: list[dict[str, Any]], user_settings: Any, simkl: bool = False -) -> list[dict[str, Any]]: - """ - Filter items post-fetch based on global user settings (years, popularity). - Used for items from recommendations/similar APIs that don't support early filtering. - """ - if not user_settings: - return items - - year_min = getattr(user_settings, "year_min", 1970) - year_max = getattr(user_settings, "year_max", 2026) - pop_pref = getattr(user_settings, "popularity", "balanced") - - filtered = [] - - for item in items: - # 1. 
Year Filtering - release_date = item.get("release_date") or item.get("first_air_date") or item.get("released") - if release_date: - try: - year = int(release_date.split("-")[0]) - if year < year_min or year > year_max: - continue - except (ValueError, IndexError): - pass - - params = DISCOVERY_SETTINGS.get(pop_pref, {}) - if not params: - continue - - # determine operations - ops = { - "gte": lambda x, y: x >= y, - "lte": lambda x, y: x <= y, - } - - passes_all_checks = True - for param in params: - t_param, param_ops = param.split(".") - param_operator = ops.get(param_ops) - if not param_operator: - continue - - # skip popularity params if simkl - if simkl and t_param == "popularity": - continue - - item_value = item.get(t_param) - if item_value is None or not param_operator(item_value, params[param]): - passes_all_checks = False - break - - if passes_all_checks: - filtered.append(item) - - return filtered From c3fdd047f914741b619d8374a7d06d2b3e9f8121 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Sat, 28 Mar 2026 14:09:55 +0545 Subject: [PATCH 10/68] feat: add new row building functions for dynamic content generation in row_generator service --- .../recommendation/catalog_service.py | 3 - app/services/row_generator.py | 243 ++++++++++-------- 2 files changed, 131 insertions(+), 115 deletions(-) diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index d836cc6..f82f790 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -62,9 +62,6 @@ def _clean_meta(meta: dict) -> dict | None: class CatalogService: - def __init__(self): - pass - async def get_catalog( self, token: str, content_type: str, catalog_id: str ) -> tuple[dict[str, Any], dict[str, Any]]: diff --git a/app/services/row_generator.py b/app/services/row_generator.py index 682a038..636cf1c 100644 --- a/app/services/row_generator.py +++ b/app/services/row_generator.py @@ -99,6 +99,136 
@@ def _pick(items: list, start: int, end: int, exclude: set | None = None) -> Any return random.choice(pool) if pool else None +def _build_core_row( + genres: list[tuple[int, float]], + keywords: list[tuple[int, float]], + runtimes: list[tuple[str, float]], + keyword_names: dict[int, str], + content_type: str, + used_genres: set[int], + used_keywords: set[int], +) -> tuple[list[tuple[str, str, Any]], str] | None: + axes: list[tuple[str, str, Any]] = [] + title_parts: list[str] = [] + + g = _pick(genres, 0, GOLD_END, used_genres) + if not g: + return None + axes.append((ROLE_ANCHOR, AXIS_GENRE, g[0])) + title_parts.append(_genre_name(g[0], content_type)) + used_genres.add(g[0]) + + for _ in range(random.randint(1, 2)): + k = _pick(keywords, 0, GOLD_END, used_keywords) + if k and k[0] in keyword_names: + axes.append((ROLE_FLAVOR, AXIS_KEYWORD, k[0])) + title_parts.append(_keyword_display(keyword_names[k[0]])) + used_keywords.add(k[0]) + + if runtimes: + rt = random.choice(runtimes[:2]) + axes.append((ROLE_FALLBACK, AXIS_RUNTIME, rt[0])) + mod = _runtime_modifier(rt[0]) + if mod: + title_parts.insert(0, mod) + + return (axes, " ".join(title_parts)) + + +def _build_blend_row( + genres: list[tuple[int, float]], + countries: list[tuple[str, float]], + content_type: str, + used_genres: set[int], +) -> tuple[list[tuple[str, str, Any]], str] | None: + axes: list[tuple[str, str, Any]] = [] + title_parts: list[str] = [] + + g = _pick(genres, 0, GOLD_END, used_genres) + if not g: + return None + axes.append((ROLE_ANCHOR, AXIS_GENRE, g[0])) + title_parts.append(_genre_name(g[0], content_type)) + used_genres.add(g[0]) + + use_country = random.choice([True, False]) + if use_country and countries: + c = _pick(countries, 0, SILVER_END) + if c: + axes.append((ROLE_FLAVOR, AXIS_COUNTRY, c[0])) + adj = _country_adjective(c[0]) + if adj: + title_parts.insert(0, adj) + else: + other = [gx for gx in genres if gx[0] != g[0]] + sg = _pick(other, 0, SILVER_END) if other else None + if sg: 
+ axes.append((ROLE_FLAVOR, AXIS_GENRE, sg[0])) + title_parts.append(_genre_name(sg[0], content_type)) + + return (axes, " ".join(title_parts)) + + +def _build_rising_row( + genres: list[tuple[int, float]], + keywords: list[tuple[int, float]], + countries: list[tuple[str, float]], + keyword_names: dict[int, str], + content_type: str, + used_genres: set[int], + used_keywords: set[int], +) -> tuple[list[tuple[str, str, Any]], str] | None: + axes: list[tuple[str, str, Any]] = [] + title_parts: list[str] = [] + + k = _pick(keywords, SILVER_START, SILVER_END, used_keywords) + if not k or k[0] not in keyword_names: + return None + axes.append((ROLE_ANCHOR, AXIS_KEYWORD, k[0])) + title_parts.append(_keyword_display(keyword_names[k[0]])) + used_keywords.add(k[0]) + + g = _pick(genres, SILVER_START, SILVER_END, used_genres) + if g: + axes.append((ROLE_FLAVOR, AXIS_GENRE, g[0])) + title_parts.append(_genre_name(g[0], content_type)) + + if countries: + c = _pick(countries, 0, SILVER_END) + if c: + axes.append((ROLE_FALLBACK, AXIS_COUNTRY, c[0])) + + return (axes, " ".join(title_parts)) + + +def build_fallback_rows( + genres: list[tuple[int, float]], + keywords: list[tuple[int, float]], + countries: list[tuple[str, float]], + runtimes: list[tuple[str, float]], + keyword_names: dict[int, str], + content_type: str, +) -> list[tuple[list[tuple[str, str, Any]], str]]: + """Build up to 3 rows as (axes, fallback_title) tuples.""" + rows: list[tuple[list[tuple[str, str, Any]], str]] = [] + used_genres: set[int] = set() + used_keywords: set[int] = set() + + r1 = _build_core_row(genres, keywords, runtimes, keyword_names, content_type, used_genres, used_keywords) + if r1: + rows.append(r1) + + r2 = _build_blend_row(genres, countries, content_type, used_genres) + if r2: + rows.append(r2) + + r3 = _build_rising_row(genres, keywords, countries, keyword_names, content_type, used_genres, used_keywords) + if r3: + rows.append(r3) + + return rows[:3] + + class RowGeneratorService: """Generates 
dynamic, personalized row definitions from a taste profile.""" @@ -130,122 +260,11 @@ async def generate_rows( except Exception as e: logger.warning(f"LLM row generation failed, using fallback: {e}") - rows = self._build_rows_fallback(genres, keywords, countries, runtimes, keyword_names, content_type) + rows = build_fallback_rows(genres, keywords, countries, runtimes, keyword_names, content_type) titled = await self._generate_titles(rows) logger.info(f"Generated {len(titled)} rows (fallback) for {content_type}") return titled - # --- Fallback row building (non-LLM) --- - - def _build_rows_fallback( - self, - genres: list[tuple[int, float]], - keywords: list[tuple[int, float]], - countries: list[tuple[str, float]], - runtimes: list[tuple[str, float]], - keyword_names: dict[int, str], - content_type: str, - ) -> list[tuple[list[tuple[str, str, Any]], str]]: - """Build up to 3 rows as (axes, fallback_title) tuples.""" - rows = [] - used_genres: set[int] = set() - used_keywords: set[int] = set() - - # Row 1: Core — top genre + top keywords - r1 = self._build_core(genres, keywords, runtimes, keyword_names, content_type, used_genres, used_keywords) - if r1: - rows.append(r1) - - # Row 2: Blend — genre + country or secondary genre - r2 = self._build_blend(genres, countries, content_type, used_genres) - if r2: - rows.append(r2) - - # Row 3: Rising Star — emerging keyword + secondary genre + country - r3 = self._build_rising(genres, keywords, countries, keyword_names, content_type, used_genres, used_keywords) - if r3: - rows.append(r3) - - return rows[:3] - - def _build_core(self, genres, keywords, runtimes, keyword_names, content_type, used_genres, used_keywords): - axes = [] - title_parts = [] - - g = _pick(genres, 0, GOLD_END, used_genres) - if not g: - return None - axes.append((ROLE_ANCHOR, AXIS_GENRE, g[0])) - title_parts.append(_genre_name(g[0], content_type)) - used_genres.add(g[0]) - - for _ in range(random.randint(1, 2)): - k = _pick(keywords, 0, GOLD_END, 
used_keywords) - if k and k[0] in keyword_names: - axes.append((ROLE_FLAVOR, AXIS_KEYWORD, k[0])) - title_parts.append(_keyword_display(keyword_names[k[0]])) - used_keywords.add(k[0]) - - if runtimes: - rt = random.choice(runtimes[:2]) - axes.append((ROLE_FALLBACK, AXIS_RUNTIME, rt[0])) - mod = _runtime_modifier(rt[0]) - if mod: - title_parts.insert(0, mod) - - return (axes, " ".join(title_parts)) - - def _build_blend(self, genres, countries, content_type, used_genres): - axes = [] - title_parts = [] - - g = _pick(genres, 0, GOLD_END, used_genres) - if not g: - return None - axes.append((ROLE_ANCHOR, AXIS_GENRE, g[0])) - title_parts.append(_genre_name(g[0], content_type)) - used_genres.add(g[0]) - - use_country = random.choice([True, False]) - if use_country and countries: - c = _pick(countries, 0, SILVER_END) - if c: - axes.append((ROLE_FLAVOR, AXIS_COUNTRY, c[0])) - adj = _country_adjective(c[0]) - if adj: - title_parts.insert(0, adj) - else: - other = [gx for gx in genres if gx[0] != g[0]] - sg = _pick(other, 0, SILVER_END) if other else None - if sg: - axes.append((ROLE_FLAVOR, AXIS_GENRE, sg[0])) - title_parts.append(_genre_name(sg[0], content_type)) - - return (axes, " ".join(title_parts)) - - def _build_rising(self, genres, keywords, countries, keyword_names, content_type, used_genres, used_keywords): - axes = [] - title_parts = [] - - k = _pick(keywords, SILVER_START, SILVER_END, used_keywords) - if not k or k[0] not in keyword_names: - return None - axes.append((ROLE_ANCHOR, AXIS_KEYWORD, k[0])) - title_parts.append(_keyword_display(keyword_names[k[0]])) - used_keywords.add(k[0]) - - g = _pick(genres, SILVER_START, SILVER_END, used_genres) - if g: - axes.append((ROLE_FLAVOR, AXIS_GENRE, g[0])) - title_parts.append(_genre_name(g[0], content_type)) - - if countries: - c = _pick(countries, 0, SILVER_END) - if c: - axes.append((ROLE_FALLBACK, AXIS_COUNTRY, c[0])) - - return (axes, " ".join(title_parts)) - # --- Title generation via Gemini --- async def 
_generate_titles(self, rows: list[tuple[list[tuple[str, str, Any]], str]]) -> list[RowDefinition]: From 05db8724ef70dc64ff640c3dbe50ce8dd8e28b6e Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Sat, 28 Mar 2026 14:52:31 +0545 Subject: [PATCH 11/68] refactor: remove unused daily rotation genre whitelist and interest summaries --- app/core/constants.py | 1 - app/models/profile.py | 1 - app/services/catalog_definitions.py | 10 -- app/services/interest_summary.py | 112 ------------------ app/services/profile/constants.py | 3 - app/services/profile/service.py | 13 -- app/services/recommendation/all_based.py | 11 +- .../recommendation/catalog_service.py | 22 +--- app/services/recommendation/filtering.py | 29 ----- app/services/recommendation/item_based.py | 6 +- app/services/recommendation/scoring.py | 59 +-------- app/services/recommendation/theme_based.py | 35 ------ app/services/recommendation/top_picks.py | 10 +- app/services/recommendation/utils.py | 90 -------------- app/services/row_generator.py | 19 ++- 15 files changed, 24 insertions(+), 397 deletions(-) delete mode 100644 app/services/interest_summary.py diff --git a/app/core/constants.py b/app/core/constants.py index 96cbed3..e6a0704 100644 --- a/app/core/constants.py +++ b/app/core/constants.py @@ -1,5 +1,4 @@ RECOMMENDATIONS_CATALOG_NAME: str = "Top Picks For You" -DEFAULT_MIN_ITEMS: int = 8 DEFAULT_CATALOG_LIMIT: int = 20 MAX_CATALOG_ITEMS: int = 100 diff --git a/app/models/profile.py b/app/models/profile.py index 75d835f..cb4e1f1 100644 --- a/app/models/profile.py +++ b/app/models/profile.py @@ -53,7 +53,6 @@ class TasteProfile(BaseModel): default_factory=set, description="Set of processed item IDs to prevent double counting", ) - interest_summary: str | None = Field(default=None, description="LLM-generated description of user interests") class Config: json_encoders = {datetime: lambda v: v.isoformat()} diff --git a/app/services/catalog_definitions.py b/app/services/catalog_definitions.py index 
2a03b4f..37dc5dd 100644 --- a/app/services/catalog_definitions.py +++ b/app/services/catalog_definitions.py @@ -8,7 +8,6 @@ from app.core.constants import DISCOVER_ONLY_EXTRA from app.core.settings import CatalogConfig, UserSettings from app.models.library import LibraryCollection -from app.services.interest_summary import interest_summary_service from app.services.profile.service import ProfileService from app.services.row_generator import RowGeneratorService from app.services.tmdb.service import get_tmdb_service @@ -241,15 +240,6 @@ async def _build_theme_rows_for_type( logger.warning(f"Failed to build profile for {media_type}") return media_type, [] - if gemini_api_key and token: - try: - summary = await interest_summary_service.generate_summary(profile, gemini_api_key) - if summary: - profile.interest_summary = summary - logger.info(f"Interest summary generated for {media_type}: " f"{summary[:80]}...") - except Exception as e: - logger.warning(f"Failed to generate interest summary for {media_type}: {e}") - if token: try: await user_cache.set_profile(token, media_type, profile) diff --git a/app/services/interest_summary.py b/app/services/interest_summary.py deleted file mode 100644 index 99d7e78..0000000 --- a/app/services/interest_summary.py +++ /dev/null @@ -1,112 +0,0 @@ -from loguru import logger - -from app.models.profile import TasteProfile -from app.services.gemini import gemini_service -from app.services.profile.constants import GENRE_MAP - - -class InterestSummaryService: - def _get_system_prompt(self) -> str: - return ( - "You are a film analyst and recommender system expert.\n" - "Your task is to analyze a user's taste profile data and generate an engaging " - "summary of their viewing preferences.\n\n" - "The summary should:\n" - '1. Be written in the second person ("You love...", "Your taste leans towards...").\n' - "2. Be a short paragraph: 3-5 sentences, so you can capture nuance and variety.\n" - '3. 
Capture the main vibe of their interests (e.g., "fast-paced action," ' - '"dark historical dramas," "lighthearted animation").\n' - "4. Prioritize genres and keywords as the strongest signals of taste.\n" - "5. Mention specific eras, countries, or runtime preferences when they add color.\n" - "6. Sound natural, premium, and personalized—like a thoughtful friend describing their taste.\n\n" - "Do NOT mention specific IDs or raw metrics. Translate the data into a narrative." - ) - - def _format_profile_data(self, profile: TasteProfile) -> str: - """Format all available profile data into a structured context string. - - Genres and keywords are primary signals; eras, countries, and runtime are context. - We include more of each so the summary can be richer and longer. - """ - parts: list[str] = [] - - # --- Primary: more genres and keywords for a richer summary --- - top_genres = profile.get_top_genres(limit=5) - genre_names = [GENRE_MAP.get(g_id, f"Unknown({g_id})") for g_id, _ in top_genres] - if genre_names: - parts.append(f"[Primary] Top Genres (strongest first): {', '.join(genre_names)}") - - top_keywords = profile.get_top_keywords(limit=15) - if top_keywords: - keyword_ids = [str(k_id) for k_id, _ in top_keywords] - parts.append(f"[Primary] Top Keyword IDs (higher = more watched): {', '.join(keyword_ids)}") - - top_countries = [country for country, _ in profile.get_top_countries(limit=2)] - if top_countries: - parts.append(f"[Context] Preferred Countries: {', '.join(top_countries)}") - - top_runtimes = sorted(profile.runtime_bucket_scores.items(), key=lambda x: x[1], reverse=True) - runtime_prefs = [bucket for bucket, _ in top_runtimes[:3]] - if runtime_prefs: - parts.append(f"[Context] Runtime Preference: {', '.join(runtime_prefs)}") - - return "\n".join(parts) - - async def generate_summary( - self, - profile: TasteProfile, - api_key: str, - keyword_names: dict[int, str] | None = None, - ) -> str: - """Generate a text summary of the user's interest profile 
using Gemini. - - Args: - profile: The user's TasteProfile. - api_key: Gemini API key (required). - keyword_names: Optional mapping of keyword ID -> name for richer context. - - Returns: - Generated summary string, or empty string on failure. - """ - if not api_key: - return "" - - try: - profile_text = self._format_profile_data(profile) - if not profile_text: - return "" - - # Enrich with resolved keyword names if available - if keyword_names: - top_keywords = profile.get_top_keywords(limit=12) - resolved = [keyword_names[k_id] for k_id, _ in top_keywords if k_id in keyword_names] - if resolved: - # Replace the keyword IDs line with actual names - profile_text = profile_text.replace( - next( - (line for line in profile_text.split("\n") if "Keyword IDs" in line), - "", - ), - f"[Primary] Top Keywords: {', '.join(resolved)}", - ) - - prompt = ( - "Based on the following user profile data, write an interest summary (3-5 sentences).\n" - "Focus primarily on [Primary] signals (genres and keywords); use [Context] to add " - "flavor. 
Make it feel personal and specific to this viewer.\n\n" - f"{profile_text}" - ) - - summary = await gemini_service.generate_flash_content_async( - prompt=prompt, - system_instruction=self._get_system_prompt(), - api_key=api_key, - ) - - return summary - except Exception as e: - logger.error(f"Failed to generate interest summary: {e}") - return "" - - -interest_summary_service = InterestSummaryService() diff --git a/app/services/profile/constants.py b/app/services/profile/constants.py index 2131ce9..a035db7 100644 --- a/app/services/profile/constants.py +++ b/app/services/profile/constants.py @@ -55,9 +55,6 @@ MAXIMUM_POPULARITY_SCORE: Final[float] = 100.0 # Increased from 15.0 to allow popular items -# Genre whitelist limit (top N genres) -GENRE_WHITELIST_LIMIT: Final[int] = 7 - # Runtime Bucket Boundaries (in minutes) RUNTIME_BUCKET_SHORT_MAX_SERIES: Final[int] = 30 # < 30 min RUNTIME_BUCKET_MEDIUM_MAX_SERIES: Final[int] = 60 # 30-60 min, > 60 is long diff --git a/app/services/profile/service.py b/app/services/profile/service.py index 1729a45..8ffb942 100644 --- a/app/services/profile/service.py +++ b/app/services/profile/service.py @@ -5,7 +5,6 @@ from app.models.library import LibraryCollection from app.models.profile import TasteProfile from app.services.profile.builder import ProfileBuilder -from app.services.profile.constants import GENRE_WHITELIST_LIMIT from app.services.profile.sampling import sample_items from app.services.profile.scoring import ScoringService from app.services.profile.vectorizer import ItemVectorizer @@ -136,15 +135,3 @@ async def build_and_cache_profile( ) await user_cache.set_profile_and_watched_sets(token, content_type, profile, watched_tmdb, watched_imdb) return profile, watched_tmdb, watched_imdb - - async def get_genre_whitelist(self, profile: TasteProfile, content_type: str) -> set[int]: - """Get genre whitelist from the user's top genres in the profile.""" - try: - if not profile: - return set() - - top_genres = 
profile.get_top_genres(limit=GENRE_WHITELIST_LIMIT) - return {int(genre_id) for genre_id, _ in top_genres} - except Exception as e: - logger.warning(f"Failed to build genre whitelist for {content_type}: {e}") - return set() diff --git a/app/services/recommendation/all_based.py b/app/services/recommendation/all_based.py index 05ef7b6..bfa92b2 100644 --- a/app/services/recommendation/all_based.py +++ b/app/services/recommendation/all_based.py @@ -38,7 +38,6 @@ async def get_recommendations_from_all_items( content_type: str, watched_tmdb: set[int], watched_imdb: set[str], - whitelist: set[int] | None = None, limit: int = 20, item_type: str = "loved", profile: TasteProfile | None = None, @@ -58,7 +57,6 @@ async def get_recommendations_from_all_items( content_type: Content type (movie/series) watched_tmdb: Set of watched TMDB IDs watched_imdb: Set of watched IMDB IDs - whitelist: Genre whitelist limit: Number of items to return item_type: "loved" or "liked" profile: Optional profile for scoring (if None, uses popularity only) @@ -138,15 +136,13 @@ async def get_recommendations_from_all_items( # Filter by genres and watched items excluded_ids = RecommendationFiltering.get_excluded_genre_ids(self.user_settings, content_type) - whitelist = whitelist or set() - filtered = filter_by_genres(candidates, watched_tmdb, whitelist, excluded_ids) + filtered = filter_by_genres(candidates, watched_tmdb, excluded_ids) logger.info(f"Filtered {len(filtered)} candidates") # Score with profile if available scored = [] if profile: - rotation_seed = RecommendationScoring.generate_rotation_seed() # Daily rotation for fresh recommendations for item in filtered: try: final_score = RecommendationScoring.calculate_final_score( @@ -154,13 +150,8 @@ async def get_recommendations_from_all_items( profile=profile, scorer=self.scorer, mtype=mtype, - rotation_seed=rotation_seed, ) - # Apply genre multiplier (if whitelist available) - genre_mult = 
RecommendationFiltering.get_genre_multiplier(item.get("genre_ids"), whitelist) - final_score *= genre_mult - scored.append((final_score, item)) except Exception as e: logger.debug(f"Failed to score item {item.get('id')}: {e}") diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index f82f790..1be9486 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -7,7 +7,7 @@ from loguru import logger from app.core.config import settings -from app.core.constants import DEFAULT_CATALOG_LIMIT, DEFAULT_MIN_ITEMS +from app.core.constants import DEFAULT_CATALOG_LIMIT from app.core.security import redact_token from app.core.settings import UserSettings, resolve_tmdb_api_key from app.models.library import LibraryCollection @@ -20,7 +20,6 @@ from app.services.recommendation.item_based import ItemBasedService from app.services.recommendation.theme_based import ThemeBasedService from app.services.recommendation.top_picks import TopPicksService -from app.services.recommendation.utils import pad_to_min from app.services.tmdb.service import get_tmdb_service from app.services.token_store import token_store from app.services.user_cache import user_cache @@ -155,8 +154,6 @@ async def _build_catalog( ctx.auth_key, ) - whitelist = await profile_service.get_genre_whitelist(profile, content_type) if profile else set() - recommendations = await self._get_recommendations( catalog_id=catalog_id, content_type=content_type, @@ -164,24 +161,11 @@ async def _build_catalog( profile=profile, watched_tmdb=watched_tmdb, watched_imdb=watched_imdb, - whitelist=whitelist, library_items=ctx.library, limit=DEFAULT_CATALOG_LIMIT, user_settings=ctx.user_settings, ) - # Pad if needed to meet minimum items - if recommendations and len(recommendations) < DEFAULT_MIN_ITEMS: - recommendations = await pad_to_min( - content_type, - recommendations, - DEFAULT_MIN_ITEMS, - services["tmdb"], - 
ctx.user_settings, - watched_tmdb, - watched_imdb, - ) - logger.info(f"Returning {len(recommendations)} items for {content_type}") cleaned = [m for m in (_clean_meta(m) for m in recommendations) if m is not None] @@ -283,7 +267,6 @@ async def _get_recommendations( profile: TasteProfile | None, watched_tmdb: set[int], watched_imdb: set[str], - whitelist: set[int], library_items: LibraryCollection, limit: int, user_settings: UserSettings | None = None, @@ -299,7 +282,6 @@ async def _get_recommendations( watched_tmdb=watched_tmdb, watched_imdb=watched_imdb, limit=limit, - whitelist=whitelist, ) logger.info(f"Found {len(recommendations)} recommendations for item {item_id}") @@ -313,7 +295,6 @@ async def _get_recommendations( watched_tmdb=watched_tmdb, watched_imdb=watched_imdb, limit=limit, - whitelist=whitelist, ) logger.info(f"Found {len(recommendations)} recommendations for theme {catalog_id}") @@ -358,7 +339,6 @@ async def _get_recommendations( content_type=content_type, watched_tmdb=watched_tmdb, watched_imdb=watched_imdb, - whitelist=whitelist, limit=limit, item_type=item_type, profile=profile, diff --git a/app/services/recommendation/filtering.py b/app/services/recommendation/filtering.py index d826465..2e8f840 100644 --- a/app/services/recommendation/filtering.py +++ b/app/services/recommendation/filtering.py @@ -171,33 +171,6 @@ def get_excluded_genre_ids(user_settings: Any, content_type: str) -> list[int]: return [int(g) for g in user_settings.excluded_series_genres] return [] - @staticmethod - def get_genre_multiplier(genre_ids: list[int] | None, whitelist: set[int]) -> float: - """Calculate a score multiplier based on genre preference. 
Blocks animation if not preferred.""" - if not whitelist: - return 1.0 - - gids = set(genre_ids or []) - if not gids: - return 1.0 - - # If it has at least one preferred genre, full score - if gids & whitelist: - return 1.0 - - # Otherwise, soft penalty to prioritize whitelist items without blocking variety - return 0.4 - - @staticmethod - def passes_top_genre_whitelist(genre_ids: list[int] | None, whitelist: set[int]) -> bool: - """Check if an item's genres match the user's top genre whitelist (Softened).""" - if not whitelist: - return True - gids = set(genre_ids or []) - if not gids: - return True - return True - # --- Standalone filtering functions (moved from utils.py) --- @@ -217,11 +190,9 @@ def filter_watched_by_imdb(enriched: list[dict[str, Any]], watched_imdb: set[str def filter_by_genres( items: list[dict[str, Any]], watched_tmdb: set[int], - whitelist: set[int] | None = None, excluded_ids: list[int] | None = None, ) -> list[dict[str, Any]]: """Filter items by watched set and excluded genres.""" - whitelist = whitelist or set() excluded_ids = excluded_ids or [] filtered = [] diff --git a/app/services/recommendation/item_based.py b/app/services/recommendation/item_based.py index 14754f2..1c6cb6e 100644 --- a/app/services/recommendation/item_based.py +++ b/app/services/recommendation/item_based.py @@ -31,7 +31,6 @@ async def get_recommendations_for_item( watched_tmdb: set[int] | None = None, watched_imdb: set[str] | None = None, limit: int = 20, - whitelist: set[int] | None = None, ) -> list[dict[str, Any]]: """ Get recommendations for a specific item. @@ -40,8 +39,7 @@ async def get_recommendations_for_item( 1. Fetch similar + recommendations from TMDB (2 pages each) 2. Filter watched items 3. Filter excluded genres - 4. Apply genre whitelist - 5. Return top N + 4. Return top N Args: item_id: Item ID (tt... or tmdb:...) 
@@ -76,7 +74,7 @@ async def get_recommendations_for_item( # Filter by genres and watched items excluded_ids = RecommendationFiltering.get_excluded_genre_ids(self.user_settings, content_type) - filtered = filter_by_genres(candidates, watched_tmdb, whitelist, excluded_ids) + filtered = filter_by_genres(candidates, watched_tmdb, excluded_ids) # Enrich metadata enriched = await RecommendationMetadata.fetch_batch( diff --git a/app/services/recommendation/scoring.py b/app/services/recommendation/scoring.py index aca30f8..1abddcd 100644 --- a/app/services/recommendation/scoring.py +++ b/app/services/recommendation/scoring.py @@ -1,6 +1,4 @@ -import hashlib import math -import random from collections.abc import Callable from typing import Any @@ -29,34 +27,6 @@ def normalize(value: float, min_v: float = 0.0, max_v: float = 10.0) -> float: return 0.0 return max(0.0, min(1.0, (value - min_v) / (max_v - min_v))) - @staticmethod - def stable_epsilon(tmdb_id: int, seed: str) -> float: - """Generate a stable tiny epsilon to break ties deterministically.""" - if not seed: - return 0.0 - h = hashlib.md5(f"{seed}:{tmdb_id}".encode()).hexdigest() - eps = int(h[-6:], 16) % 1000 - return eps / 1_000_000.0 - - @staticmethod - def generate_rotation_seed(token: str | None = None) -> str: - """ - Generate a daily rotation seed for deterministic but fresh recommendations. - - Args: - token: Optional user token for per-user variation. - If None, uses a global seed (same for all users on same day). 
- - Returns: - A seed string like "abc123:2026-01-15" - """ - from datetime import date - - today = date.today().isoformat() - if token: - return f"{token}:{today}" - return f"global:{today}" - @staticmethod def get_recency_multiplier_fn( profile: Any, candidate_decades: set[int] | None = None @@ -130,8 +100,7 @@ def calculate_final_score( profile: Any, scorer: Any, mtype: str, - rotation_seed: str | None = None, - ) -> float: # noqa: E501 + ) -> float: """ Calculate final recommendation score combining profile similarity and quality. @@ -140,12 +109,9 @@ def calculate_final_score( profile: User taste profile scorer: ProfileScorer instance mtype: Media type (movie/tv) to determine minimum rating - rotation_seed: Optional seed for daily rotation (e.g., "token:2026-01-15"). - When provided, adds a tiny epsilon for deterministic tie-breaking - that changes daily, making recommendations feel fresh. Returns: - Final combined score (0-1 range, with optional epsilon for rotation) + Final combined score (0-1 range) """ # Score with profile profile_score = scorer.score_item(item, profile) @@ -169,23 +135,4 @@ def calculate_final_score( # light boost for high-confidence items (no penalties!) 
vote_count = item.get("vote_count", 0) popularity = item.get("popularity", 0) - final_score = RecommendationScoring.apply_quality_adjustments(base_score, wr, vote_count, popularity) - # Apply daily rotation epsilon for tie-breaking (if seed provided) - if rotation_seed: - tmdb_id = item.get("id", 0) - epsilon = RecommendationScoring.stable_epsilon(tmdb_id, rotation_seed) - final_score += epsilon - - return final_score - - -class DailyRotation: - """Utilities for rotating recommendations daily while maintaining quality.""" - - @staticmethod - def rotate_items(items: list, seed: str) -> list: - """Deterministically shuffle items based on daily seed for freshness.""" - rng = random.Random(seed) - shuffled = items.copy() - rng.shuffle(shuffled) - return shuffled + return RecommendationScoring.apply_quality_adjustments(base_score, wr, vote_count, popularity) diff --git a/app/services/recommendation/theme_based.py b/app/services/recommendation/theme_based.py index 29a9602..323d901 100644 --- a/app/services/recommendation/theme_based.py +++ b/app/services/recommendation/theme_based.py @@ -14,7 +14,6 @@ from app.services.recommendation.filtering import ( RecommendationFiltering, apply_discover_filters, - filter_by_genres, filter_watched_by_imdb, ) from app.services.recommendation.metadata import RecommendationMetadata @@ -48,7 +47,6 @@ async def get_recommendations_for_theme( watched_tmdb: set[int] | None = None, watched_imdb: set[str] | None = None, limit: int = 20, - whitelist: set[int] | None = None, ) -> list[dict[str, Any]]: """Get recommendations for a role-based theme.""" watched_tmdb = watched_tmdb or set() @@ -148,7 +146,6 @@ async def get_recommendations_for_theme( # 5. 
Weighted Scoring scored = [] - rotation_seed = RecommendationScoring.generate_rotation_seed() mtype = content_type_to_mtype(content_type) for item in candidates: @@ -162,7 +159,6 @@ async def get_recommendations_for_theme( profile=profile, scorer=self.scorer, mtype=mtype, - rotation_seed=rotation_seed, ) else: base_score = RecommendationScoring.normalize(item.get("vote_average", 0)) @@ -387,34 +383,3 @@ async def _fetch_discover_candidates( candidates.extend(res.get("results", [])) return candidates - - def _filter_candidates( - self, - candidates: list[dict[str, Any]], - watched_tmdb: set[int], - whitelist: set[int], - existing_ids: set[int] | None = None, - ) -> list[dict[str, Any]]: - """ - Filter candidates by watched items and genre whitelist. - - Args: - candidates: List of candidate items - watched_tmdb: Set of watched TMDB IDs - whitelist: Set of genre IDs in whitelist - existing_ids: Set of IDs to exclude (for deduplication) - - Returns: - Filtered list of items - """ - existing = existing_ids or set() - # First filter by genres (includes watched_tmdb check) - filtered = filter_by_genres(candidates, watched_tmdb, whitelist, None) - # Then deduplicate - result = [] - for item in filtered: - item_id = item.get("id") - if item_id and item_id not in existing: - result.append(item) - existing.add(item_id) - return result diff --git a/app/services/recommendation/top_picks.py b/app/services/recommendation/top_picks.py index a6a9076..2948169 100644 --- a/app/services/recommendation/top_picks.py +++ b/app/services/recommendation/top_picks.py @@ -21,7 +21,7 @@ filter_watched_by_imdb, ) from app.services.recommendation.metadata import RecommendationMetadata -from app.services.recommendation.scoring import DailyRotation, RecommendationScoring +from app.services.recommendation.scoring import RecommendationScoring from app.services.recommendation.utils import content_type_to_mtype, resolve_tmdb_id from app.services.simkl import simkl_service from 
app.services.tmdb.service import TMDBService @@ -115,7 +115,6 @@ async def get_top_picks( # Score all candidates with profile scored_candidates = [] - rotation_seed = RecommendationScoring.generate_rotation_seed() # Daily rotation for fresh recommendations for item in filtered_candidates: try: final_score = RecommendationScoring.calculate_final_score( @@ -123,7 +122,6 @@ async def get_top_picks( profile=profile, scorer=self.scorer, mtype=mtype, - rotation_seed=rotation_seed, ) scored_candidates.append((final_score, item)) except Exception as e: @@ -152,15 +150,13 @@ async def get_top_picks( # Final filter filtered = filter_watched_by_imdb(enriched, watched_imdb) - rotated = DailyRotation.rotate_items(filtered, rotation_seed) - elapsed_time = time.time() - start_time logger.info( - f"Top picks complete: {len(rotated)} items returned in {elapsed_time:.2f}s " + f"Top picks complete: {len(filtered)} items returned in {elapsed_time:.2f}s " f"(target: {limit}, candidates: {len(all_candidates)}, scored: {len(scored_candidates)})" ) - return rotated[:MAX_CATALOG_ITEMS] + return filtered[:MAX_CATALOG_ITEMS] async def _fetch_recommendations_from_top_items( self, diff --git a/app/services/recommendation/utils.py b/app/services/recommendation/utils.py index 9d844d6..89b12ef 100644 --- a/app/services/recommendation/utils.py +++ b/app/services/recommendation/utils.py @@ -1,10 +1,5 @@ from typing import Any -from loguru import logger - -from app.services.recommendation.filtering import RecommendationFiltering, filter_items_by_settings -from app.services.recommendation.metadata import RecommendationMetadata - def content_type_to_mtype(content_type: str) -> str: return "tv" if content_type in ("tv", "series") else "movie" @@ -28,88 +23,3 @@ async def resolve_tmdb_id(item_id: str, tmdb_service: Any) -> int | None: return int(item_id) except ValueError: return None - - -async def pad_to_min( - content_type: str, - existing: list[dict], - min_items: int, - tmdb_service: Any, - 
user_settings: Any = None, - watched_tmdb: set[int] | None = None, - watched_imdb: set[str] | None = None, -) -> list[dict]: - """Pad recommendations to meet minimum item count with trending/popular items.""" - need = max(0, int(min_items) - len(existing)) - if need <= 0: - return existing - - watched_tmdb = watched_tmdb or set() - watched_imdb = watched_imdb or set() - excluded_ids = set(RecommendationFiltering.get_excluded_genre_ids(user_settings, content_type)) - - mtype = content_type_to_mtype(content_type) - pool: list[dict] = [] - - try: - tr = await tmdb_service.get_trending(mtype, time_window="week") - pool.extend(tr.get("results", [])[:60]) - tr2 = await tmdb_service.get_top_rated(mtype) - pool.extend(tr2.get("results", [])[:60]) - except Exception as e: - logger.debug(f"Error fetching trending/top-rated for padding: {e}") - return existing - - pool = filter_items_by_settings(pool, user_settings) - - existing_tmdb: set[int] = set() - for it in existing: - tid = it.get("_tmdb_id") or it.get("tmdb_id") or it.get("id") - try: - if isinstance(tid, str) and tid.startswith("tmdb:"): - tid = int(tid.split(":")[1]) - existing_tmdb.add(int(tid)) - except Exception: - pass - - dedup: dict[int, dict] = {} - for it in pool: - tid = it.get("id") - if not tid or tid in existing_tmdb or tid in watched_tmdb: - continue - gids = it.get("genre_ids") or [] - if excluded_ids.intersection(gids): - continue - va = float(it.get("vote_average") or 0.0) - vc = int(it.get("vote_count") or 0) - if vc < 200 or va < 6.0: - continue - dedup[tid] = it - if len(dedup) >= need * 3: - break - - if not dedup: - return existing - - meta = await RecommendationMetadata.fetch_batch( - tmdb_service, - list(dedup.values()), - content_type, - user_settings=user_settings, - ) - - extra: list[dict] = [] - for it in meta: - if it.get("id") in watched_imdb: - continue - if it.get("_external_ids", {}).get("imdb_id") in watched_imdb: - continue - is_dup = any(e.get("id") == it.get("id") for e in 
existing) - if is_dup: - continue - it.pop("_external_ids", None) - extra.append(it) - if len(extra) >= need: - break - - return existing + extra diff --git a/app/services/row_generator.py b/app/services/row_generator.py index 636cf1c..cd37f73 100644 --- a/app/services/row_generator.py +++ b/app/services/row_generator.py @@ -295,19 +295,28 @@ async def _generate_with_llm( content_type: str, api_key: str, ) -> list[RowDefinition] | None: - summary = profile.interest_summary or "No summary available." genre_map = movie_genres if content_type == "movie" else series_genres valid_genres = ", ".join(f"{name} (ID: {gid})" for gid, name in genre_map.items()) + # Build profile context from actual data + top_genre_names = [genre_map.get(gid, f"ID:{gid}") for gid, _ in genres[:5]] profile_keywords = [name for kid, _ in keywords[:12] if (name := keyword_names.get(kid))] - kw_list = f"Themes they already like: {', '.join(profile_keywords)}. " if profile_keywords else "" - keyword_hint = kw_list + "You can also suggest new themes for discovery." + top_countries = profile.get_top_countries(limit=2) + country_list = [c for c, _ in top_countries] if top_countries else [] + + profile_context = f"Top genres: {', '.join(top_genre_names)}." + if profile_keywords: + profile_context += f" Themes they enjoy: {', '.join(profile_keywords)}." + if country_list: + profile_context += f" Preferred countries: {', '.join(country_list)}." + + keyword_hint = "You can suggest themes from the user's preferences or new ones for discovery." prompt = ( - "Using the user's interest summary below, generate exactly 3 streaming " + "Based on the user's taste profile below, generate exactly 3 streaming " f"collections for {content_type}. " "Use genres (required), keywords, and country when relevant.\n\n" - f"Interest Summary:\n{summary}\n\n" + f"User Profile:\n{profile_context}\n\n" "Generate 3 rows:\n" "1. THE CORE — strongest match to their taste\n" "2. 
MIXED PREFERENCES — blend with variety\n" From de4f7cc96fd2494a2cf3971bc67e1af9738fbd08 Mon Sep 17 00:00:00 2001 From: Bimal Timilsina Date: Sun, 29 Mar 2026 12:46:05 +0545 Subject: [PATCH 12/68] fix: add tests and make defaults pass from BE to fe --- app/api/endpoints/catalogs.py | 4 +- app/api/models/tokens.py | 6 +-- app/core/app.py | 5 ++- app/core/settings.py | 23 +++++++++- app/services/recommendation/filtering.py | 9 ++-- app/services/token_store.py | 13 +++--- app/static/js/modules/form.js | 6 ++- app/templates/base.html | 1 + app/templates/components/section_config.html | 6 +-- tests/test_catalog_endpoint.py | 32 ++++++++++++++ tests/test_configure_page.py | 27 ++++++++++++ tests/test_token_store_migration.py | 44 ++++++++++++++++++++ 12 files changed, 153 insertions(+), 23 deletions(-) create mode 100644 tests/test_catalog_endpoint.py create mode 100644 tests/test_configure_page.py create mode 100644 tests/test_token_store_migration.py diff --git a/app/api/endpoints/catalogs.py b/app/api/endpoints/catalogs.py index f1f0763..e4ed6b7 100644 --- a/app/api/endpoints/catalogs.py +++ b/app/api/endpoints/catalogs.py @@ -24,8 +24,8 @@ async def get_catalog(response: Response, type: str, id: str, token: str, extra: for key, value in headers.items(): response.headers[key] = value - # if recommendations are none or empty, then set cache header to no-cache - if recommendations and not recommendations.get("meta"): + # If recommendations are empty, avoid caching the empty payload aggressively. 
+ if recommendations is not None and not recommendations.get("metas"): response.headers["Cache-Control"] = "no-cache" return recommendations diff --git a/app/api/models/tokens.py b/app/api/models/tokens.py index 60b1a9c..47f0633 100644 --- a/app/api/models/tokens.py +++ b/app/api/models/tokens.py @@ -2,7 +2,7 @@ from pydantic import BaseModel, Field -from app.core.settings import CatalogConfig, PosterRatingConfig +from app.core.settings import DEFAULT_YEAR_MIN, CatalogConfig, PosterRatingConfig, get_default_year_max class TokenRequest(BaseModel): @@ -17,8 +17,8 @@ class TokenRequest(BaseModel): popularity: Literal["mainstream", "balanced", "gems", "all"] = Field( default="balanced", description="Popularity for TMDB API" ) - year_min: int = Field(default=2010, description="Minimum release year for TMDB API") - year_max: int = Field(default=2026, description="Maximum release year for TMDB API") + year_min: int = Field(default=DEFAULT_YEAR_MIN, description="Minimum release year for TMDB API") + year_max: int = Field(default_factory=get_default_year_max, description="Maximum release year for TMDB API") sorting_order: Literal["default", "movies_first", "series_first"] = Field( default="default", description="Order of movies and series catalogs" ) diff --git a/app/core/app.py b/app/core/app.py index 6bc610e..5fd555d 100644 --- a/app/core/app.py +++ b/app/core/app.py @@ -12,7 +12,7 @@ from app.api.endpoints.languages import fetch_languages_list from app.api.router import api_router -from app.core.settings import get_default_catalogs_for_frontend +from app.core.settings import get_current_year, get_default_catalogs_for_frontend, get_default_year_range from app.services.redis_service import redis_service from app.services.tmdb.genre import movie_genres, series_genres from app.services.token_store import token_store @@ -117,6 +117,7 @@ async def configure_page(request: Request, _token: str | None = None): # Format default catalogs for frontend default_catalogs = 
get_default_catalogs_for_frontend() + year_range_defaults = get_default_year_range() # Format genres for frontend movie_genres_list = [{"id": str(id), "name": name} for id, name in movie_genres.items()] @@ -131,6 +132,8 @@ async def configure_page(request: Request, _token: str | None = None): announcement_html=settings.ANNOUNCEMENT_HTML or "", languages=languages, default_catalogs=default_catalogs, + current_year=get_current_year(), + year_range_defaults=year_range_defaults, movie_genres=movie_genres_list, series_genres=series_genres_list, ) diff --git a/app/core/settings.py b/app/core/settings.py index 01ae6f0..2368120 100644 --- a/app/core/settings.py +++ b/app/core/settings.py @@ -1,3 +1,4 @@ +from datetime import datetime from typing import Literal from pydantic import BaseModel, Field @@ -24,14 +25,32 @@ class PosterRatingConfig(BaseModel): api_key: str = Field(description="API key for the provider") +def get_current_year() -> int: + return datetime.now().year + + +DEFAULT_YEAR_MIN = 1970 + + +def get_default_year_max() -> int: + return get_current_year() + + +def get_default_year_range() -> dict[str, int]: + return { + "min": DEFAULT_YEAR_MIN, + "max": get_default_year_max(), + } + + class UserSettings(BaseModel): catalogs: list[CatalogConfig] language: str = "en-US" poster_rating: PosterRatingConfig | None = Field(default=None, description="Poster rating provider configuration") excluded_movie_genres: list[str] = Field(default_factory=list) excluded_series_genres: list[str] = Field(default_factory=list) - year_min: int = Field(default=1970, description="Minimum release year") - year_max: int = Field(default=2026, description="Maximum release year") + year_min: int = Field(default=DEFAULT_YEAR_MIN, description="Minimum release year") + year_max: int = Field(default_factory=get_default_year_max, description="Maximum release year") popularity: Literal["mainstream", "balanced", "gems", "all"] = Field( default="balanced", description="Popularity preference" ) 
diff --git a/app/services/recommendation/filtering.py b/app/services/recommendation/filtering.py index 2e8f840..b3e78c9 100644 --- a/app/services/recommendation/filtering.py +++ b/app/services/recommendation/filtering.py @@ -3,6 +3,7 @@ from urllib.parse import unquote from app.core.constants import DISCOVERY_SETTINGS +from app.core.settings import DEFAULT_YEAR_MIN, get_current_year from app.models.library import LibraryCollection @@ -215,9 +216,9 @@ def build_discover_params(user_settings: Any) -> dict[str, Any]: return params current_date = datetime.now() - current_year = current_date.year + current_year = get_current_year() - year_min = getattr(user_settings, "year_min", 1970) + year_min = getattr(user_settings, "year_min", DEFAULT_YEAR_MIN) year_max = getattr(user_settings, "year_max", current_year) for prefix in ["primary_release_date", "first_air_date"]: @@ -255,8 +256,8 @@ def filter_items_by_settings( if not user_settings: return items - year_min = getattr(user_settings, "year_min", 1970) - year_max = getattr(user_settings, "year_max", 2026) + year_min = getattr(user_settings, "year_min", DEFAULT_YEAR_MIN) + year_max = getattr(user_settings, "year_max", get_current_year()) pop_pref = getattr(user_settings, "popularity", "balanced") filtered = [] diff --git a/app/services/token_store.py b/app/services/token_store.py index 2f486c3..5a0c84c 100644 --- a/app/services/token_store.py +++ b/app/services/token_store.py @@ -191,14 +191,15 @@ async def _migrate_poster_rating_format_raw(self, token: str, redis_key: str, da needs_save = True # Case 2: Clean up deprecated rpdb_key field if it exists (even if empty/null) - # Remove it since we've migrated to poster_rating or it's no longer needed + # Remove it since we've migrated to poster_rating or it's no longer needed. + # Do not overwrite a valid migrated poster_rating payload. 
if "rpdb_key" in settings_dict: settings_dict.pop("rpdb_key") - # keep empty poster_rating field for now - settings_dict["poster_rating"] = { - "provider": "rpdb", - "api_key": None, - } + if not settings_dict.get("poster_rating"): + settings_dict["poster_rating"] = { + "provider": "rpdb", + "api_key": None, + } if not needs_save: # Only log if we didn't already log migration logger.info(f"[MIGRATION] Removing deprecated rpdb_key field for {redact_token(token)}") needs_save = True diff --git a/app/static/js/modules/form.js b/app/static/js/modules/form.js index 4c0c76d..d26d147 100644 --- a/app/static/js/modules/form.js +++ b/app/static/js/modules/form.js @@ -4,6 +4,8 @@ import { showToast, showConfirm, escapeHtml } from './ui.js'; import { switchSection } from './navigation.js'; import { MOVIE_GENRES, SERIES_GENRES } from '../constants.js'; +const YEAR_RANGE_DEFAULTS = window.YEAR_RANGE_DEFAULTS || { min: 1970, max: new Date().getFullYear() }; + // DOM Elements - will be initialized let configForm = null; let submitBtn = null; @@ -51,8 +53,8 @@ async function initializeFormSubmission() { const password = passwordInput?.value; const language = languageSelect.value; const popularity = document.getElementById("popularitySelect")?.value || "balanced"; - const yearMin = parseInt(document.getElementById("yearMin")?.value || "1980"); - const yearMax = parseInt(document.getElementById("yearMax")?.value || "2026"); + const yearMin = parseInt(document.getElementById("yearMin")?.value || String(YEAR_RANGE_DEFAULTS.min)); + const yearMax = parseInt(document.getElementById("yearMax")?.value || String(YEAR_RANGE_DEFAULTS.max)); const sortingOrder = document.getElementById("sortingOrderSelect")?.value || "default"; const posterRatingProvider = document.getElementById("posterRatingProvider")?.value || ""; const posterRatingApiKey = document.getElementById("posterRatingApiKey")?.value.trim() || ""; diff --git a/app/templates/base.html b/app/templates/base.html index 
d7ff376..85e32c6 100644 --- a/app/templates/base.html +++ b/app/templates/base.html @@ -292,6 +292,7 @@ +""" + + +def _oauth_error_page(provider: str, error: str) -> str: + return f""" +{provider.title()} Error + +

{provider.title()} login failed

+

{error}

+

Please close this window and try again.

+""" diff --git a/app/api/endpoints/validation.py b/app/api/endpoints/validation.py index e324116..5818be8 100644 --- a/app/api/endpoints/validation.py +++ b/app/api/endpoints/validation.py @@ -1,11 +1,13 @@ from fastapi import APIRouter, HTTPException from google import genai from loguru import logger +from pydantic import BaseModel, Field from app.api.models.validation import BaseValidationInput, BaseValidationResponse, PosterRatingValidationInput from app.services.poster_ratings.factory import PosterProvider, poster_ratings_factory from app.services.simkl import simkl_service from app.services.tmdb.client import TMDBClient +from app.services.trakt import trakt_service router = APIRouter(tags=["Validation"]) @@ -69,3 +71,47 @@ async def validate_simkl_api_key(data: BaseValidationInput) -> BaseValidationRes except Exception as e: logger.error(f"Validation failed: {str(e)}") raise HTTPException(status_code=500, detail="Validation failed due to an internal error.") + + +class OAuthTokenValidationInput(BaseModel): + access_token: str = Field(description="OAuth access token to validate") + + +@router.post("/trakt/validation") +async def validate_trakt_token(data: OAuthTokenValidationInput) -> BaseValidationResponse: + """Validate a Trakt OAuth access token by calling /users/me.""" + try: + user_info = await trakt_service.get_user_info(data.access_token) + username = user_info.get("user", {}).get("username") or user_info.get("username", "") + return BaseValidationResponse(valid=True, message=f"Connected as {username}") + except Exception as e: + logger.debug(f"Trakt token validation failed: {e}") + return BaseValidationResponse(valid=False, message="Invalid or expired Trakt token") + + +@router.post("/simkl-sync/validation") +async def validate_simkl_sync_token(data: OAuthTokenValidationInput) -> BaseValidationResponse: + """Validate a Simkl OAuth access token.""" + from app.core.config import settings as app_settings + + if not app_settings.SIMKL_CLIENT_ID: + return 
BaseValidationResponse(valid=False, message="Simkl integration is not configured on this server") + try: + from httpx import AsyncClient + + async with AsyncClient(timeout=10) as client: + resp = await client.get( + "https://api.simkl.com/users/settings", + headers={ + "Authorization": f"Bearer {data.access_token}", + "simkl-api-key": app_settings.SIMKL_CLIENT_ID, + }, + follow_redirects=True, + ) + resp.raise_for_status() + user_info = resp.json() + username = user_info.get("user", {}).get("name") or "Unknown" + return BaseValidationResponse(valid=True, message=f"Connected as {username}") + except Exception as e: + logger.debug(f"Simkl sync token validation failed: {e}") + return BaseValidationResponse(valid=False, message="Invalid or expired Simkl token") diff --git a/app/api/models/tokens.py b/app/api/models/tokens.py index 47f0633..b87857c 100644 --- a/app/api/models/tokens.py +++ b/app/api/models/tokens.py @@ -25,6 +25,12 @@ class TokenRequest(BaseModel): simkl_api_key: str | None = Field(default=None, description="Simkl API Key for the user") gemini_api_key: str | None = Field(default=None, description="Gemini API Key for AI features") tmdb_api_key: str | None = Field(default=None, description="TMDB API Key") + trakt_access_token: str | None = Field(default=None, description="Trakt OAuth access token") + trakt_refresh_token: str | None = Field(default=None, description="Trakt OAuth refresh token") + simkl_access_token: str | None = Field(default=None, description="Simkl OAuth access token") + watch_history_source: Literal["stremio", "trakt", "simkl"] = Field( + default="stremio", description="Source for watch history" + ) class TokenResponse(BaseModel): diff --git a/app/api/router.py b/app/api/router.py index 1aff2bb..f30acfc 100644 --- a/app/api/router.py +++ b/app/api/router.py @@ -5,6 +5,7 @@ from .endpoints.health import router as health_router from .endpoints.languages import router as language_router from .endpoints.manifest import router as 
manifest_router +from .endpoints.oauth import router as oauth_router from .endpoints.stats import router as stats_router from .endpoints.tokens import router as tokens_router from .endpoints.validation import router as validation_router @@ -25,3 +26,4 @@ async def root(): api_router.include_router(announcement_router) api_router.include_router(stats_router) api_router.include_router(validation_router) +api_router.include_router(oauth_router) diff --git a/app/core/config.py b/app/core/config.py index 1c250d6..db900d6 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -44,6 +44,12 @@ class Settings(BaseSettings): CATALOG_CACHE_TTL: int = 43200 # 12 hours CATALOG_STALE_TTL: int = 604800 # 7 days (soft expiration fallback) + # External history providers (OAuth app credentials) + TRAKT_CLIENT_ID: str | None = None + TRAKT_CLIENT_SECRET: str | None = None + SIMKL_CLIENT_ID: str | None = None + SIMKL_CLIENT_SECRET: str | None = None + # AI DEFAULT_GEMINI_MODEL: str = "gemma-3-27b-it" GEMINI_API_KEY: str | None = None diff --git a/app/core/settings.py b/app/core/settings.py index 2368120..52d87fb 100644 --- a/app/core/settings.py +++ b/app/core/settings.py @@ -60,6 +60,12 @@ class UserSettings(BaseModel): simkl_api_key: str | None = Field(default=None, description="Simkl API Key for the user") gemini_api_key: str | None = Field(default=None, description="Gemini API Key for AI-powered features") tmdb_api_key: str | None = Field(default=None, description="TMDB API Key (used if set; else server config)") + trakt_access_token: str | None = Field(default=None, description="Trakt OAuth access token") + trakt_refresh_token: str | None = Field(default=None, description="Trakt OAuth refresh token") + simkl_access_token: str | None = Field(default=None, description="Simkl OAuth access token") + watch_history_source: Literal["stremio", "trakt", "simkl"] = Field( + default="stremio", description="Source for watch history used in profile building" + ) # Catalog descriptions 
for frontend diff --git a/app/models/history.py b/app/models/history.py new file mode 100644 index 0000000..391afad --- /dev/null +++ b/app/models/history.py @@ -0,0 +1,27 @@ +from datetime import datetime +from typing import Literal + +from pydantic import BaseModel, Field + + +class WatchHistoryItem(BaseModel): + """Unified watch history item from any source (Stremio, Trakt, Simkl).""" + + imdb_id: str # tt1234567 + type: str # "movie" | "series" + name: str + rating: float | None = None # 1-10 explicit rating (None = unrated) + watch_count: int = 1 + completion: float = 1.0 # 0.0-1.0 (fraction of content watched) + last_watched: datetime | None = None + source: Literal["stremio", "trakt", "simkl"] = "stremio" + + +class WatchHistory(BaseModel): + """Collection of watch history items from a single source.""" + + items: list[WatchHistoryItem] = Field(default_factory=list) + source: Literal["stremio", "trakt", "simkl"] = "stremio" + + def imdb_ids(self) -> set[str]: + return {i.imdb_id for i in self.items} diff --git a/app/services/auth.py b/app/services/auth.py index 4e12056..e96843d 100644 --- a/app/services/auth.py +++ b/app/services/auth.py @@ -165,6 +165,10 @@ def _build_user_settings(self, payload: TokenRequest) -> UserSettings: simkl_api_key=payload.simkl_api_key, gemini_api_key=payload.gemini_api_key, tmdb_api_key=payload.tmdb_api_key, + trakt_access_token=payload.trakt_access_token, + trakt_refresh_token=payload.trakt_refresh_token, + simkl_access_token=payload.simkl_access_token, + watch_history_source=payload.watch_history_source, ) async def get_identity_with_settings(self, payload: TokenRequest) -> dict: diff --git a/app/services/profile/builder.py b/app/services/profile/builder.py index 8b6ca25..7568bab 100644 --- a/app/services/profile/builder.py +++ b/app/services/profile/builder.py @@ -305,39 +305,20 @@ def _apply_frequency_multipliers(self, profile: TasteProfile, frequencies: dict[ @staticmethod def _apply_caps(profile: TasteProfile) -> None: """ 
- Apply score caps to prevent unbounded growth. - - Args: - profile: Profile to cap + Apply score caps to prevent unbounded growth (both positive and negative). """ - # Cap genres - for genre_id in profile.genre_scores: - profile.genre_scores[genre_id] = min(profile.genre_scores[genre_id], CAP_GENRE) - - # Cap keywords - for keyword_id in profile.keyword_scores: - profile.keyword_scores[keyword_id] = min(profile.keyword_scores[keyword_id], CAP_KEYWORD) - - # Cap directors - for director_id in profile.director_scores: - profile.director_scores[director_id] = min(profile.director_scores[director_id], CAP_DIRECTOR) - - # Cap cast - for cast_id in profile.cast_scores: - profile.cast_scores[cast_id] = min(profile.cast_scores[cast_id], CAP_CAST) - - # Cap eras - for era in profile.era_scores: - profile.era_scores[era] = min(profile.era_scores[era], CAP_ERA) - - # Cap countries - for country in profile.country_scores: - profile.country_scores[country] = min(profile.country_scores[country], CAP_COUNTRY) - - # Cap runtime buckets - for runtime_bucket in profile.runtime_bucket_scores: - current_score = profile.runtime_bucket_scores[runtime_bucket] - profile.runtime_bucket_scores[runtime_bucket] = min(current_score, CAP_RUNTIME) + cap_pairs = [ + (profile.genre_scores, CAP_GENRE), + (profile.keyword_scores, CAP_KEYWORD), + (profile.director_scores, CAP_DIRECTOR), + (profile.cast_scores, CAP_CAST), + (profile.era_scores, CAP_ERA), + (profile.country_scores, CAP_COUNTRY), + (profile.runtime_bucket_scores, CAP_RUNTIME), + ] + for scores, cap in cap_pairs: + for key in scores: + scores[key] = max(-cap, min(scores[key], cap)) async def update_profile_incrementally( self, diff --git a/app/services/profile/evidence.py b/app/services/profile/evidence.py index def65c5..4c06c0d 100644 --- a/app/services/profile/evidence.py +++ b/app/services/profile/evidence.py @@ -12,25 +12,22 @@ RECENCY_HALF_LIFE_DAYS, ) +# Abandonment thresholds (in minutes of watch time) +_ABANDON_IGNORE_MINUTES = 
15 # < 15 min: too short, ignore +_ABANDON_NEGATIVE_THRESHOLD = 0.30 # 15 min – 30%: mild negative + class EvidenceCalculator: """ Calculates evidence weights for user interactions. - Pure function: no side effects, easy to test. + Supports both legacy Stremio interaction types and explicit 1-10 ratings + from external sources (Trakt, Simkl). """ @staticmethod def get_interaction_type(item: ScoredItem) -> Literal["loved", "liked", "watched_high", "watched_medium", "added"]: - """ - Determine interaction type from scored item. - - Args: - item: ScoredItem with interaction data - - Returns: - Interaction type string - """ + """Determine interaction type from scored item.""" if item.item.is_loved: return "loved" if item.item.is_liked: @@ -39,22 +36,13 @@ def get_interaction_type(item: ScoredItem) -> Literal["loved", "liked", "watched return "watched_high" if item.completion_rate >= 0.4: return "watched_medium" - # Check if added to library (not watched, not removed, not temp) if not item.item.temp and not item.item.removed and item.completion_rate < 0.4: return "added" - return "watched_medium" # Fallback + return "watched_medium" @staticmethod def get_base_weight(interaction_type: str) -> float: - """ - Get base evidence weight for interaction type. - - Args: - interaction_type: Type of interaction - - Returns: - Base weight value - """ + """Get base evidence weight for interaction type (legacy bucket system).""" weights = { "loved": EVIDENCE_WEIGHT_LOVED, "liked": EVIDENCE_WEIGHT_LIKED, @@ -65,18 +53,46 @@ def get_base_weight(interaction_type: str) -> float: return weights.get(interaction_type, EVIDENCE_WEIGHT_WATCHED_MEDIUM) @staticmethod - def calculate_recency_multiplier(last_interaction: datetime | None) -> float: + def weight_from_rating(rating: float) -> float: """ - Calculate recency multiplier using exponential decay. + Continuous evidence weight from an explicit 1-10 rating. 
- Args: - last_interaction: When the interaction occurred + Positive: 5→0.5, 6→1.0, 7→1.5, 8→2.0, 9→2.5, 10→3.0 + Negative: 1→-2.0, 2→-1.5, 3→-1.0, 4→-0.5 + """ + if rating >= 5: + return max(0.1, (rating - 4) / 2) + return (rating - 5) / 2 - Returns: - Multiplier (1.0 for recent, <1.0 for old) + @staticmethod + def weight_from_completion(completion: float, watch_time_minutes: float | None = None) -> float: """ + Evidence weight for unrated items based on watch completion. + + Implements abandonment detection: + - < 15 min watched: ignore (weight 0.0) + - 15 min to 30% completion: mild negative (-0.2) + - 30%-70% completion: neutral (0.0) + - >= 70% completion: positive (1.0) + """ + # If we have actual watch time, use the abandonment thresholds + if watch_time_minutes is not None and watch_time_minutes < _ABANDON_IGNORE_MINUTES: + return 0.0 + + if completion >= 0.7: + return 1.0 + if completion >= 0.3: + return 0.0 # Ambiguous — neutral + if watch_time_minutes is not None and watch_time_minutes >= _ABANDON_IGNORE_MINUTES: + return -0.2 # Gave it a fair shot and quit + # Low completion without enough info — treat as neutral + return 0.0 + + @staticmethod + def calculate_recency_multiplier(last_interaction: datetime | None) -> float: + """Calculate recency multiplier using exponential decay.""" if not last_interaction: - return 0.5 # No date = old, reduce weight + return 0.5 now = datetime.now(timezone.utc) if last_interaction.tzinfo is None: @@ -84,35 +100,74 @@ def calculate_recency_multiplier(last_interaction: datetime | None) -> float: days_ago = (now - last_interaction).days if days_ago < 0: - return 1.0 # Future date = treat as recent + return 1.0 - # Exponential decay: multiplier = exp(-days / half_life) multiplier = math.exp(-days_ago / RECENCY_HALF_LIFE_DAYS) - return max(0.1, multiplier) # Minimum 0.1 to keep some signal + return max(0.1, multiplier) @staticmethod def calculate_evidence_weight(item: ScoredItem) -> float: """ Calculate final evidence weight
for an item. - Combines base weight (interaction type) with recency multiplier. - - Args: - item: ScoredItem with interaction data - - Returns: - Final evidence weight + Uses explicit rating if available (from external history sources), + otherwise falls back to the legacy interaction-type bucket system. + Abandonment detection is applied for unrated items. """ - interaction_type = EvidenceCalculator.get_interaction_type(item) - base_weight = EvidenceCalculator.get_base_weight(interaction_type) + # Check for an explicit rating (set by the WatchHistory → ScoredItem converter) + # The converter maps loved→is_loved (rating≥9) and liked→is_liked (rating≥7). + # For items with external ratings, we use the continuous scale. + has_explicit_rating = False + rating: float | None = None + + # Detect external-history items by checking the synthetic state pattern: + # External items have flaggedWatched=1 and a specific duration sentinel (6000) + # OR they have is_loved/is_liked set from external ratings. + # We use a simpler heuristic: if is_loved with flaggedWatched=1, compute from rating=9. + # For more granularity, we'll check the state for our sentinel. 
+ state = item.item.state + + if item.item.is_loved: + # Could be Stremio loved (legacy) or external rating ≥ 9 + # Use rating-proportional weight for loved items + rating = 9.0 + has_explicit_rating = True + elif item.item.is_liked: + rating = 7.0 + has_explicit_rating = True + + if has_explicit_rating and rating is not None: + base_weight = EvidenceCalculator.weight_from_rating(rating) + else: + # Check for abandonment on unrated items + watch_time_minutes: float | None = None + if state.duration > 0 and state.timeWatched > 0: + watch_time_minutes = state.timeWatched / 60.0 + + completion = item.completion_rate + + # Use completion-based weight with abandonment detection + completion_weight = EvidenceCalculator.weight_from_completion(completion, watch_time_minutes) + + if ( + completion_weight == 0.0 + and watch_time_minutes is not None + and watch_time_minutes < _ABANDON_IGNORE_MINUTES + ): + # Too short, skip this item entirely + return 0.0 + + if completion_weight != 0.0: + base_weight = completion_weight + else: + # Fall back to legacy bucket system for ambiguous cases + interaction_type = EvidenceCalculator.get_interaction_type(item) + base_weight = EvidenceCalculator.get_base_weight(interaction_type) # Get last interaction date - last_interaction = item.item.state.lastWatched - if not last_interaction and interaction_type == "added": - # For added items, use mtime if available + last_interaction = state.lastWatched + if not last_interaction: try: - from datetime import datetime - if item.item.mtime: last_interaction = datetime.fromisoformat(item.item.mtime.replace("Z", "+00:00")) except Exception: diff --git a/app/services/profile/integration.py b/app/services/profile/integration.py new file mode 100644 index 0000000..bfb1097 --- /dev/null +++ b/app/services/profile/integration.py @@ -0,0 +1,312 @@ +from typing import Any + +from loguru import logger + +from app.models.history import WatchHistory, WatchHistoryItem +from app.models.library import 
StremioLibraryItem, StremioState +from app.models.profile import ScoredItem, TasteProfile +from app.services.profile.builder import ProfileBuilder +from app.services.profile.constants import GENRE_WHITELIST_LIMIT +from app.services.profile.sampling import SmartSampler +from app.services.profile.vectorizer import ItemVectorizer +from app.services.recommendation.filtering import RecommendationFiltering +from app.services.scoring import ScoringService +from app.services.tmdb.service import get_tmdb_service +from app.services.user_cache import user_cache + + +def _watch_history_item_to_scored(item: WatchHistoryItem) -> ScoredItem: + """Convert a WatchHistoryItem to a ScoredItem for the existing vectorizer pipeline.""" + state_kwargs: dict[str, Any] = {} + if item.last_watched: + state_kwargs["lastWatched"] = item.last_watched + state_kwargs["timesWatched"] = item.watch_count + + # Synthesize duration/timeWatched from completion ratio + if item.completion < 1.0: + state_kwargs["duration"] = 6000 # 100 min in seconds + state_kwargs["timeWatched"] = int(6000 * item.completion) + else: + state_kwargs["timesWatched"] = max(item.watch_count, 1) + state_kwargs["flaggedWatched"] = 1 + + state = StremioState(**state_kwargs) + + is_loved = item.rating is not None and item.rating >= 9.0 + is_liked = item.rating is not None and 7.0 <= item.rating < 9.0 + + lib_item = StremioLibraryItem( + _id=item.imdb_id, + type=item.type, + name=item.name, + state=state, + temp=False, + removed=False, + _is_loved=is_loved, + _is_liked=is_liked, + ) + + source_type = "loved" if is_loved else ("liked" if is_liked else "watched") + + return ScoredItem( + item=lib_item, + score=50.0, + completion_rate=item.completion, + is_rewatched=item.watch_count > 1, + is_recent=False, + source_type=source_type, + ) + + +class ProfileIntegration: + """ + Helper class to integrate taste profile services with existing systems. 
+ """ + + def __init__(self, language: str = "en-US", tmdb_api_key: str | None = None): + self.scoring_service = ScoringService() + self.sampler = SmartSampler(self.scoring_service) + tmdb_service = get_tmdb_service(language=language, api_key=tmdb_api_key) + vectorizer = ItemVectorizer(tmdb_service) + self.builder = ProfileBuilder(vectorizer) + + async def build_profile_from_library( + self, + library_items: dict, + content_type: str, + stremio_service: Any = None, + auth_key: str | None = None, + ) -> tuple[TasteProfile | None, set[int], set[str]]: + """ + Build taste profile from library items and get watched sets. + + Args: + library_items: Library items dict from Stremio + content_type: Content type (movie/series) + stremio_service: Stremio service (optional, for watched sets) + auth_key: Auth key (optional, for watched sets) + + Returns: + Tuple of (profile, watched_tmdb, watched_imdb) + """ + # Get watched sets + watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + stremio_service, library_items, auth_key + ) + + # Convert library items to ScoredItems + all_items = ( + library_items.get("loved", []) + + library_items.get("liked", []) + + library_items.get("watched", []) + + library_items.get("added", []) + ) + typed_items = [it for it in all_items if it.get("type") == content_type] + + if not typed_items: + return None, watched_tmdb, watched_imdb + + # Sample items using SmartSampler (it expects raw library items dict) + library_items_dict = { + "loved": [it for it in library_items.get("loved", []) if it.get("type") == content_type], + "liked": [it for it in library_items.get("liked", []) if it.get("type") == content_type], + "watched": [it for it in library_items.get("watched", []) if it.get("type") == content_type], + "added": [it for it in library_items.get("added", []) if it.get("type") == content_type], + } + sampled = self.sampler.sample_items(library_items_dict, content_type) + + # Build profile + profile = await 
self.builder.build_profile(sampled, content_type=content_type) + + return profile, watched_tmdb, watched_imdb + + async def build_profile_incremental( + self, + library_items: dict, + content_type: str, + token: str, + stremio_service: Any = None, + auth_key: str | None = None, + ) -> tuple[TasteProfile | None, set[int], set[str]]: + """ + Build profile incrementally if possible, fallback to full rebuild. + + Args: + library_items: Library items dict from Stremio + content_type: Content type (movie/series) + token: User token for change detection + stremio_service: Stremio service (optional, for watched sets) + auth_key: Auth key (optional, for watched sets) + + Returns: + Tuple of (profile, watched_tmdb, watched_imdb) + """ + # Get watched sets + watched_imdb, watched_tmdb = await RecommendationFiltering.get_exclusion_sets( + stremio_service, library_items, auth_key + ) + + # Convert library items to ScoredItems for change detection + all_items = ( + library_items.get("loved", []) + + library_items.get("liked", []) + + library_items.get("watched", []) + + library_items.get("added", []) + ) + typed_items = [it for it in all_items if it.get("type") == content_type] + + if not typed_items: + return None, watched_tmdb, watched_imdb + + # Check if we can use incremental update + try: + # Check if library has changed + library_changed = await user_cache.has_library_changed(token, content_type, typed_items) + + if not library_changed: + # No changes - return existing profile + existing_profile = await user_cache.get_profile(token, content_type) + if existing_profile: + return existing_profile, watched_tmdb, watched_imdb + + # Try to get existing profile for incremental update + existing_profile = await user_cache.get_profile(token, content_type) + + if existing_profile: + # Check for removals or new items + processed_ids = existing_profile.processed_items + current_ids = {it.get("_id", it.get("id")) for it in typed_items if it.get("_id", it.get("id"))} + + # Check if 
this is a legacy profile (has scores but no processed_items) + is_legacy = not processed_ids and (existing_profile.genre_scores or existing_profile.director_scores) + + # If items were removed, or it's a legacy profile, we must do a full rebuild + if not processed_ids.issubset(current_ids) or is_legacy: + reason = "Legacy profile detected" if is_legacy else "Items removed from library" + logger.debug(f"[{token[:8]}...] {reason}, falling back to full rebuild") + # Fall through to full rebuild + else: + # Identify new items + new_item_ids = current_ids - processed_ids + + if not new_item_ids: + # No new items and no removals (maybe just metadata changed?) + # We can just return the existing profile + return existing_profile, watched_tmdb, watched_imdb + + logger.debug(f"[{token[:8]}...] Found {len(new_item_ids)} new items, using incremental update") + + # Filter library items to only new ones for sampling + new_library_items_dict = { + "loved": [ + it + for it in library_items.get("loved", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + "liked": [ + it + for it in library_items.get("liked", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + "watched": [ + it + for it in library_items.get("watched", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + "added": [ + it + for it in library_items.get("added", []) + if it.get("type") == content_type and (it.get("_id") or it.get("id")) in new_item_ids + ], + } + + # Sample only new items + sampled = self.sampler.sample_items(new_library_items_dict, content_type) + + if not sampled: + # Should not happen if new_item_ids is not empty, but just in case + return existing_profile, watched_tmdb, watched_imdb + + # Update existing profile incrementally + updated_profile = await self.builder.update_profile_incrementally( + existing_profile, sampled, content_type=content_type + ) + + # 
Update library hash to mark as processed + await user_cache.update_library_hash(token, content_type, typed_items) + + return updated_profile, watched_tmdb, watched_imdb + + except Exception as e: + logger.warning(f"[{token[:8]}...] Incremental update failed, falling back to full rebuild: {e}") + + # Fallback to full rebuild + logger.debug(f"[{token[:8]}...] Using full rebuild") + profile_tuple = await self.build_profile_from_library(library_items, content_type, stremio_service, auth_key) + profile, _, _ = profile_tuple + + # Update library hash after successful build + await user_cache.update_library_hash(token, content_type, typed_items) + + return profile, watched_tmdb, watched_imdb + + async def build_profile_from_watch_history( + self, + watch_history: WatchHistory, + content_type: str, + extra_exclusion_imdb: set[str] | None = None, + ) -> tuple[TasteProfile | None, set[str]]: + """ + Build taste profile from external watch history (Trakt/Simkl). + + Args: + watch_history: WatchHistory from external source + content_type: Content type (movie/series) + extra_exclusion_imdb: Additional IMDB IDs to exclude (e.g. Stremio library) + + Returns: + Tuple of (profile, watched_imdb_ids) + """ + typed_items = [it for it in watch_history.items if it.type == content_type] + if not typed_items: + return None, extra_exclusion_imdb or set() + + # Convert to ScoredItems for the existing pipeline + scored_items = [_watch_history_item_to_scored(it) for it in typed_items] + + # Build profile + profile = await self.builder.build_profile(scored_items, content_type=content_type) + + # Exclusion set: union of external history + any extra (Stremio library) + watched_imdb = watch_history.imdb_ids() + if extra_exclusion_imdb: + watched_imdb |= extra_exclusion_imdb + + return profile, watched_imdb + + async def get_genre_whitelist( + self, + profile: TasteProfile, + content_type: str, + ) -> set[int]: + """ + Get genre whitelist from user's top genres in profile. 
+ + Args: + profile: Taste profile + content_type: Content type (movie/series) + + Returns: + Set of top genre IDs + """ + try: + if not profile: + whitelist = set() + else: + # Get top genres + top_genres = profile.get_top_genres(limit=GENRE_WHITELIST_LIMIT) + whitelist = {int(genre_id) for genre_id, _ in top_genres} + return whitelist + except Exception as e: + logger.warning(f"Failed to build genre whitelist for {content_type}: {e}") + return set() diff --git a/app/services/profile/service.py b/app/services/profile/service.py index f28937d..27419cf 100644 --- a/app/services/profile/service.py +++ b/app/services/profile/service.py @@ -2,8 +2,9 @@ from loguru import logger -from app.models.library import LibraryCollection -from app.models.profile import TasteProfile +from app.models.history import WatchHistory, WatchHistoryItem +from app.models.library import LibraryCollection, StremioLibraryItem, StremioState +from app.models.profile import ScoredItem, TasteProfile from app.services.profile.builder import ProfileBuilder from app.services.profile.sampling import sample_items from app.services.profile.scoring import ScoringService @@ -13,6 +14,48 @@ from app.services.user_cache import user_cache +def _watch_history_item_to_scored(item: WatchHistoryItem) -> ScoredItem: + """Convert a WatchHistoryItem to a ScoredItem for the existing vectorizer pipeline.""" + state_kwargs: dict[str, Any] = {} + if item.last_watched: + state_kwargs["lastWatched"] = item.last_watched + state_kwargs["timesWatched"] = item.watch_count + + if item.completion < 1.0: + state_kwargs["duration"] = 6000 + state_kwargs["timeWatched"] = int(6000 * item.completion) + else: + state_kwargs["timesWatched"] = max(item.watch_count, 1) + state_kwargs["flaggedWatched"] = 1 + + state = StremioState(**state_kwargs) + + is_loved = item.rating is not None and item.rating >= 9.0 + is_liked = item.rating is not None and 7.0 <= item.rating < 9.0 + + lib_item = StremioLibraryItem( + _id=item.imdb_id, + 
type=item.type, + name=item.name, + state=state, + temp=False, + removed=False, + _is_loved=is_loved, + _is_liked=is_liked, + ) + + source_type = "loved" if is_loved else ("liked" if is_liked else "watched") + + return ScoredItem( + item=lib_item, + score=50.0, + completion_rate=item.completion, + is_rewatched=item.watch_count > 1, + is_recent=False, + source_type=source_type, + ) + + class ProfileService: """Builds, updates, caches, and exposes user taste profiles.""" @@ -118,6 +161,26 @@ def _is_new(it) -> bool: await user_cache.update_library_hash(token, content_type, typed_items) return profile, watched_tmdb, watched_imdb + async def build_profile_from_watch_history( + self, + watch_history: WatchHistory, + content_type: str, + extra_exclusion_imdb: set[str] | None = None, + ) -> tuple[TasteProfile | None, set[str]]: + """Build taste profile from external watch history (Trakt/Simkl).""" + typed_items = [it for it in watch_history.items if it.type == content_type] + if not typed_items: + return None, extra_exclusion_imdb or set() + + scored_items = [_watch_history_item_to_scored(it) for it in typed_items] + profile = await self.builder.build_profile(scored_items, content_type=content_type) + + watched_imdb = watch_history.imdb_ids() + if extra_exclusion_imdb: + watched_imdb |= extra_exclusion_imdb + + return profile, watched_imdb + async def build_and_cache_profile( self, token: str, diff --git a/app/services/recommendation/catalog_service.py b/app/services/recommendation/catalog_service.py index f825598..940c213 100644 --- a/app/services/recommendation/catalog_service.py +++ b/app/services/recommendation/catalog_service.py @@ -20,6 +20,7 @@ from app.services.recommendation.item_based import ItemBasedService from app.services.recommendation.theme_based import ThemeBasedService from app.services.recommendation.top_picks import TopPicksService +from app.services.stremio.library import stremio_library_to_watch_history from app.services.tmdb.service import 
get_tmdb_service from app.services.token_store import token_store from app.services.user_cache import user_cache @@ -110,14 +111,42 @@ async def _build_catalog( profile, watched_tmdb, watched_imdb = cached_data logger.debug(f"[{redact_token(ctx.token)}] Using cached profile for {content_type}") else: - logger.info(f"[{redact_token(ctx.token)}] Profile not cached for {content_type}, building") - profile, watched_tmdb, watched_imdb = await profile_service.build_and_cache_profile( - ctx.token, - content_type, - ctx.library, - ctx.bundle, - ctx.auth_key, - ) + # Build profile — use external history source if configured + history_source = ctx.user_settings.watch_history_source if ctx.user_settings else "stremio" + + if history_source == "trakt" and ctx.user_settings and ctx.user_settings.trakt_access_token: + logger.info(f"[{redact_token(ctx.token)}] Building profile from Trakt history for {content_type}") + profile, watched_tmdb, watched_imdb = await self._build_from_external_history( + "trakt", + ctx.user_settings, + content_type, + profile_service, + ctx.library, + ) + await user_cache.set_profile_and_watched_sets( + ctx.token, content_type, profile, watched_tmdb, watched_imdb + ) + elif history_source == "simkl" and ctx.user_settings and ctx.user_settings.simkl_access_token: + logger.info(f"[{redact_token(ctx.token)}] Building profile from Simkl history for {content_type}") + profile, watched_tmdb, watched_imdb = await self._build_from_external_history( + "simkl", + ctx.user_settings, + content_type, + profile_service, + ctx.library, + ) + await user_cache.set_profile_and_watched_sets( + ctx.token, content_type, profile, watched_tmdb, watched_imdb + ) + else: + logger.info(f"[{redact_token(ctx.token)}] Profile not cached for {content_type}, building") + profile, watched_tmdb, watched_imdb = await profile_service.build_and_cache_profile( + ctx.token, + content_type, + ctx.library, + ctx.bundle, + ctx.auth_key, + ) recommendations = await self._get_recommendations( 
catalog_id=catalog_id, @@ -224,6 +253,47 @@ async def _get_trending_fallback( logger.warning(f"Failed to fetch trending items: {e}") return [] + async def _build_from_external_history( + self, + source: str, + user_settings: UserSettings, + content_type: str, + profile_service: ProfileService, + library: LibraryCollection, + ) -> tuple[TasteProfile | None, set[int], set[str]]: + """Build profile from external history (Trakt or Simkl), with Stremio library fallback.""" + try: + if source == "trakt": + from app.services.trakt import trakt_service + + watch_history = await trakt_service.get_history(user_settings.trakt_access_token) + elif source == "simkl": + from app.core.config import settings as app_settings + from app.services.simkl import simkl_service + + watch_history = await simkl_service.get_history( + user_settings.simkl_access_token, + app_settings.SIMKL_CLIENT_ID or "", + ) + else: + watch_history = stremio_library_to_watch_history(library) + + stremio_imdb = library.all_imdb_ids() + + profile, watched_imdb = await profile_service.build_profile_from_watch_history( + watch_history, content_type, extra_exclusion_imdb=stremio_imdb + ) + return profile, set(), watched_imdb + + except Exception as e: + logger.warning(f"External history ({source}) failed, falling back to Stremio: {e}") + watch_history = stremio_library_to_watch_history(library) + stremio_imdb = library.all_imdb_ids() + profile, watched_imdb = await profile_service.build_profile_from_watch_history( + watch_history, content_type, extra_exclusion_imdb=stremio_imdb + ) + return profile, set(), watched_imdb + async def _get_recommendations( self, catalog_id: str, diff --git a/app/services/recommendation/filtering.py b/app/services/recommendation/filtering.py index 6c1b978..83059d6 100644 --- a/app/services/recommendation/filtering.py +++ b/app/services/recommendation/filtering.py @@ -84,6 +84,19 @@ async def get_exclusion_sets( return imdb_ids, tmdb_ids + @staticmethod + def 
get_library_imdb_ids(library_data: dict | None) -> set[str]: + """Extract all IMDB IDs from Stremio library data.""" + if not library_data: + return set() + imdb_ids: set[str] = set() + for category in ("loved", "liked", "watched", "added", "removed"): + for item in library_data.get(category, []): + item_id = item.get("_id", "") + if item_id.startswith("tt"): + imdb_ids.add(item_id.split(":")[0]) + return imdb_ids + @staticmethod def filter_candidates( candidates: list[dict[str, Any]], watched_imdb: set[str], watched_tmdb: set[int] diff --git a/app/services/simkl.py b/app/services/simkl.py index c9fb1ae..7e238be 100644 --- a/app/services/simkl.py +++ b/app/services/simkl.py @@ -1,10 +1,13 @@ import asyncio +from datetime import datetime from typing import Any from cachetools import TTLCache from httpx import AsyncClient from loguru import logger +from app.models.history import WatchHistory, WatchHistoryItem + def get_popularity(rank: int | None, N: int = 100000, K: int = 100) -> float: if rank is None: @@ -107,6 +110,78 @@ async def get_item_details(self, simkl_id, mtype: str, api_key: str) -> dict[str logger.error(f"Error fetching details from Simkl: {e}") return {} + async def get_history(self, access_token: str, client_id: str) -> WatchHistory: + """Fetch watch history from Simkl using OAuth access token.""" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {access_token}", + "simkl-api-key": client_id, + } + + movies_coro = self.client.get( + f"{self.base_url}/sync/all-items/movies", + headers=headers, + follow_redirects=True, + ) + shows_coro = self.client.get( + f"{self.base_url}/sync/all-items/shows", + headers=headers, + follow_redirects=True, + ) + + results = await asyncio.gather(movies_coro, shows_coro, return_exceptions=True) + + items: list[WatchHistoryItem] = [] + seen: set[str] = set() + + for idx, result in enumerate(results): + if isinstance(result, Exception): + logger.warning(f"Simkl sync request failed: {result}") + 
continue + try: + result.raise_for_status() + data = result.json() + except Exception as e: + logger.warning(f"Failed to parse Simkl sync response: {e}") + continue + + mtype = "movie" if idx == 0 else "series" + entries = data.get("movies", []) if idx == 0 else data.get("shows", []) + + for entry in entries: + media = entry.get("movie") or entry.get("show") or {} + imdb_id = media.get("ids", {}).get("imdb") + if not imdb_id or imdb_id in seen: + continue + seen.add(imdb_id) + + user_rating = entry.get("user_rating") + rating = float(user_rating) if user_rating is not None else None + + last_watched = None + raw_date = entry.get("last_watched_at") + if raw_date: + try: + last_watched = datetime.fromisoformat(str(raw_date).replace("Z", "+00:00")) + except (ValueError, TypeError): + pass + + items.append( + WatchHistoryItem( + imdb_id=imdb_id, + type=mtype, + name=media.get("title", ""), + rating=rating, + watch_count=1, + completion=1.0, + last_watched=last_watched, + source="simkl", + ) + ) + + logger.info(f"Simkl history: {len(items)} items") + return WatchHistory(items=items, source="simkl") + async def get_recommendations(self, imdb_id: str, mtype: str, api_key: str) -> list[dict[str, Any]]: """Get recommendations for a single item (original method for item-based).""" item_details = await self.get_item_details(imdb_id, mtype, api_key) diff --git a/app/services/stremio/library.py b/app/services/stremio/library.py index d41a1e6..979a32b 100644 --- a/app/services/stremio/library.py +++ b/app/services/stremio/library.py @@ -1,13 +1,76 @@ import asyncio +from datetime import datetime from typing import Any from async_lru import alru_cache from loguru import logger +from app.models.history import WatchHistory, WatchHistoryItem from app.models.library import LibraryCollection, StremioLibraryItem from app.services.stremio.client import StremioClient, StremioLikesClient +def stremio_library_to_watch_history(library: LibraryCollection) -> WatchHistory: + """Convert typed 
LibraryCollection to unified WatchHistory format.""" + items: list[WatchHistoryItem] = [] + seen: set[str] = set() + + category_items = [ + (library.loved, True, False), + (library.liked, False, True), + (library.watched, False, False), + (library.added, False, False), + ] + + for lib_items, is_loved, is_liked in category_items: + for item in lib_items: + imdb_id = item.id + if not imdb_id.startswith("tt") or imdb_id in seen: + continue + seen.add(imdb_id) + + state = item.state + duration = state.duration + time_watched = state.timeWatched + times_watched = state.timesWatched + flagged_watched = state.flaggedWatched + + if flagged_watched > 0 or times_watched > 0: + completion = 1.0 + elif duration > 0: + completion = min(time_watched / duration, 1.0) + else: + completion = 0.0 + + rating: float | None = None + if is_loved or item.is_loved: + rating = 9.0 + elif is_liked or item.is_liked: + rating = 7.0 + + last_watched: datetime | None = state.lastWatched + if not last_watched and item.mtime: + try: + last_watched = datetime.fromisoformat(str(item.mtime).replace("Z", "+00:00")) + except (ValueError, TypeError): + pass + + items.append( + WatchHistoryItem( + imdb_id=imdb_id, + type=item.type, + name=item.name, + rating=rating, + watch_count=max(times_watched, 1) if completion > 0 else 0, + completion=completion, + last_watched=last_watched, + source="stremio", + ) + ) + + return WatchHistory(items=items, source="stremio") + + class StremioLibraryService: """ Handles fetching and processing of user's Stremio library and likes. 
diff --git a/app/services/token_store.py b/app/services/token_store.py index 5a0c84c..2e52fef 100644 --- a/app/services/token_store.py +++ b/app/services/token_store.py @@ -133,6 +133,28 @@ async def store_user_data(self, user_id: str, payload: dict[str, Any]) -> str: storage_data["settings"]["tmdb_api_key"] = self.encrypt_token(tmdb_api_key) except Exception as exc: logger.warning(f"Failed to encrypt tmdb_api_key for {redact_token(user_id)}: {exc}") + + # Encrypt trakt tokens if present + if storage_data.get("settings") and isinstance(storage_data["settings"], dict): + for trakt_field in ("trakt_access_token", "trakt_refresh_token"): + value = storage_data["settings"].get(trakt_field) + if value: + try: + if not value.startswith("gAAAAAB"): + storage_data["settings"][trakt_field] = self.encrypt_token(value) + except Exception as exc: + logger.warning(f"Failed to encrypt {trakt_field} for {redact_token(user_id)}: {exc}") + + # Encrypt simkl_access_token if present + if storage_data.get("settings") and isinstance(storage_data["settings"], dict): + simkl_access_token = storage_data["settings"].get("simkl_access_token") + if simkl_access_token: + try: + if not simkl_access_token.startswith("gAAAAAB"): + storage_data["settings"]["simkl_access_token"] = self.encrypt_token(simkl_access_token) + except Exception as exc: + logger.warning(f"Failed to encrypt simkl_access_token for {redact_token(user_id)}: {exc}") + json_str = json.dumps(storage_data) if settings.TOKEN_TTL_SECONDS and settings.TOKEN_TTL_SECONDS > 0: @@ -313,6 +335,25 @@ async def get_user_data(self, token: str) -> dict[str, Any] | None: except Exception as e: logger.debug(f"Decryption failed for tmdb_api_key associated with {redact_token(token)}: {e}") + # Decrypt trakt tokens + for trakt_field in ("trakt_access_token", "trakt_refresh_token"): + value = data["settings"].get(trakt_field) + if value: + try: + if value.startswith("gAAAAA"): + data["settings"][trakt_field] = self.decrypt_token(value) + except 
Exception as e: + logger.debug(f"Decryption failed for {trakt_field} associated with {redact_token(token)}: {e}") + + # Decrypt simkl_access_token + simkl_access_token = data["settings"].get("simkl_access_token") + if simkl_access_token: + try: + if simkl_access_token.startswith("gAAAAA"): + data["settings"]["simkl_access_token"] = self.decrypt_token(simkl_access_token) + except Exception as e: + logger.debug(f"Decryption failed for simkl_access_token associated with {redact_token(token)}: {e}") + return data async def delete_token(self, token: str = None, key: str = None) -> None: diff --git a/app/services/trakt.py b/app/services/trakt.py new file mode 100644 index 0000000..70b454a --- /dev/null +++ b/app/services/trakt.py @@ -0,0 +1,206 @@ +import asyncio +from typing import Any + +from httpx import AsyncClient +from loguru import logger + +from app.core.config import settings +from app.models.history import WatchHistory, WatchHistoryItem + + +class TraktService: + """Service for interacting with the Trakt API.""" + + BASE_URL = "https://api.trakt.tv" + + def __init__(self): + self.client = AsyncClient(timeout=15) + + def _headers(self, access_token: str) -> dict[str, str]: + return { + "Content-Type": "application/json", + "trakt-api-version": "2", + "trakt-api-key": settings.TRAKT_CLIENT_ID or "", + "Authorization": f"Bearer {access_token}", + } + + async def get_user_info(self, access_token: str) -> dict[str, Any]: + """GET /users/me - validate token and get username.""" + response = await self.client.get( + f"{self.BASE_URL}/users/me", + headers=self._headers(access_token), + follow_redirects=True, + ) + response.raise_for_status() + return response.json() + + async def exchange_code(self, code: str, redirect_uri: str) -> dict[str, Any]: + """Exchange authorization code for tokens.""" + response = await self.client.post( + f"{self.BASE_URL}/oauth/token", + json={ + "code": code, + "client_id": settings.TRAKT_CLIENT_ID, + "client_secret": 
settings.TRAKT_CLIENT_SECRET, + "redirect_uri": redirect_uri, + "grant_type": "authorization_code", + }, + follow_redirects=True, + ) + response.raise_for_status() + return response.json() + + async def refresh_token(self, refresh_token: str, redirect_uri: str) -> dict[str, Any]: + """Refresh expired Trakt access token.""" + response = await self.client.post( + f"{self.BASE_URL}/oauth/token", + json={ + "refresh_token": refresh_token, + "client_id": settings.TRAKT_CLIENT_ID, + "client_secret": settings.TRAKT_CLIENT_SECRET, + "redirect_uri": redirect_uri, + "grant_type": "refresh_token", + }, + follow_redirects=True, + ) + response.raise_for_status() + return response.json() + + async def get_history(self, access_token: str) -> WatchHistory: + """Fetch watched + rated items, return as WatchHistory.""" + headers = self._headers(access_token) + + # Fetch all 4 endpoints in parallel + watched_movies_coro = self.client.get( + f"{self.BASE_URL}/users/me/watched/movies", + headers=headers, + follow_redirects=True, + ) + watched_shows_coro = self.client.get( + f"{self.BASE_URL}/users/me/watched/shows", + headers=headers, + follow_redirects=True, + ) + rated_movies_coro = self.client.get( + f"{self.BASE_URL}/users/me/ratings/movies", + headers=headers, + follow_redirects=True, + ) + rated_shows_coro = self.client.get( + f"{self.BASE_URL}/users/me/ratings/shows", + headers=headers, + follow_redirects=True, + ) + + results = await asyncio.gather( + watched_movies_coro, + watched_shows_coro, + rated_movies_coro, + rated_shows_coro, + return_exceptions=True, + ) + + watched_movies = self._safe_json(results[0]) + watched_shows = self._safe_json(results[1]) + rated_movies = self._safe_json(results[2]) + rated_shows = self._safe_json(results[3]) + + # Build rating lookup: imdb_id -> rating (1-10) + ratings: dict[str, float] = {} + for item in rated_movies + rated_shows: + media = item.get("movie") or item.get("show") or {} + imdb_id = media.get("ids", {}).get("imdb") + if imdb_id 
and item.get("rating"): + ratings[imdb_id] = float(item["rating"]) + + # Convert watched items to WatchHistoryItem + items: list[WatchHistoryItem] = [] + seen_ids: set[str] = set() + + for entry in watched_movies: + movie = entry.get("movie", {}) + imdb_id = movie.get("ids", {}).get("imdb") + if not imdb_id or imdb_id in seen_ids: + continue + seen_ids.add(imdb_id) + items.append( + WatchHistoryItem( + imdb_id=imdb_id, + type="movie", + name=movie.get("title", ""), + rating=ratings.get(imdb_id), + watch_count=entry.get("plays", 1), + completion=1.0, + last_watched=self._parse_date(entry.get("last_watched_at")), + source="trakt", + ) + ) + + for entry in watched_shows: + show = entry.get("show", {}) + imdb_id = show.get("ids", {}).get("imdb") + if not imdb_id or imdb_id in seen_ids: + continue + seen_ids.add(imdb_id) + items.append( + WatchHistoryItem( + imdb_id=imdb_id, + type="series", + name=show.get("title", ""), + rating=ratings.get(imdb_id), + watch_count=entry.get("plays", 1), + completion=1.0, + last_watched=self._parse_date(entry.get("last_watched_at")), + source="trakt", + ) + ) + + # Add rated-but-not-watched items (user rated without watching on Trakt) + for item in rated_movies + rated_shows: + media = item.get("movie") or item.get("show") or {} + imdb_id = media.get("ids", {}).get("imdb") + if not imdb_id or imdb_id in seen_ids: + continue + seen_ids.add(imdb_id) + mtype = "movie" if "movie" in item else "series" + items.append( + WatchHistoryItem( + imdb_id=imdb_id, + type=mtype, + name=media.get("title", ""), + rating=float(item.get("rating", 0)), + watch_count=0, + completion=0.0, + last_watched=self._parse_date(item.get("rated_at")), + source="trakt", + ) + ) + + logger.info(f"Trakt history: {len(items)} items ({len(ratings)} rated)") + return WatchHistory(items=items, source="trakt") + + @staticmethod + def _safe_json(result) -> list: + if isinstance(result, Exception): + logger.warning(f"Trakt API request failed: {result}") + return [] + try: + 
result.raise_for_status() + return result.json() + except Exception as e: + logger.warning(f"Failed to parse Trakt response: {e}") + return [] + + @staticmethod + def _parse_date(date_str: str | None): + if not date_str: + return None + try: + from datetime import datetime + + return datetime.fromisoformat(date_str.replace("Z", "+00:00")) + except (ValueError, TypeError): + return None + + +trakt_service = TraktService() diff --git a/app/static/js/modules/auth.js b/app/static/js/modules/auth.js index 3d2b877..0c85a6b 100644 --- a/app/static/js/modules/auth.js +++ b/app/static/js/modules/auth.js @@ -299,6 +299,9 @@ async function fetchStremioIdentity(authKey) { const geminiApiKeyInput = document.getElementById('geminiApiKey'); if (s.gemini_api_key && geminiApiKeyInput) geminiApiKeyInput.value = s.gemini_api_key; + // Watch History Source + OAuth tokens + restoreWatchHistoryState(s); + // Genres (Checked = Excluded) document.querySelectorAll('input[name="movie-genre"]').forEach(cb => cb.checked = false); document.querySelectorAll('input[name="series-genre"]').forEach(cb => cb.checked = false); @@ -435,3 +438,96 @@ export function setStremioLoggedOutState() { renderLoggedOutControls({ stremioLoginBtn, stremioLoginText, emailInput, passwordInput }); } + +// Restore Watch History Source and OAuth connected state from saved settings +function restoreWatchHistoryState(settings) { + const sourceSelect = document.getElementById('watchHistorySource'); + if (!sourceSelect) return; + + // Initialize global OAuth state + window._watchlyOAuth = window._watchlyOAuth || {}; + + // Restore Trakt connected state + if (settings.trakt_access_token) { + window._watchlyOAuth.trakt = { + access_token: settings.trakt_access_token, + refresh_token: settings.trakt_refresh_token || '', + }; + + const traktOption = sourceSelect.querySelector('option[value="trakt"]'); + if (traktOption) traktOption.disabled = false; + + const traktStatus = document.getElementById('traktStatus'); + if 
(traktStatus) { + traktStatus.textContent = 'Connected'; + traktStatus.classList.remove('text-slate-500'); + traktStatus.classList.add('text-green-400'); + } + const traktLogoutBtn = document.getElementById('traktLogoutBtn'); + if (traktLogoutBtn) traktLogoutBtn.classList.remove('hidden'); + + // Validate token in background to show username + validateAndShowTraktUser(settings.trakt_access_token); + } + + // Restore Simkl connected state + if (settings.simkl_access_token) { + window._watchlyOAuth.simkl = { + access_token: settings.simkl_access_token, + }; + + const simklOption = sourceSelect.querySelector('option[value="simkl"]'); + if (simklOption) simklOption.disabled = false; + + const simklSyncStatus = document.getElementById('simklSyncStatus'); + if (simklSyncStatus) { + simklSyncStatus.textContent = 'Connected'; + simklSyncStatus.classList.remove('text-slate-500'); + simklSyncStatus.classList.add('text-green-400'); + } + const simklSyncLogoutBtn = document.getElementById('simklSyncLogoutBtn'); + if (simklSyncLogoutBtn) simklSyncLogoutBtn.classList.remove('hidden'); + + // Validate token in background to show username + validateAndShowSimklUser(settings.simkl_access_token); + } + + // Restore selected history source (after enabling options) + if (settings.watch_history_source) { + sourceSelect.value = settings.watch_history_source; + } +} + +async function validateAndShowTraktUser(accessToken) { + try { + const res = await fetch('/trakt/validation', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ access_token: accessToken }), + }); + const data = await res.json(); + const traktStatus = document.getElementById('traktStatus'); + if (data.valid && traktStatus) { + traktStatus.textContent = data.message; // "Connected as username" + } + } catch (e) { + // Silently ignore — status already shows "Connected" + } +} + +async function validateAndShowSimklUser(accessToken) { + try { + const res = await 
fetch('/simkl-sync/validation', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ access_token: accessToken }), + }); + const data = await res.json(); + const simklSyncStatus = document.getElementById('simklSyncStatus'); + if (data.valid && simklSyncStatus) { + simklSyncStatus.textContent = data.message; // "Connected as username" + } + } catch (e) { + // Silently ignore — status already shows "Connected" + } +} diff --git a/app/static/js/modules/form.js b/app/static/js/modules/form.js index 6166145..83ee529 100644 --- a/app/static/js/modules/form.js +++ b/app/static/js/modules/form.js @@ -48,6 +48,7 @@ export function initializeForm(domElements, state, actions) { initializeSimkl(); initializeGemini(); updateYearSlider = initializeYearSliderControl(); + initializeWatchHistorySource(); } async function postJson(url, payload) { @@ -87,7 +88,8 @@ function getRequestPayload() { simkl_api_key: document.getElementById('simklApiKey')?.value.trim() || '', gemini_api_key: document.getElementById('geminiApiKey')?.value.trim() || '', excluded_movie_genres: Array.from(document.querySelectorAll('input[name="movie-genre"]:checked')).map(cb => cb.value), - excluded_series_genres: Array.from(document.querySelectorAll('input[name="series-genre"]:checked')).map(cb => cb.value) + excluded_series_genres: Array.from(document.querySelectorAll('input[name="series-genre"]:checked')).map(cb => cb.value), + watch_history_source: document.getElementById('watchHistorySource')?.value || 'stremio', }; } @@ -115,7 +117,11 @@ function buildTokenPayload(formData) { simkl_api_key: formData.simkl_api_key, gemini_api_key: formData.gemini_api_key, excluded_movie_genres: formData.excluded_movie_genres, - excluded_series_genres: formData.excluded_series_genres + excluded_series_genres: formData.excluded_series_genres, + watch_history_source: formData.watch_history_source, + trakt_access_token: window._watchlyOAuth?.trakt?.access_token || undefined, + 
trakt_refresh_token: window._watchlyOAuth?.trakt?.refresh_token || undefined, + simkl_access_token: window._watchlyOAuth?.simkl?.access_token || undefined, }; } @@ -453,3 +459,87 @@ export function refreshYearSlider() { function showSuccess(url) { showSuccessSection(url); } + +// Watch History Source + OAuth +function initializeWatchHistorySource() { + const sourceSelect = document.getElementById('watchHistorySource'); + const traktLoginBtn = document.getElementById('traktLoginBtn'); + const traktStatus = document.getElementById('traktStatus'); + const traktLogoutBtn = document.getElementById('traktLogoutBtn'); + const simklLoginBtn = document.getElementById('simklLoginBtn'); + const simklSyncStatus = document.getElementById('simklSyncStatus'); + const simklSyncLogoutBtn = document.getElementById('simklSyncLogoutBtn'); + + if (!sourceSelect) return; + + window._watchlyOAuth = window._watchlyOAuth || {}; + + window.addEventListener('message', (event) => { + const data = event.data; + if (!data || !data.provider || !data.tokens) return; + + if (data.provider === 'trakt') { + window._watchlyOAuth.trakt = data.tokens; + if (traktStatus) { + traktStatus.textContent = `Connected as ${data.username || 'Unknown'}`; + traktStatus.classList.remove('text-slate-500'); + traktStatus.classList.add('text-green-400'); + } + if (traktLogoutBtn) traktLogoutBtn.classList.remove('hidden'); + const traktOption = sourceSelect.querySelector('option[value="trakt"]'); + if (traktOption) traktOption.disabled = false; + } else if (data.provider === 'simkl') { + window._watchlyOAuth.simkl = data.tokens; + if (simklSyncStatus) { + simklSyncStatus.textContent = `Connected as ${data.username || 'Unknown'}`; + simklSyncStatus.classList.remove('text-slate-500'); + simklSyncStatus.classList.add('text-green-400'); + } + if (simklSyncLogoutBtn) simklSyncLogoutBtn.classList.remove('hidden'); + const simklOption = sourceSelect.querySelector('option[value="simkl"]'); + if (simklOption) 
simklOption.disabled = false; + } + }); + + if (traktLoginBtn) { + traktLoginBtn.addEventListener('click', () => { + window.open('/auth/trakt', '_blank', 'width=600,height=700'); + }); + } + + if (simklLoginBtn) { + simklLoginBtn.addEventListener('click', () => { + window.open('/auth/simkl', '_blank', 'width=600,height=700'); + }); + } + + if (traktLogoutBtn) { + traktLogoutBtn.addEventListener('click', () => { + delete window._watchlyOAuth.trakt; + if (traktStatus) { + traktStatus.textContent = 'Not connected'; + traktStatus.classList.remove('text-green-400'); + traktStatus.classList.add('text-slate-500'); + } + traktLogoutBtn.classList.add('hidden'); + const traktOption = sourceSelect.querySelector('option[value="trakt"]'); + if (traktOption) traktOption.disabled = true; + if (sourceSelect.value === 'trakt') sourceSelect.value = 'stremio'; + }); + } + + if (simklSyncLogoutBtn) { + simklSyncLogoutBtn.addEventListener('click', () => { + delete window._watchlyOAuth.simkl; + if (simklSyncStatus) { + simklSyncStatus.textContent = 'Not connected'; + simklSyncStatus.classList.remove('text-green-400'); + simklSyncStatus.classList.add('text-slate-500'); + } + simklSyncLogoutBtn.classList.add('hidden'); + const simklOption = sourceSelect.querySelector('option[value="simkl"]'); + if (simklOption) simklOption.disabled = true; + if (sourceSelect.value === 'simkl') sourceSelect.value = 'stremio'; + }); + } +} diff --git a/app/templates/components/section_config.html b/app/templates/components/section_config.html index 95f35b7..7795b16 100644 --- a/app/templates/components/section_config.html +++ b/app/templates/components/section_config.html @@ -346,6 +346,48 @@

Preferences

+ +
+ +
+ +
+ + + +
+
+

Choose where your watch history comes from. External sources (Trakt, Simkl) provide richer data with explicit ratings.

+ + +
+ + Not connected + +
+ + +
+ + Not connected + +
+
+
+