"""Monitoring API — system overview, customer statuses, host resources.""" import logging import platform from typing import Any import psutil from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status from sqlalchemy.orm import Session from app.database import SessionLocal, get_db from app.dependencies import get_current_user from app.models import Customer, Deployment, SystemConfig, User from app.services import docker_service, image_service logger = logging.getLogger(__name__) router = APIRouter() @router.get("/status") async def system_status( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict[str, Any]: """System overview with aggregated customer statistics. Returns: Counts by status and total customers. """ total = db.query(Customer).count() active = db.query(Customer).filter(Customer.status == "active").count() inactive = db.query(Customer).filter(Customer.status == "inactive").count() deploying = db.query(Customer).filter(Customer.status == "deploying").count() error = db.query(Customer).filter(Customer.status == "error").count() return { "total_customers": total, "active": active, "inactive": inactive, "deploying": deploying, "error": error, } @router.get("/customers") async def all_customers_status( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> list[dict[str, Any]]: """Get deployment status for every customer. Returns: List of dicts with customer info and container statuses. """ customers = ( db.query(Customer) .order_by(Customer.id) .all() ) results: list[dict[str, Any]] = [] for c in customers: entry: dict[str, Any] = { "id": c.id, "name": c.name, "subdomain": c.subdomain, "status": c.status, } if c.deployment: containers = docker_service.get_container_status(c.deployment.container_prefix) entry["deployment_status"] = c.deployment.deployment_status entry["containers"] = containers entry["relay_udp_port"] = c.deployment.relay_udp_port entry["dashboard_port"] = c.deployment.dashboard_port entry["setup_url"] = c.deployment.setup_url else: entry["deployment_status"] = None entry["containers"] = [] results.append(entry) return results @router.get("/resources") async def host_resources( current_user: User = Depends(get_current_user), ) -> dict[str, Any]: """Return host system resource usage. Returns: CPU, memory, disk, and network information. """ cpu_percent = psutil.cpu_percent(interval=1) cpu_count = psutil.cpu_count() mem = psutil.virtual_memory() disk = psutil.disk_usage("/") return { "hostname": platform.node(), "os": f"{platform.system()} {platform.release()}", "cpu": { "percent": cpu_percent, "count": cpu_count, }, "memory": { "total_gb": round(mem.total / (1024 ** 3), 1), "used_gb": round(mem.used / (1024 ** 3), 1), "available_gb": round(mem.available / (1024 ** 3), 1), "percent": mem.percent, }, "disk": { "total_gb": round(disk.total / (1024 ** 3), 1), "used_gb": round(disk.used / (1024 ** 3), 1), "free_gb": round(disk.free / (1024 ** 3), 1), "percent": disk.percent, }, } @router.get("/images/check") async def check_image_updates( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict[str, Any]: """Check all configured NetBird images for available updates on Docker Hub. Compares local image digests against Docker Hub — no image is pulled. Returns: images: dict mapping image name to update status any_update_available: bool customer_status: list of per-customer container image status """ config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() if not config: raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") hub_status = await image_service.check_all_images(config) # Per-customer local check (no network) deployments = db.query(Deployment).all() customer_status = [] for dep in deployments: customer = dep.customer cs = image_service.get_customer_container_image_status(dep.container_prefix, config) customer_status.append({ "customer_id": customer.id, "customer_name": customer.name, "subdomain": customer.subdomain, "container_prefix": dep.container_prefix, "needs_update": cs["needs_update"], "services": cs["services"], }) return {**hub_status, "customer_status": customer_status} @router.post("/images/pull") async def pull_all_netbird_images( background_tasks: BackgroundTasks, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict[str, Any]: """Pull all configured NetBird images from Docker Hub. Runs in the background — returns immediately. After pulling, re-check customer status via GET /images/check to see which customers need updating. """ if current_user.role != "admin": raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() if not config: raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") # Snapshot image list before background task starts images = [ config.netbird_management_image, config.netbird_signal_image, config.netbird_relay_image, config.netbird_dashboard_image, ] async def _pull_bg() -> None: bg_db = SessionLocal() try: cfg = bg_db.query(SystemConfig).filter(SystemConfig.id == 1).first() if cfg: await image_service.pull_all_images(cfg) except Exception: logger.exception("Background image pull failed") finally: bg_db.close() background_tasks.add_task(_pull_bg) return {"message": "Image pull started in background.", "images": images} @router.get("/customers/local-update-status") async def customers_local_update_status( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> list[dict[str, Any]]: """Fast local-only check for outdated customer containers. Compares running container image IDs against locally stored images. No network call — safe to call on every dashboard load. """ config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() if not config: return [] deployments = db.query(Deployment).all() results = [] for dep in deployments: cs = image_service.get_customer_container_image_status(dep.container_prefix, config) results.append({"customer_id": dep.customer_id, "needs_update": cs["needs_update"]}) return results @router.post("/customers/update-all") async def update_all_customers( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ) -> dict[str, Any]: """Recreate containers for all customers with outdated images — sequential, synchronous. Updates customers one at a time so a failing customer does not block others. Images must already be pulled. Data is preserved (bind mounts). Returns detailed per-customer results. """ if current_user.role != "admin": raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() if not config: raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") deployments = db.query(Deployment).all() to_update = [] for dep in deployments: cs = image_service.get_customer_container_image_status(dep.container_prefix, config) if cs["needs_update"]: customer = dep.customer to_update.append({ "instance_dir": f"{config.data_dir}/{customer.subdomain}", "project_name": dep.container_prefix, "customer_name": customer.name, "customer_id": customer.id, }) if not to_update: return {"message": "All customers are already up to date.", "updated": 0, "results": []} # Update customers sequentially — one at a time update_results = [] for entry in to_update: res = await image_service.update_customer_containers( entry["instance_dir"], entry["project_name"] ) ok = res["success"] logger.info("Updated %s: %s", entry["project_name"], "OK" if ok else res.get("error")) update_results.append({ "customer_name": entry["customer_name"], "customer_id": entry["customer_id"], "success": ok, "error": res.get("error"), }) success_count = sum(1 for r in update_results if r["success"]) return { "message": f"Updated {success_count} of {len(update_results)} customer(s).", "updated": success_count, "results": update_results, }