- Dashboard: update badge (orange) injected lazily into customer Status cell after table renders via GET /monitoring/customers/local-update-status (local-only Docker inspect, no Hub call on every page load) - Customer detail Deployment tab: "Update Images" button with spinner, shows success/error inline without page reload - Monitoring Update All: now synchronous + sequential (one customer at a time), shows live spinner + per-customer results table on completion - Settings > Docker Images: "Pull from Docker Hub" button with spinner and inline status message - /monitoring/customers/local-update-status: new lightweight endpoint (no network, pure local Docker inspect) - /monitoring/customers/update-all: removed BackgroundTasks, now awaits each customer sequentially and returns detailed per-customer results Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
275 lines
9.4 KiB
Python
275 lines
9.4 KiB
Python
"""Monitoring API — system overview, customer statuses, host resources."""
|
|
|
|
import logging
|
|
import platform
|
|
from typing import Any
|
|
|
|
import psutil
|
|
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
|
|
from sqlalchemy.orm import Session
|
|
|
|
from app.database import SessionLocal, get_db
|
|
from app.dependencies import get_current_user
|
|
from app.models import Customer, Deployment, SystemConfig, User
|
|
from app.services import docker_service, image_service
|
|
|
|
logger = logging.getLogger(__name__)
|
|
router = APIRouter()
|
|
|
|
|
|
@router.get("/status")
async def system_status(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Summarise the customer base as a set of counters.

    Returns:
        Dict with ``total_customers`` followed by one counter for each of the
        ``active``, ``inactive``, ``deploying`` and ``error`` statuses.
    """
    overview: dict[str, Any] = {"total_customers": db.query(Customer).count()}

    # One COUNT query per status, issued in the order the keys are reported.
    for state in ("active", "inactive", "deploying", "error"):
        overview[state] = db.query(Customer).filter(Customer.status == state).count()

    return overview
|
|
|
|
|
|
@router.get("/customers")
async def all_customers_status(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> list[dict[str, Any]]:
    """Report the deployment state of each customer, ordered by customer id.

    Returns:
        One dict per customer holding its basic fields. Customers with a
        deployment also carry the container statuses and the deployment's
        ports and setup URL; customers without one get
        ``deployment_status`` of ``None`` and an empty ``containers`` list.
    """

    def _describe(customer: Customer) -> dict[str, Any]:
        """Build the response row for a single customer."""
        row: dict[str, Any] = {
            "id": customer.id,
            "name": customer.name,
            "subdomain": customer.subdomain,
            "status": customer.status,
        }

        deployment = customer.deployment
        if not deployment:
            row["deployment_status"] = None
            row["containers"] = []
            return row

        # Query Docker first, then copy the stored deployment fields.
        container_states = docker_service.get_container_status(deployment.container_prefix)
        row["deployment_status"] = deployment.deployment_status
        row["containers"] = container_states
        row["relay_udp_port"] = deployment.relay_udp_port
        row["dashboard_port"] = deployment.dashboard_port
        row["setup_url"] = deployment.setup_url
        return row

    ordered_customers = db.query(Customer).order_by(Customer.id).all()
    return [_describe(customer) for customer in ordered_customers]
|
|
|
|
|
|
@router.get("/resources")
async def host_resources(
    current_user: User = Depends(get_current_user),
) -> dict[str, Any]:
    """Return host system resource usage.

    Returns:
        Dict with ``hostname``, ``os`` and nested ``cpu``, ``memory`` and
        ``disk`` sections. Memory and disk sizes are reported in GiB rounded
        to one decimal; disk figures refer to the ``/`` mount point.
    """
    # Local import keeps this block self-contained.
    import asyncio

    def _gib(num_bytes: int) -> float:
        """Convert a byte count to GiB rounded to one decimal place."""
        return round(num_bytes / (1024 ** 3), 1)

    # cpu_percent(interval=1) blocks for the whole sampling interval. Running
    # it in a worker thread keeps the event loop free to serve other requests
    # while the sample is taken.
    cpu_percent = await asyncio.to_thread(psutil.cpu_percent, interval=1)
    cpu_count = psutil.cpu_count()
    mem = psutil.virtual_memory()
    disk = psutil.disk_usage("/")

    return {
        "hostname": platform.node(),
        "os": f"{platform.system()} {platform.release()}",
        "cpu": {
            "percent": cpu_percent,
            "count": cpu_count,
        },
        "memory": {
            "total_gb": _gib(mem.total),
            "used_gb": _gib(mem.used),
            "available_gb": _gib(mem.available),
            "percent": mem.percent,
        },
        "disk": {
            "total_gb": _gib(disk.total),
            "used_gb": _gib(disk.used),
            "free_gb": _gib(disk.free),
            "percent": disk.percent,
        },
    }
|
|
|
|
|
|
@router.get("/images/check")
async def check_image_updates(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Report update availability for the configured NetBird images.

    The result of ``image_service.check_all_images`` is merged with a
    per-customer summary obtained from
    ``image_service.get_customer_container_image_status`` for every
    deployment. This endpoint itself does not pull any image.

    Returns:
        The image service's status dict extended with a ``customer_status``
        list (one entry per deployment).

    Raises:
        HTTPException: 503 when no system configuration row exists.
    """
    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    hub_status = await image_service.check_all_images(config)

    def _customer_entry(deployment: Deployment) -> dict[str, Any]:
        """Describe one deployment's container image state."""
        owner = deployment.customer
        image_state = image_service.get_customer_container_image_status(
            deployment.container_prefix, config
        )
        return {
            "customer_id": owner.id,
            "customer_name": owner.name,
            "subdomain": owner.subdomain,
            "container_prefix": deployment.container_prefix,
            "needs_update": image_state["needs_update"],
            "services": image_state["services"],
        }

    # Per-customer check, performed after the hub status has been fetched.
    per_customer = [_customer_entry(deployment) for deployment in db.query(Deployment).all()]

    return {**hub_status, "customer_status": per_customer}
|
|
|
|
|
|
@router.post("/images/pull")
async def pull_all_netbird_images(
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Schedule a pull of all configured NetBird images.

    The pull is queued as a background task, so the response is sent before
    the images are fetched. Call GET /images/check afterwards to find out
    which customers need updating.

    Returns:
        A confirmation message plus the list of image names read from the
        configuration at request time.

    Raises:
        HTTPException: 403 when the caller is not an admin, 503 when no
            system configuration row exists.
    """
    if current_user.role != "admin":
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")

    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    # Capture the image names now; the background task re-reads the config
    # through its own session.
    image_fields = (
        "netbird_management_image",
        "netbird_signal_image",
        "netbird_relay_image",
        "netbird_dashboard_image",
    )
    images = [getattr(config, field) for field in image_fields]

    async def _background_pull() -> None:
        """Pull the images using a dedicated database session."""
        session = SessionLocal()
        try:
            fresh_config = session.query(SystemConfig).filter(SystemConfig.id == 1).first()
            if not fresh_config:
                return
            await image_service.pull_all_images(fresh_config)
        except Exception:
            logger.exception("Background image pull failed")
        finally:
            session.close()

    background_tasks.add_task(_background_pull)
    return {"message": "Image pull started in background.", "images": images}
|
|
|
|
|
|
@router.get("/customers/local-update-status")
async def customers_local_update_status(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> list[dict[str, Any]]:
    """Report, per deployment, whether its containers need an update.

    Delegates to ``image_service.get_customer_container_image_status`` for
    every deployment and keeps only the ``needs_update`` flag.

    Returns:
        A list of ``{"customer_id", "needs_update"}`` dicts, or an empty
        list when no system configuration row exists.
    """
    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        return []

    def _flag(deployment: Deployment) -> dict[str, Any]:
        """Reduce one deployment's image status to its update flag."""
        image_state = image_service.get_customer_container_image_status(
            deployment.container_prefix, config
        )
        return {
            "customer_id": deployment.customer_id,
            "needs_update": image_state["needs_update"],
        }

    return [_flag(deployment) for deployment in db.query(Deployment).all()]
|
|
|
|
|
|
@router.post("/customers/update-all")
async def update_all_customers(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Recreate containers for all customers with outdated images — sequential, synchronous.

    Customers are updated one at a time. A customer whose update fails, either
    by reporting ``success`` as false or by raising, is recorded as failed
    and the remaining customers are still processed.
    Images must already be pulled before calling this endpoint.

    Returns:
        Dict with a summary ``message``, the number of successfully
        ``updated`` customers and a ``results`` list holding
        ``customer_name``, ``customer_id``, ``success`` and ``error`` for
        every customer that was attempted.

    Raises:
        HTTPException: 403 when the caller is not an admin, 503 when no
            system configuration row exists.
    """
    if current_user.role != "admin":
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")

    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    # Collect plain values up front so the update loop does not depend on
    # ORM objects.
    to_update: list[dict[str, Any]] = []
    for dep in db.query(Deployment).all():
        cs = image_service.get_customer_container_image_status(dep.container_prefix, config)
        if not cs["needs_update"]:
            continue
        customer = dep.customer
        to_update.append({
            "instance_dir": f"{config.data_dir}/{customer.subdomain}",
            "project_name": dep.container_prefix,
            "customer_name": customer.name,
            "customer_id": customer.id,
        })

    if not to_update:
        return {"message": "All customers are already up to date.", "updated": 0, "results": []}

    # Update customers sequentially — one at a time
    update_results: list[dict[str, Any]] = []
    for entry in to_update:
        try:
            res = await image_service.update_customer_containers(
                entry["instance_dir"], entry["project_name"]
            )
            ok = res["success"]
            error = res.get("error")
        except Exception as exc:
            # Isolate the failure: without this, one raising customer would
            # abort the loop and discard the results gathered so far.
            logger.exception("Update of %s raised an exception", entry["project_name"])
            ok = False
            error = str(exc) or exc.__class__.__name__
        else:
            logger.info("Updated %s: %s", entry["project_name"], "OK" if ok else error)

        update_results.append({
            "customer_name": entry["customer_name"],
            "customer_id": entry["customer_id"],
            "success": ok,
            "error": error,
        })

    success_count = sum(1 for r in update_results if r["success"])
    return {
        "message": f"Updated {success_count} of {len(update_results)} customer(s).",
        "updated": success_count,
        "results": update_results,
    }
|