Files
NetBirdMSP-Appliance/app/routers/monitoring.py
twothatIT 848ead0b2c feat(updates): NetBird container image update management
- New image_service.py: Docker Hub digest check (no pull), local digest/ID
  comparison, pull_all_images, per-customer container image status, and
  update_customer_containers (docker compose up -d, data-safe)
- Monitoring endpoints: GET /images/check (hub vs local + per-customer
  needs_update), POST /images/pull (background), POST /customers/update-all
- Deployment endpoint: POST /{id}/update-images (single-customer update)
- Monitoring page: "NetBird Container Updates" card with Check / Pull / Update
  All buttons; image status table and per-customer update table with inline
  update buttons
- i18n: added keys in en.json and de.json

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 21:01:56 +01:00

252 lines
8.5 KiB
Python

"""Monitoring API — system overview, customer statuses, host resources."""
import logging
import platform
from typing import Any
import psutil
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.database import SessionLocal, get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, SystemConfig, User
from app.services import docker_service, image_service
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
# Router on which the monitoring endpoints in this file are registered.
router = APIRouter()
@router.get("/status")
async def system_status(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Summarise how many customers exist overall and per status.

    Returns:
        Mapping with ``total_customers`` followed by one count for each of
        the ``active``, ``inactive``, ``deploying`` and ``error`` statuses.
    """
    summary: dict[str, Any] = {"total_customers": db.query(Customer).count()}
    # One COUNT query per status value, issued in the order the keys appear.
    for state in ("active", "inactive", "deploying", "error"):
        summary[state] = (
            db.query(Customer).filter(Customer.status == state).count()
        )
    return summary
@router.get("/customers")
async def all_customers_status(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> list[dict[str, Any]]:
    """Report the deployment and container state of every customer.

    Returns:
        One dict per customer, ordered by customer id. Customers without a
        deployment get ``deployment_status`` of ``None`` and an empty
        ``containers`` list; the port and setup-URL keys are only present
        when a deployment exists.
    """

    def _describe(customer: Customer) -> dict[str, Any]:
        # Basic identity fields are always present.
        row: dict[str, Any] = {
            "id": customer.id,
            "name": customer.name,
            "subdomain": customer.subdomain,
            "status": customer.status,
        }
        deployment = customer.deployment
        if not deployment:
            row["deployment_status"] = None
            row["containers"] = []
            return row

        # Container state is looked up by the deployment's container prefix.
        container_info = docker_service.get_container_status(
            deployment.container_prefix
        )
        row["deployment_status"] = deployment.deployment_status
        row["containers"] = container_info
        row["relay_udp_port"] = deployment.relay_udp_port
        row["dashboard_port"] = deployment.dashboard_port
        row["setup_url"] = deployment.setup_url
        return row

    ordered_customers = db.query(Customer).order_by(Customer.id).all()
    return [_describe(customer) for customer in ordered_customers]
@router.get("/resources")
async def host_resources(
    current_user: User = Depends(get_current_user),
) -> dict[str, Any]:
    """Return a snapshot of host system resource usage.

    Returns:
        Dict with ``hostname``, ``os``, and nested ``cpu`` (percent, count),
        ``memory`` (total/used/available, percent) and ``disk``
        (total/used/free, percent, measured on ``/``) sections. Sizes are
        bytes divided by 1024**3, rounded to one decimal place.
    """
    # Function-scope import so this block does not depend on file-level edits.
    import asyncio

    # cpu_percent(interval=1) blocks for the whole sampling interval. Run it
    # in a worker thread so the event loop stays free to serve other requests.
    cpu_percent = await asyncio.to_thread(psutil.cpu_percent, interval=1)
    cpu_count = psutil.cpu_count()
    mem = psutil.virtual_memory()
    disk = psutil.disk_usage("/")

    bytes_per_unit = 1024 ** 3

    return {
        "hostname": platform.node(),
        "os": f"{platform.system()} {platform.release()}",
        "cpu": {
            "percent": cpu_percent,
            "count": cpu_count,
        },
        "memory": {
            "total_gb": round(mem.total / bytes_per_unit, 1),
            "used_gb": round(mem.used / bytes_per_unit, 1),
            "available_gb": round(mem.available / bytes_per_unit, 1),
            "percent": mem.percent,
        },
        "disk": {
            "total_gb": round(disk.total / bytes_per_unit, 1),
            "used_gb": round(disk.used / bytes_per_unit, 1),
            "free_gb": round(disk.free / bytes_per_unit, 1),
            "percent": disk.percent,
        },
    }
@router.get("/images/check")
async def check_image_updates(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Report update availability for the configured NetBird images.

    The hub-versus-local comparison is delegated to
    ``image_service.check_all_images``; this endpoint itself pulls nothing.
    A per-deployment image status is appended to that result.

    Returns:
        Everything returned by ``image_service.check_all_images`` plus a
        ``customer_status`` list with one entry per deployment
        (customer id/name/subdomain, container prefix, ``needs_update``
        and ``services``).

    Raises:
        HTTPException: 503 when the system configuration row is missing.
    """
    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    response: dict[str, Any] = dict(await image_service.check_all_images(config))

    def _customer_row(deployment: Deployment) -> dict[str, Any]:
        owner = deployment.customer
        image_state = image_service.get_customer_container_image_status(
            deployment.container_prefix, config
        )
        return {
            "customer_id": owner.id,
            "customer_name": owner.name,
            "subdomain": owner.subdomain,
            "container_prefix": deployment.container_prefix,
            "needs_update": image_state["needs_update"],
            "services": image_state["services"],
        }

    # Per-customer status is gathered after the hub check has completed.
    response["customer_status"] = [
        _customer_row(deployment) for deployment in db.query(Deployment).all()
    ]
    return response
@router.post("/images/pull")
async def pull_all_netbird_images(
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Schedule a background pull of all configured NetBird images.

    The response is sent immediately; the pull itself runs as a background
    task. Call GET /images/check afterwards to see which customers need
    updating.

    Returns:
        A confirmation message and the list of image names read from the
        configuration at request time.

    Raises:
        HTTPException: 403 when the caller is not an admin, 503 when the
            system configuration row is missing.
    """
    if current_user.role != "admin":
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")

    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    # Capture the image names now; the background task reloads its own config.
    image_names = [
        getattr(config, f"netbird_{service}_image")
        for service in ("management", "signal", "relay", "dashboard")
    ]

    async def _pull_in_background() -> None:
        # The task opens its own session rather than reusing the request's.
        session = SessionLocal()
        try:
            fresh_config = (
                session.query(SystemConfig).filter(SystemConfig.id == 1).first()
            )
            if not fresh_config:
                return
            await image_service.pull_all_images(fresh_config)
        except Exception:
            logger.exception("Background image pull failed")
        finally:
            session.close()

    background_tasks.add_task(_pull_in_background)
    return {"message": "Image pull started in background.", "images": image_names}
@router.post("/customers/update-all")
async def update_all_customers(
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Recreate containers for every customer running outdated images.

    Only deployments whose image status reports ``needs_update`` are
    scheduled. This endpoint does not pull images itself, so they are
    expected to have been pulled beforehand. The updates run sequentially
    in a single background task; a failure for one customer is logged and
    does not stop the remaining ones.

    Returns:
        ``{"message": ..., "updated": 0}`` when nothing needs updating,
        otherwise ``{"message": ..., "customers": [names...]}`` listing the
        customers scheduled for update.

    Raises:
        HTTPException: 403 when the caller is not an admin, 503 when the
            system configuration row is missing.
    """
    if current_user.role != "admin":
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")

    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    # Collect customers that need updating. Only plain strings are captured
    # for the background task.
    # NOTE(review): presumably so the task never touches ORM objects bound to
    # the request-scoped session — confirm against get_db's lifetime.
    to_update: list[dict[str, Any]] = []
    for dep in db.query(Deployment).all():
        cs = image_service.get_customer_container_image_status(dep.container_prefix, config)
        if not cs["needs_update"]:
            continue
        customer = dep.customer
        to_update.append({
            # The instance directory is the customer's subdomain under data_dir.
            "instance_dir": f"{config.data_dir}/{customer.subdomain}",
            "project_name": dep.container_prefix,
            "customer_name": customer.name,
        })

    if not to_update:
        return {"message": "All customers are already up to date.", "updated": 0}

    async def _update_all_bg() -> None:
        for entry in to_update:
            try:
                await image_service.update_customer_containers(
                    entry["instance_dir"], entry["project_name"]
                )
                logger.info("Updated containers for %s", entry["project_name"])
            except Exception:
                # Best effort: log and continue with the next customer.
                logger.exception("Failed to update %s", entry["project_name"])

    background_tasks.add_task(_update_all_bg)
    names = [e["customer_name"] for e in to_update]
    return {
        "message": f"Updating {len(to_update)} customer(s) in background.",
        "customers": names,
    }