feat(updates): NetBird container image update management

- New image_service.py: Docker Hub digest check (no pull), local digest/ID
  comparison, pull_all_images, per-customer container image status, and
  update_customer_containers (docker compose up -d, data-safe)
- Monitoring endpoints: GET /images/check (hub vs local + per-customer
  needs_update), POST /images/pull (background), POST /customers/update-all
- Deployment endpoint: POST /{id}/update-images (single-customer update)
- Monitoring page: "NetBird Container Updates" card with Check / Pull / Update
  All buttons; image status table and per-customer update table with inline
  update buttons
- i18n: added keys in en.json and de.json

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-24 21:01:56 +01:00
parent 796824c400
commit 848ead0b2c
7 changed files with 635 additions and 9 deletions

View File

@@ -7,8 +7,8 @@ from sqlalchemy.orm import Session
from app.database import SessionLocal, get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, User
from app.services import docker_service, netbird_service
from app.models import Customer, Deployment, SystemConfig, User
from app.services import docker_service, image_service, netbird_service
from app.utils.security import decrypt_value
logger = logging.getLogger(__name__)
@@ -207,6 +207,50 @@ async def get_customer_credentials(
}
@router.post("/{customer_id}/update-images")
async def update_customer_images(
customer_id: int,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Recreate a customer's containers to pick up newly pulled images.
Images must already be pulled via POST /monitoring/images/pull.
Bind-mounted data is preserved — no data loss.
"""
if current_user.role != "admin":
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")
customer = _require_customer(db, customer_id)
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
if not deployment:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No deployment found for this customer.",
)
config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not config:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured."
)
instance_dir = f"{config.data_dir}/{customer.subdomain}"
result = await image_service.update_customer_containers(instance_dir, deployment.container_prefix)
if not result["success"]:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=result.get("error", "Failed to update containers."),
)
logger.info(
"Containers updated for customer '%s' (prefix: %s) by '%s'.",
customer.name, deployment.container_prefix, current_user.username,
)
return {"message": f"Containers updated for '{customer.name}'."}
def _require_customer(db: Session, customer_id: int) -> Customer:
"""Helper to fetch a customer or raise 404.

View File

@@ -5,13 +5,13 @@ import platform
from typing import Any
import psutil
from fastapi import APIRouter, Depends
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.database import get_db
from app.database import SessionLocal, get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, User
from app.services import docker_service
from app.models import Customer, Deployment, SystemConfig, User
from app.services import docker_service, image_service
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -115,3 +115,137 @@ async def host_resources(
"percent": disk.percent,
},
}
@router.get("/images/check")
async def check_image_updates(
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
) -> dict[str, Any]:
"""Check all configured NetBird images for available updates on Docker Hub.
Compares local image digests against Docker Hub — no image is pulled.
Returns:
images: dict mapping image name to update status
any_update_available: bool
customer_status: list of per-customer container image status
"""
config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not config:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")
hub_status = await image_service.check_all_images(config)
# Per-customer local check (no network)
deployments = db.query(Deployment).all()
customer_status = []
for dep in deployments:
customer = dep.customer
cs = image_service.get_customer_container_image_status(dep.container_prefix, config)
customer_status.append({
"customer_id": customer.id,
"customer_name": customer.name,
"subdomain": customer.subdomain,
"container_prefix": dep.container_prefix,
"needs_update": cs["needs_update"],
"services": cs["services"],
})
return {**hub_status, "customer_status": customer_status}
@router.post("/images/pull")
async def pull_all_netbird_images(
background_tasks: BackgroundTasks,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
) -> dict[str, Any]:
"""Pull all configured NetBird images from Docker Hub.
Runs in the background — returns immediately. After pulling, re-check
customer status via GET /images/check to see which customers need updating.
"""
if current_user.role != "admin":
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")
config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not config:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")
# Snapshot image list before background task starts
images = [
config.netbird_management_image,
config.netbird_signal_image,
config.netbird_relay_image,
config.netbird_dashboard_image,
]
async def _pull_bg() -> None:
bg_db = SessionLocal()
try:
cfg = bg_db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if cfg:
await image_service.pull_all_images(cfg)
except Exception:
logger.exception("Background image pull failed")
finally:
bg_db.close()
background_tasks.add_task(_pull_bg)
return {"message": "Image pull started in background.", "images": images}
@router.post("/customers/update-all")
async def update_all_customers(
background_tasks: BackgroundTasks,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
) -> dict[str, Any]:
"""Recreate containers for all customers that have outdated images.
Only customers where at least one container runs an outdated image are updated.
Images must already be pulled. Data is preserved (bind mounts).
"""
if current_user.role != "admin":
raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")
config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not config:
raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")
# Collect customers that need updating
deployments = db.query(Deployment).all()
to_update = []
for dep in deployments:
cs = image_service.get_customer_container_image_status(dep.container_prefix, config)
if cs["needs_update"]:
customer = dep.customer
instance_dir = str(dep.container_prefix).replace(
"netbird-", "", 1
) # subdomain
to_update.append({
"instance_dir": f"{config.data_dir}/{customer.subdomain}",
"project_name": dep.container_prefix,
"customer_name": customer.name,
})
if not to_update:
return {"message": "All customers are already up to date.", "updated": 0}
async def _update_all_bg() -> None:
for entry in to_update:
try:
await image_service.update_customer_containers(
entry["instance_dir"], entry["project_name"]
)
logger.info("Updated containers for %s", entry["project_name"])
except Exception:
logger.exception("Failed to update %s", entry["project_name"])
background_tasks.add_task(_update_all_bg)
names = [e["customer_name"] for e in to_update]
return {
"message": f"Updating {len(to_update)} customer(s) in background.",
"customers": names,
}