Compare commits

..

1 Commits

Author SHA1 Message Date
848ead0b2c feat(updates): NetBird container image update management
- New image_service.py: Docker Hub digest check (no pull), local digest/ID
  comparison, pull_all_images, per-customer container image status, and
  update_customer_containers (docker compose up -d, data-safe)
- Monitoring endpoints: GET /images/check (hub vs local + per-customer
  needs_update), POST /images/pull (background), POST /customers/update-all
- Deployment endpoint: POST /{id}/update-images (single-customer update)
- Monitoring page: "NetBird Container Updates" card with Check / Pull / Update
  All buttons; image status table and per-customer update table with inline
  update buttons
- i18n: added keys in en.json and de.json

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 21:01:56 +01:00
7 changed files with 635 additions and 9 deletions

View File

@@ -7,8 +7,8 @@ from sqlalchemy.orm import Session
from app.database import SessionLocal, get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, User
from app.services import docker_service, netbird_service
from app.models import Customer, Deployment, SystemConfig, User
from app.services import docker_service, image_service, netbird_service
from app.utils.security import decrypt_value
logger = logging.getLogger(__name__)
@@ -207,6 +207,50 @@ async def get_customer_credentials(
}
@router.post("/{customer_id}/update-images")
async def update_customer_images(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Recreate a single customer's containers so they run the pulled images.

    The images are expected to have been pulled beforehand via
    POST /monitoring/images/pull. Bind-mounted data is preserved — no data loss.

    Args:
        customer_id: Primary key of the customer whose containers are updated.
        current_user: Authenticated caller; must have the ``admin`` role.
        db: Request-scoped database session.

    Returns:
        A dict with a human-readable ``message`` on success.

    Raises:
        HTTPException: 403 for non-admin callers, 404 when the customer has no
            deployment, 503 when no system configuration row exists, and 500
            when the container update reports a failure.
    """
    caller_is_admin = current_user.role == "admin"
    if not caller_is_admin:
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")

    customer = _require_customer(db, customer_id)

    deployment_query = db.query(Deployment).filter(Deployment.customer_id == customer_id)
    deployment = deployment_query.first()
    if not deployment:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No deployment found for this customer.",
        )

    # The system configuration is looked up as the row with id 1.
    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured."
        )

    prefix = deployment.container_prefix
    instance_dir = f"{config.data_dir}/{customer.subdomain}"
    outcome = await image_service.update_customer_containers(instance_dir, prefix)
    if not outcome["success"]:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=outcome.get("error", "Failed to update containers."),
        )

    logger.info(
        "Containers updated for customer '%s' (prefix: %s) by '%s'.",
        customer.name, prefix, current_user.username,
    )
    return {"message": f"Containers updated for '{customer.name}'."}
def _require_customer(db: Session, customer_id: int) -> Customer:
"""Helper to fetch a customer or raise 404.

View File

@@ -5,13 +5,13 @@ import platform
from typing import Any
import psutil
from fastapi import APIRouter, Depends
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.database import get_db
from app.database import SessionLocal, get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, User
from app.services import docker_service
from app.models import Customer, Deployment, SystemConfig, User
from app.services import docker_service, image_service
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -115,3 +115,137 @@ async def host_resources(
"percent": disk.percent,
},
}
@router.get("/images/check")
async def check_image_updates(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Report which configured NetBird images have updates on Docker Hub.

    Local image digests are compared against Docker Hub; nothing is pulled.

    Returns:
        The result of ``image_service.check_all_images`` (``images`` and
        ``any_update_available``) extended with ``customer_status``, a list
        holding one container image status entry per deployment.

    Raises:
        HTTPException: 503 when no system configuration row exists.
    """
    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    hub_status = await image_service.check_all_images(config)

    def _describe(dep: Deployment) -> dict[str, Any]:
        """Build the status entry for one deployment (local check, no network)."""
        owner = dep.customer
        image_state = image_service.get_customer_container_image_status(dep.container_prefix, config)
        return {
            "customer_id": owner.id,
            "customer_name": owner.name,
            "subdomain": owner.subdomain,
            "container_prefix": dep.container_prefix,
            "needs_update": image_state["needs_update"],
            "services": image_state["services"],
        }

    response = {**hub_status}
    response["customer_status"] = [_describe(dep) for dep in db.query(Deployment).all()]
    return response
@router.post("/images/pull")
async def pull_all_netbird_images(
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Schedule a pull of all configured NetBird images and return at once.

    The pull itself runs as a background task. Callers can re-query
    GET /images/check afterwards to see which customers need updating.

    Returns:
        A dict with a ``message`` and the list of configured ``images``.

    Raises:
        HTTPException: 403 for non-admin callers, 503 when no system
            configuration row exists.
    """
    if current_user.role != "admin":
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")

    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    # Image names reported back to the caller, read while the request session is open.
    image_names = [
        config.netbird_management_image,
        config.netbird_signal_image,
        config.netbird_relay_image,
        config.netbird_dashboard_image,
    ]

    async def _pull_bg() -> None:
        """Pull the images using a dedicated session and a freshly loaded config."""
        session = SessionLocal()
        try:
            fresh_config = session.query(SystemConfig).filter(SystemConfig.id == 1).first()
            if not fresh_config:
                return
            await image_service.pull_all_images(fresh_config)
        except Exception:
            logger.exception("Background image pull failed")
        finally:
            session.close()

    background_tasks.add_task(_pull_bg)
    return {"message": "Image pull started in background.", "images": image_names}
@router.post("/customers/update-all")
async def update_all_customers(
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """Recreate containers for every customer that runs an outdated image.

    Only deployments for which ``get_customer_container_image_status`` reports
    ``needs_update`` are scheduled. Images must already be pulled. Data is
    preserved (bind mounts). The recreation runs as a background task; this
    handler returns as soon as the work is queued.

    Returns:
        ``{"message": ..., "updated": 0}`` when nothing needs updating,
        otherwise ``{"message": ..., "customers": [names]}``.

    Raises:
        HTTPException: 403 for non-admin callers, 503 when no system
            configuration row exists.
    """
    if current_user.role != "admin":
        raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.")

    config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if not config:
        raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.")

    # Collect plain values now: the background task runs after the response is
    # sent and must not depend on ORM objects bound to the request session.
    to_update = []
    for dep in db.query(Deployment).all():
        cs = image_service.get_customer_container_image_status(dep.container_prefix, config)
        if not cs["needs_update"]:
            continue
        customer = dep.customer
        to_update.append({
            "instance_dir": f"{config.data_dir}/{customer.subdomain}",
            "project_name": dep.container_prefix,
            "customer_name": customer.name,
        })

    if not to_update:
        return {"message": "All customers are already up to date.", "updated": 0}

    async def _update_all_bg() -> None:
        """Update the collected customers one after another, logging each outcome."""
        for entry in to_update:
            project = entry["project_name"]
            try:
                result = await image_service.update_customer_containers(
                    entry["instance_dir"], project
                )
            except Exception:
                logger.exception("Failed to update %s", project)
                continue
            # update_customer_containers signals failure through its return
            # value, so the result has to be inspected before logging success.
            if result.get("success"):
                logger.info("Updated containers for %s", project)
            else:
                logger.error(
                    "Failed to update %s: %s", project, result.get("error", "unknown error")
                )

    background_tasks.add_task(_update_all_bg)
    names = [e["customer_name"] for e in to_update]
    return {
        "message": f"Updating {len(to_update)} customer(s) in background.",
        "customers": names,
    }

View File

@@ -0,0 +1,251 @@
"""NetBird Docker image update service.
Compares locally pulled images against Docker Hub to detect available updates.
Provides pull and per-customer container recreation functions without data loss.
"""
import asyncio
import json
import logging
import os
import subprocess
from typing import Any
import httpx
logger = logging.getLogger(__name__)
# Names of the four services that make up one customer's NetBird deployment.
NETBIRD_SERVICES = ["management", "signal", "relay", "dashboard"]
async def _run_cmd(cmd: list[str], timeout: int = 300) -> subprocess.CompletedProcess:
"""Run a subprocess command without blocking the event loop."""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
None,
lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout),
)
def _parse_image_name(image: str) -> tuple[str, str]:
"""Split 'repo/name:tag' into ('repo/name', 'tag'). Defaults tag to 'latest'."""
if ":" in image:
name, tag = image.rsplit(":", 1)
else:
name, tag = image, "latest"
return name, tag
async def get_hub_digest(image: str) -> str | None:
    """Fetch the current digest from Docker Hub for an image:tag.

    Uses the Docker Hub REST API — does NOT pull the image.

    The tag-level ``digest`` field of the response is preferred. The result is
    compared against the local RepoDigest, and for multi-architecture images a
    per-platform digest from ``images[]`` would not be expected to match it.
    The per-platform lookup (linux/amd64 first, then the first entry) is kept
    as a fallback for responses without a tag-level digest.

    Returns the digest string (sha256:...) or None on failure.
    """
    name, tag = _parse_image_name(image)
    if "/" not in name:
        # Official images live under the "library" namespace on Docker Hub.
        name = f"library/{name}"
    url = f"https://hub.docker.com/v2/repositories/{name}/tags/{tag}/"
    try:
        async with httpx.AsyncClient(timeout=15) as client:
            resp = await client.get(url)
        if resp.status_code != 200:
            logger.warning("Docker Hub API returned %d for %s", resp.status_code, image)
            return None
        data = resp.json()

        tag_digest = data.get("digest")
        if tag_digest:
            return tag_digest

        # "images" may be missing or null; treat both as an empty list.
        images = data.get("images") or []
        # Prefer linux/amd64 digest
        for img in images:
            if img.get("os") == "linux" and img.get("architecture") in ("amd64", ""):
                d = img.get("digest")
                if d:
                    return d
        # Fallback: first available digest
        if images:
            return images[0].get("digest")
        return None
    except Exception as exc:
        # Best effort: any network or parsing problem is reported as "unknown".
        logger.warning("Failed to fetch Docker Hub digest for %s: %s", image, exc)
        return None
def get_local_digest(image: str) -> str | None:
    """Return the RepoDigest of a locally available image.

    Runs ``docker image inspect`` and extracts the part after ``@`` from the
    first RepoDigests entry that contains one.

    Returns the digest (sha256:...) or None when the image is not found
    locally, has no repo digest, or the inspection fails.
    """
    inspect_cmd = ["docker", "image", "inspect", image, "--format", "{{json .RepoDigests}}"]
    try:
        proc = subprocess.run(inspect_cmd, capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return None
        repo_digests = json.loads(proc.stdout.strip())
        if not repo_digests:
            return None
        # Entries look like "netbirdio/management@sha256:abc..."
        return next(
            (entry.split("@", 1)[1] for entry in repo_digests if "@" in entry),
            None,
        )
    except Exception as exc:
        logger.warning("Failed to inspect local image %s: %s", image, exc)
        return None
def get_container_image_id(container_name: str) -> str | None:
"""Get the full image ID (sha256:...) of a running or stopped container."""
try:
result = subprocess.run(
["docker", "inspect", container_name, "--format", "{{.Image}}"],
capture_output=True, text=True, timeout=10,
)
if result.returncode != 0:
return None
return result.stdout.strip() or None
except Exception:
return None
def get_local_image_id(image: str) -> str | None:
"""Get the full image ID (sha256:...) of a locally stored image."""
try:
result = subprocess.run(
["docker", "image", "inspect", image, "--format", "{{.Id}}"],
capture_output=True, text=True, timeout=10,
)
if result.returncode != 0:
return None
return result.stdout.strip() or None
except Exception:
return None
async def check_image_status(image: str) -> dict[str, Any]:
    """Determine whether Docker Hub offers a newer build of a configured image.

    The hub lookup and the local inspection run concurrently; the local
    inspection is blocking and is therefore sent to the default executor.

    Returns a dict with:
        image: the image name:tag
        local_digest: digest of locally cached image (or None)
        hub_digest: latest digest from Docker Hub (or None)
        update_available: True if hub_digest differs from local_digest
    """
    loop = asyncio.get_event_loop()
    hub_digest, local_digest = await asyncio.gather(
        get_hub_digest(image),
        loop.run_in_executor(None, get_local_digest, image),
    )

    if not hub_digest:
        # Without a hub digest there is nothing to compare against.
        update_available = False
    elif local_digest:
        update_available = hub_digest != local_digest
    else:
        # Image not pulled locally yet — needs pull
        update_available = True

    return {
        "image": image,
        "local_digest": local_digest,
        "hub_digest": hub_digest,
        "update_available": update_available,
    }
async def check_all_images(config) -> dict[str, Any]:
    """Check the four configured NetBird images for updates, concurrently.

    Args:
        config: Object exposing the ``netbird_*_image`` attributes.

    Returns a dict with:
        images: dict mapping image name -> status dict
        any_update_available: bool
    """
    configured = (
        config.netbird_management_image,
        config.netbird_signal_image,
        config.netbird_relay_image,
        config.netbird_dashboard_image,
    )
    statuses = await asyncio.gather(*(check_image_status(name) for name in configured))
    return {
        "images": {entry["image"]: entry for entry in statuses},
        "any_update_available": any(entry["update_available"] for entry in statuses),
    }
async def pull_image(image: str) -> dict[str, Any]:
    """Run ``docker pull`` for *image* and report the outcome.

    Returns ``{"image", "success": True}`` on success, otherwise
    ``{"image", "success": False, "error"}`` with stderr cut to 500 characters.
    """
    logger.info("Pulling image: %s", image)
    proc = await _run_cmd(["docker", "pull", image], timeout=600)
    if proc.returncode == 0:
        return {"image": image, "success": True}
    logger.error("Failed to pull %s: %s", image, proc.stderr)
    return {"image": image, "success": False, "error": proc.stderr[:500]}
async def pull_all_images(config) -> dict[str, Any]:
    """Pull the four configured NetBird images concurrently.

    Args:
        config: Object exposing the ``netbird_*_image`` attributes.

    Returns a dict with:
        results: dict mapping image name -> pull result dict
        all_success: True only if every pull succeeded
    """
    configured = (
        config.netbird_management_image,
        config.netbird_signal_image,
        config.netbird_relay_image,
        config.netbird_dashboard_image,
    )
    outcomes = await asyncio.gather(*(pull_image(name) for name in configured))
    per_image = {outcome["image"]: outcome for outcome in outcomes}
    everything_ok = all(outcome["success"] for outcome in outcomes)
    return {"results": per_image, "all_success": everything_ok}
def get_customer_container_image_status(container_prefix: str, config) -> dict[str, Any]:
    """Report which of a customer's service containers run an outdated image.

    For each service the image ID of the container named
    ``<container_prefix>-<service>`` is compared with the ID of the locally
    stored image for the configured tag. This is a local check — no network call.

    Returns:
        services: dict mapping service name to status info; ``up_to_date`` is
            True/False, or None when either ID could not be determined
        needs_update: True if any service has a different image ID than locally stored
    """
    configured_images = {
        "management": config.netbird_management_image,
        "signal": config.netbird_signal_image,
        "relay": config.netbird_relay_image,
        "dashboard": config.netbird_dashboard_image,
    }

    services: dict[str, Any] = {}
    needs_update = False
    for svc, image in configured_images.items():
        container_name = f"{container_prefix}-{svc}"
        running_id = get_container_image_id(container_name)
        stored_id = get_local_image_id(image)

        # None means "unknown": container not running or image not pulled.
        up_to_date = running_id == stored_id if (running_id and stored_id) else None
        if up_to_date is False:
            needs_update = True

        services[svc] = {
            "container": container_name,
            "image": image,
            "up_to_date": up_to_date,
        }

    return {"services": services, "needs_update": needs_update}
async def update_customer_containers(instance_dir: str, project_name: str) -> dict[str, Any]:
    """Bring a customer's compose project up again so new images take effect.

    Runs `docker compose up -d --remove-orphans` against the
    docker-compose.yml in the customer's instance directory. Images must
    already be pulled. Bind-mounted data is preserved — no data loss.

    Args:
        instance_dir: Directory that contains the customer's docker-compose.yml.
        project_name: Compose project name passed via ``-p``.

    Returns:
        ``{"success": True}`` or ``{"success": False, "error": ...}``; the error
        is either a missing-file message or stderr cut to 1000 characters.
    """
    compose_path = os.path.join(instance_dir, "docker-compose.yml")
    if not os.path.isfile(compose_path):
        return {"success": False, "error": f"docker-compose.yml not found at {compose_path}"}

    compose_up = ["docker", "compose", "-f", compose_path, "-p", project_name]
    compose_up += ["up", "-d", "--remove-orphans"]

    logger.info("Updating containers for %s", project_name)
    proc = await _run_cmd(compose_up, timeout=300)
    if proc.returncode == 0:
        return {"success": True}
    return {"success": False, "error": proc.stderr[:1000]}

View File

@@ -1152,7 +1152,7 @@
</div>
<!-- Customer Statuses -->
<div class="card shadow-sm">
<div class="card shadow-sm mb-4">
<div class="card-header" data-i18n="monitoring.allCustomerDeployments">All Customer Deployments
</div>
<div class="table-responsive">
@@ -1178,6 +1178,27 @@
</table>
</div>
</div>
<!-- NetBird Container Updates card.
     Header buttons call checkImageUpdates(), pullAllImages() and updateAllCustomers().
     The "Update All" button carries d-none initially; checkImageUpdates() toggles that class.
     #image-updates-body is the element the check results are rendered into. -->
<div class="card shadow-sm">
<div class="card-header d-flex justify-content-between align-items-center">
<span><i class="bi bi-arrow-repeat me-2"></i><span data-i18n="monitoring.imageUpdates">NetBird Container Updates</span></span>
<div class="d-flex gap-2">
<button class="btn btn-outline-secondary btn-sm" onclick="checkImageUpdates()" id="btn-check-updates">
<i class="bi bi-search me-1"></i><span data-i18n="monitoring.checkUpdates">Check for Updates</span>
</button>
<button class="btn btn-outline-primary btn-sm" onclick="pullAllImages()" id="btn-pull-images">
<i class="bi bi-cloud-download me-1"></i><span data-i18n="monitoring.pullImages">Pull Latest Images</span>
</button>
<button class="btn btn-warning btn-sm d-none" onclick="updateAllCustomers()" id="btn-update-all">
<i class="bi bi-lightning-charge-fill me-1"></i><span data-i18n="monitoring.updateAll">Update All</span>
</button>
</div>
</div>
<div class="card-body" id="image-updates-body">
<p class="text-muted mb-0" data-i18n="monitoring.clickCheckUpdates">Click "Check for Updates" to compare local images with Docker Hub.</p>
</div>
</div>
</div>
</div>
</div>

View File

@@ -1633,6 +1633,144 @@ async function loadAllCustomerStatuses() {
}
}
// ---------------------------------------------------------------------------
// Image Updates
// ---------------------------------------------------------------------------
/**
 * Query GET /monitoring/images/check and render two tables into
 * #image-updates-body: per-image status (local vs. hub digest) and
 * per-customer container status with inline update buttons.
 * Also shows or hides the "Update All" button depending on whether any
 * customer needs an update. The check button is disabled while the
 * request is running.
 */
async function checkImageUpdates() {
    const btn = document.getElementById('btn-check-updates');
    const body = document.getElementById('image-updates-body');
    // Drop the first 7 characters (assumed to be the "sha256:" prefix) and keep the next 12.
    const shortDigest = d => d ? d.substring(7, 19) + '…' : '-';

    btn.disabled = true;
    body.innerHTML = `<div class="text-muted"><span class="spinner-border spinner-border-sm me-2"></span>${t('common.loading')}</div>`;
    try {
        const data = await api('GET', '/monitoring/images/check');

        // Image status table
        const imageRows = Object.values(data.images).map(img => {
            const badge = img.update_available
                ? `<span class="badge bg-warning text-dark">${t('monitoring.updateAvailable')}</span>`
                : `<span class="badge bg-success">${t('monitoring.upToDate')}</span>`;
            return `<tr>
                <td><code class="small">${esc(img.image)}</code></td>
                <td class="small text-muted">${shortDigest(img.local_digest)}</td>
                <td class="small text-muted">${shortDigest(img.hub_digest)}</td>
                <td>${badge}</td>
            </tr>`;
        }).join('');

        // Customer status table
        const customerRows = data.customer_status.length === 0
            ? `<tr><td colspan="3" class="text-center text-muted py-3">${t('monitoring.noCustomers')}</td></tr>`
            : data.customer_status.map(c => {
                const badge = c.needs_update
                    ? `<span class="badge bg-warning text-dark">${t('monitoring.needsUpdate')}</span>`
                    : `<span class="badge bg-success">${t('monitoring.upToDate')}</span>`;
                const updateBtn = c.needs_update
                    ? `<button class="btn btn-sm btn-outline-warning ms-2" onclick="updateCustomerImages(${c.customer_id})"
                        title="${t('monitoring.updateCustomer')}"><i class="bi bi-arrow-repeat"></i></button>`
                    : '';
                return `<tr>
                    <td>${c.customer_id}</td>
                    <td>${esc(c.customer_name)} <code class="small text-muted">${esc(c.subdomain)}</code></td>
                    <td>${badge}${updateBtn}</td>
                </tr>`;
            }).join('');

        // Show "Update All" button if any customer needs update
        const updateAllBtn = document.getElementById('btn-update-all');
        if (data.customer_status.some(c => c.needs_update)) {
            updateAllBtn.classList.remove('d-none');
        } else {
            updateAllBtn.classList.add('d-none');
        }

        body.innerHTML = `
            <h6 class="mb-2">${t('monitoring.imageStatusTitle')}</h6>
            <div class="table-responsive mb-4">
                <table class="table table-sm mb-0">
                    <thead class="table-light">
                        <tr>
                            <th>${t('monitoring.thImage')}</th>
                            <th>${t('monitoring.thLocalDigest')}</th>
                            <th>${t('monitoring.thHubDigest')}</th>
                            <th>${t('monitoring.thStatus')}</th>
                        </tr>
                    </thead>
                    <tbody>${imageRows}</tbody>
                </table>
            </div>
            <h6 class="mb-2">${t('monitoring.customerImageTitle')}</h6>
            <div class="table-responsive">
                <table class="table table-sm mb-0">
                    <thead class="table-light">
                        <tr>
                            <th>${t('monitoring.thId')}</th>
                            <th>${t('monitoring.thName')}</th>
                            <th>${t('monitoring.thStatus')}</th>
                        </tr>
                    </thead>
                    <tbody>${customerRows}</tbody>
                </table>
            </div>`;
    } catch (err) {
        // The message is written via innerHTML, so it must be escaped first.
        body.innerHTML = `<div class="alert alert-danger">${esc(err.message)}</div>`;
    } finally {
        btn.disabled = false;
    }
}
/**
 * Ask for confirmation, then trigger POST /monitoring/images/pull.
 * On success a toast is shown and the image check is repeated after 5 seconds.
 * On failure the error is shown in the card. The pull button is disabled while
 * the request is in flight.
 */
async function pullAllImages() {
    const confirmed = confirm(t('monitoring.confirmPull'));
    if (!confirmed) {
        return;
    }

    const pullButton = document.getElementById('btn-pull-images');
    pullButton.disabled = true;
    try {
        await api('POST', '/monitoring/images/pull');
        showToast(t('monitoring.pullStarted'));
        // The pull runs in the background; refresh the status a little later.
        setTimeout(() => {
            checkImageUpdates();
        }, 5000);
    } catch (error) {
        showMonitoringAlert('danger', error.message);
    } finally {
        pullButton.disabled = false;
    }
}
/**
 * Trigger the container update for one customer.
 * On success a toast is shown and the image check is repeated after 2 seconds.
 * On failure the error is shown in the card.
 *
 * @param {number} customerId - ID inserted into the update-images request path.
 */
async function updateCustomerImages(customerId) {
    const endpoint = `/customers/${customerId}/update-images`;
    try {
        await api('POST', endpoint);
        showToast(t('monitoring.updateDone'));
        setTimeout(() => {
            checkImageUpdates();
        }, 2000);
    } catch (error) {
        showMonitoringAlert('danger', error.message);
    }
}
/**
 * Ask for confirmation, then trigger POST /monitoring/customers/update-all.
 * Shows the server's message (or a default text) as a toast and repeats the
 * image check after 5 seconds. On failure the error is shown in the card.
 * The "Update All" button is disabled while the request is in flight.
 */
async function updateAllCustomers() {
    const confirmed = confirm(t('monitoring.confirmUpdateAll'));
    if (!confirmed) {
        return;
    }

    const updateAllButton = document.getElementById('btn-update-all');
    updateAllButton.disabled = true;
    try {
        const response = await api('POST', '/monitoring/customers/update-all');
        showToast(response.message || t('monitoring.updateAllStarted'));
        setTimeout(() => {
            checkImageUpdates();
        }, 5000);
    } catch (error) {
        showMonitoringAlert('danger', error.message);
    } finally {
        updateAllButton.disabled = false;
    }
}
/**
 * Show a single alert at the top of #image-updates-body, replacing any
 * alert that is already there. The message is set as text, not HTML.
 *
 * @param {string} type - Suffix for the alert class (e.g. 'danger').
 * @param {string} msg - Text to display.
 */
function showMonitoringAlert(type, msg) {
    const container = document.getElementById('image-updates-body');

    const previous = container.querySelector('.alert');
    if (previous !== null) {
        previous.remove();
    }

    const alertBox = document.createElement('div');
    alertBox.className = ['alert', `alert-${type}`, 'mt-2'].join(' ');
    alertBox.textContent = msg;
    container.prepend(alertBox);
}
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

View File

@@ -373,6 +373,25 @@
"thDashboard": "Dashboard",
"thRelayPort": "Relay-Port",
"thContainers": "Container",
"noCustomers": "Keine Kunden."
"noCustomers": "Keine Kunden.",
"imageUpdates": "NetBird Container Updates",
"checkUpdates": "Auf Updates prüfen",
"pullImages": "Neueste Images laden",
"updateAll": "Alle aktualisieren",
"clickCheckUpdates": "Klicken Sie auf \"Auf Updates prüfen\" um lokale Images mit Docker Hub zu vergleichen.",
"updateAvailable": "Update verfügbar",
"upToDate": "Aktuell",
"needsUpdate": "Update erforderlich",
"updateCustomer": "Diesen Kunden aktualisieren",
"imageStatusTitle": "Image-Status (vs. Docker Hub)",
"customerImageTitle": "Kunden-Container Status",
"thImage": "Image",
"thLocalDigest": "Lokaler Digest",
"thHubDigest": "Hub Digest",
"confirmPull": "Neueste NetBird Images von Docker Hub laden? Dies kann einige Minuten dauern.",
"pullStarted": "Image-Download im Hintergrund gestartet. Prüfung in 5 Sekunden…",
"confirmUpdateAll": "Container aller Kunden mit veralteten Images neu erstellen? Laufende Dienste werden kurz neu gestartet.",
"updateAllStarted": "Aktualisierung im Hintergrund gestartet.",
"updateDone": "Kunden-Container aktualisiert."
}
}

View File

@@ -280,7 +280,26 @@
"thDashboard": "Dashboard",
"thRelayPort": "Relay Port",
"thContainers": "Containers",
"noCustomers": "No customers."
"noCustomers": "No customers.",
"imageUpdates": "NetBird Container Updates",
"checkUpdates": "Check for Updates",
"pullImages": "Pull Latest Images",
"updateAll": "Update All",
"clickCheckUpdates": "Click \"Check for Updates\" to compare local images with Docker Hub.",
"updateAvailable": "Update available",
"upToDate": "Up to date",
"needsUpdate": "Needs update",
"updateCustomer": "Update this customer",
"imageStatusTitle": "Image Status (vs. Docker Hub)",
"customerImageTitle": "Customer Container Status",
"thImage": "Image",
"thLocalDigest": "Local Digest",
"thHubDigest": "Hub Digest",
"confirmPull": "Pull the latest NetBird images from Docker Hub? This may take a few minutes.",
"pullStarted": "Image pull started in background. Re-checking in 5 seconds…",
"confirmUpdateAll": "Recreate containers for all customers that have outdated images? Running services will briefly restart.",
"updateAllStarted": "Update started in background.",
"updateDone": "Customer containers updated."
},
"userModal": {
"title": "New User",