From 848ead0b2c10ace99d57d4161ff559114631b929 Mon Sep 17 00:00:00 2001 From: twothatIT Date: Tue, 24 Feb 2026 21:01:56 +0100 Subject: [PATCH] feat(updates): NetBird container image update management - New image_service.py: Docker Hub digest check (no pull), local digest/ID comparison, pull_all_images, per-customer container image status, and update_customer_containers (docker compose up -d, data-safe) - Monitoring endpoints: GET /images/check (hub vs local + per-customer needs_update), POST /images/pull (background), POST /customers/update-all - Deployment endpoint: POST /{id}/update-images (single-customer update) - Monitoring page: "NetBird Container Updates" card with Check / Pull / Update All buttons; image status table and per-customer update table with inline update buttons - i18n: added keys in en.json and de.json Co-Authored-By: Claude Sonnet 4.6 --- app/routers/deployments.py | 48 ++++++- app/routers/monitoring.py | 142 ++++++++++++++++++- app/services/image_service.py | 251 ++++++++++++++++++++++++++++++++++ static/index.html | 23 +++- static/js/app.js | 138 +++++++++++++++++++ static/lang/de.json | 21 ++- static/lang/en.json | 21 ++- 7 files changed, 635 insertions(+), 9 deletions(-) create mode 100644 app/services/image_service.py diff --git a/app/routers/deployments.py b/app/routers/deployments.py index d29ef01..2f3f072 100644 --- a/app/routers/deployments.py +++ b/app/routers/deployments.py @@ -7,8 +7,8 @@ from sqlalchemy.orm import Session from app.database import SessionLocal, get_db from app.dependencies import get_current_user -from app.models import Customer, Deployment, User -from app.services import docker_service, netbird_service +from app.models import Customer, Deployment, SystemConfig, User +from app.services import docker_service, image_service, netbird_service from app.utils.security import decrypt_value logger = logging.getLogger(__name__) @@ -207,6 +207,50 @@ async def get_customer_credentials( } +@router.post("/{customer_id}/update-images") +async def update_customer_images( + customer_id: int, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Recreate a customer's containers to pick up newly pulled images. + + Images must already be pulled via POST /monitoring/images/pull. + Bind-mounted data is preserved — no data loss. + """ + if current_user.role != "admin": + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") + + customer = _require_customer(db, customer_id) + deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() + if not deployment: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No deployment found for this customer.", + ) + + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured." + ) + + instance_dir = f"{config.data_dir}/{customer.subdomain}" + result = await image_service.update_customer_containers(instance_dir, deployment.container_prefix) + + if not result["success"]: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=result.get("error", "Failed to update containers."), + ) + + logger.info( + "Containers updated for customer '%s' (prefix: %s) by '%s'.", + customer.name, deployment.container_prefix, current_user.username, + ) + return {"message": f"Containers updated for '{customer.name}'."} + + def _require_customer(db: Session, customer_id: int) -> Customer: """Helper to fetch a customer or raise 404. diff --git a/app/routers/monitoring.py b/app/routers/monitoring.py index a35e8bd..0a0a5be 100644 --- a/app/routers/monitoring.py +++ b/app/routers/monitoring.py @@ -5,13 +5,13 @@ import platform from typing import Any import psutil -from fastapi import APIRouter, Depends +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status from sqlalchemy.orm import Session -from app.database import get_db +from app.database import SessionLocal, get_db from app.dependencies import get_current_user -from app.models import Customer, Deployment, User -from app.services import docker_service +from app.models import Customer, Deployment, SystemConfig, User +from app.services import docker_service, image_service logger = logging.getLogger(__name__) router = APIRouter() @@ -115,3 +115,137 @@ async def host_resources( "percent": disk.percent, }, } + + +@router.get("/images/check") +async def check_image_updates( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> dict[str, Any]: + """Check all configured NetBird images for available updates on Docker Hub. + + Compares local image digests against Docker Hub — no image is pulled. + + Returns: + images: dict mapping image name to update status + any_update_available: bool + customer_status: list of per-customer container image status + """ + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") + + hub_status = await image_service.check_all_images(config) + + # Per-customer local check (no network) + deployments = db.query(Deployment).all() + customer_status = [] + for dep in deployments: + customer = dep.customer + cs = image_service.get_customer_container_image_status(dep.container_prefix, config) + customer_status.append({ + "customer_id": customer.id, + "customer_name": customer.name, + "subdomain": customer.subdomain, + "container_prefix": dep.container_prefix, + "needs_update": cs["needs_update"], + "services": cs["services"], + }) + + return {**hub_status, "customer_status": customer_status} + + +@router.post("/images/pull") +async def pull_all_netbird_images( + background_tasks: BackgroundTasks, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> dict[str, Any]: + """Pull all configured NetBird images from Docker Hub. + + Runs in the background — returns immediately. After pulling, re-check + customer status via GET /images/check to see which customers need updating. + """ + if current_user.role != "admin": + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") + + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") + + # Snapshot image list before background task starts + images = [ + config.netbird_management_image, + config.netbird_signal_image, + config.netbird_relay_image, + config.netbird_dashboard_image, + ] + + async def _pull_bg() -> None: + bg_db = SessionLocal() + try: + cfg = bg_db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if cfg: + await image_service.pull_all_images(cfg) + except Exception: + logger.exception("Background image pull failed") + finally: + bg_db.close() + + background_tasks.add_task(_pull_bg) + return {"message": "Image pull started in background.", "images": images} + + +@router.post("/customers/update-all") +async def update_all_customers( + background_tasks: BackgroundTasks, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> dict[str, Any]: + """Recreate containers for all customers that have outdated images. + + Only customers where at least one container runs an outdated image are updated. + Images must already be pulled. Data is preserved (bind mounts). + """ + if current_user.role != "admin": + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") + + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") + + # Collect customers that need updating + deployments = db.query(Deployment).all() + to_update = [] + for dep in deployments: + cs = image_service.get_customer_container_image_status(dep.container_prefix, config) + if cs["needs_update"]: + customer = dep.customer + instance_dir = str(dep.container_prefix).replace( + "netbird-", "", 1 + ) # subdomain + to_update.append({ + "instance_dir": f"{config.data_dir}/{customer.subdomain}", + "project_name": dep.container_prefix, + "customer_name": customer.name, + }) + + if not to_update: + return {"message": "All customers are already up to date.", "updated": 0} + + async def _update_all_bg() -> None: + for entry in to_update: + try: + await image_service.update_customer_containers( + entry["instance_dir"], entry["project_name"] + ) + logger.info("Updated containers for %s", entry["project_name"]) + except Exception: + logger.exception("Failed to update %s", entry["project_name"]) + + background_tasks.add_task(_update_all_bg) + names = [e["customer_name"] for e in to_update] + return { + "message": f"Updating {len(to_update)} customer(s) in background.", + "customers": names, + } diff --git a/app/services/image_service.py b/app/services/image_service.py new file mode 100644 index 0000000..b45376c --- /dev/null +++ b/app/services/image_service.py @@ -0,0 +1,251 @@ +"""NetBird Docker image update service. + +Compares locally pulled images against Docker Hub to detect available updates. +Provides pull and per-customer container recreation functions without data loss. +""" + +import asyncio +import json +import logging +import os +import subprocess +from typing import Any + +import httpx + +logger = logging.getLogger(__name__) + +# Services that make up a customer's NetBird deployment +NETBIRD_SERVICES = ["management", "signal", "relay", "dashboard"] + + +async def _run_cmd(cmd: list[str], timeout: int = 300) -> subprocess.CompletedProcess: + """Run a subprocess command without blocking the event loop.""" + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + None, + lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout), + ) + + +def _parse_image_name(image: str) -> tuple[str, str]: + """Split 'repo/name:tag' into ('repo/name', 'tag'). Defaults tag to 'latest'.""" + if ":" in image: + name, tag = image.rsplit(":", 1) + else: + name, tag = image, "latest" + return name, tag + + +async def get_hub_digest(image: str) -> str | None: + """Fetch the current digest from Docker Hub for an image:tag. + + Uses the Docker Hub REST API — does NOT pull the image. + Returns the digest string (sha256:...) or None on failure. + """ + name, tag = _parse_image_name(image) + url = f"https://hub.docker.com/v2/repositories/{name}/tags/{tag}/" + try: + async with httpx.AsyncClient(timeout=15) as client: + resp = await client.get(url) + if resp.status_code != 200: + logger.warning("Docker Hub API returned %d for %s", resp.status_code, image) + return None + data = resp.json() + images = data.get("images", []) + # Prefer linux/amd64 digest + for img in images: + if img.get("os") == "linux" and img.get("architecture") in ("amd64", ""): + d = img.get("digest") + if d: + return d + # Fallback: first available digest + if images: + return images[0].get("digest") + return None + except Exception as exc: + logger.warning("Failed to fetch Docker Hub digest for %s: %s", image, exc) + return None + + +def get_local_digest(image: str) -> str | None: + """Get the RepoDigest for a locally pulled image. + + Returns the digest (sha256:...) or None if image not found locally. + """ + try: + result = subprocess.run( + ["docker", "image", "inspect", image, "--format", "{{json .RepoDigests}}"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode != 0: + return None + digests = json.loads(result.stdout.strip()) + if not digests: + return None + # RepoDigests look like "netbirdio/management@sha256:abc..." + for d in digests: + if "@" in d: + return d.split("@", 1)[1] + return None + except Exception as exc: + logger.warning("Failed to inspect local image %s: %s", image, exc) + return None + + +def get_container_image_id(container_name: str) -> str | None: + """Get the full image ID (sha256:...) of a running or stopped container.""" + try: + result = subprocess.run( + ["docker", "inspect", container_name, "--format", "{{.Image}}"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode != 0: + return None + return result.stdout.strip() or None + except Exception: + return None + + +def get_local_image_id(image: str) -> str | None: + """Get the full image ID (sha256:...) of a locally stored image.""" + try: + result = subprocess.run( + ["docker", "image", "inspect", image, "--format", "{{.Id}}"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode != 0: + return None + return result.stdout.strip() or None + except Exception: + return None + + +async def check_image_status(image: str) -> dict[str, Any]: + """Check whether a configured image has an update available on Docker Hub. + + Returns a dict with: + image: the image name:tag + local_digest: digest of locally cached image (or None) + hub_digest: latest digest from Docker Hub (or None) + update_available: True if hub_digest differs from local_digest + """ + hub_digest, local_digest = await asyncio.gather( + get_hub_digest(image), + asyncio.get_event_loop().run_in_executor(None, get_local_digest, image), + ) + + if hub_digest and local_digest: + update_available = hub_digest != local_digest + elif hub_digest and not local_digest: + # Image not pulled locally yet — needs pull + update_available = True + else: + update_available = False + + return { + "image": image, + "local_digest": local_digest, + "hub_digest": hub_digest, + "update_available": update_available, + } + + +async def check_all_images(config) -> dict[str, Any]: + """Check all 4 configured NetBird images for available updates. + + Returns a dict with: + images: dict mapping image name -> status dict + any_update_available: bool + """ + images = [ + config.netbird_management_image, + config.netbird_signal_image, + config.netbird_relay_image, + config.netbird_dashboard_image, + ] + results = await asyncio.gather(*[check_image_status(img) for img in images]) + by_image = {r["image"]: r for r in results} + any_update = any(r["update_available"] for r in results) + return {"images": by_image, "any_update_available": any_update} + + +async def pull_image(image: str) -> dict[str, Any]: + """Pull a Docker image. Returns success/error dict.""" + logger.info("Pulling image: %s", image) + result = await _run_cmd(["docker", "pull", image], timeout=600) + if result.returncode != 0: + logger.error("Failed to pull %s: %s", image, result.stderr) + return {"image": image, "success": False, "error": result.stderr[:500]} + return {"image": image, "success": True} + + +async def pull_all_images(config) -> dict[str, Any]: + """Pull all 4 configured NetBird images. Returns results per image.""" + images = [ + config.netbird_management_image, + config.netbird_signal_image, + config.netbird_relay_image, + config.netbird_dashboard_image, + ] + results = await asyncio.gather(*[pull_image(img) for img in images]) + return { + "results": {r["image"]: r for r in results}, + "all_success": all(r["success"] for r in results), + } + + +def get_customer_container_image_status(container_prefix: str, config) -> dict[str, Any]: + """Check which service containers are running outdated local images. + + Compares each running container's image ID against the locally stored image ID + for the configured image tag. This is a local check — no network call. + + Returns: + services: dict mapping service name to status info + needs_update: True if any service has a different image ID than locally stored + """ + service_images = { + "management": config.netbird_management_image, + "signal": config.netbird_signal_image, + "relay": config.netbird_relay_image, + "dashboard": config.netbird_dashboard_image, + } + services: dict[str, Any] = {} + for svc, image in service_images.items(): + container_name = f"{container_prefix}-{svc}" + container_id = get_container_image_id(container_name) + local_id = get_local_image_id(image) + if container_id and local_id: + up_to_date = container_id == local_id + else: + up_to_date = None # container not running or image not pulled + services[svc] = { + "container": container_name, + "image": image, + "up_to_date": up_to_date, + } + needs_update = any(s["up_to_date"] is False for s in services.values()) + return {"services": services, "needs_update": needs_update} + + +async def update_customer_containers(instance_dir: str, project_name: str) -> dict[str, Any]: + """Recreate customer containers to pick up newly pulled images. + + Runs `docker compose up -d` in the customer's instance directory. + Images must already be pulled. Bind-mounted data is preserved — no data loss. + """ + compose_file = os.path.join(instance_dir, "docker-compose.yml") + if not os.path.isfile(compose_file): + return {"success": False, "error": f"docker-compose.yml not found at {compose_file}"} + cmd = [ + "docker", "compose", + "-f", compose_file, + "-p", project_name, + "up", "-d", "--remove-orphans", + ] + logger.info("Updating containers for %s", project_name) + result = await _run_cmd(cmd, timeout=300) + if result.returncode != 0: + return {"success": False, "error": result.stderr[:1000]} + return {"success": True} diff --git a/static/index.html b/static/index.html index 6d2667d..0d20b49 100644 --- a/static/index.html +++ b/static/index.html @@ -1152,7 +1152,7 @@ -
+
All Customer Deployments
@@ -1178,6 +1178,27 @@
+ + +
+
+ NetBird Container Updates +
+ + + +
+
+
+

Click "Check for Updates" to compare local images with Docker Hub.

+
+
diff --git a/static/js/app.js b/static/js/app.js index 7bcf14e..50a0249 100644 --- a/static/js/app.js +++ b/static/js/app.js @@ -1633,6 +1633,144 @@ async function loadAllCustomerStatuses() { } } +// --------------------------------------------------------------------------- +// Image Updates +// --------------------------------------------------------------------------- +async function checkImageUpdates() { + const btn = document.getElementById('btn-check-updates'); + const body = document.getElementById('image-updates-body'); + btn.disabled = true; + body.innerHTML = `
${t('common.loading')}
`; + + try { + const data = await api('GET', '/monitoring/images/check'); + + // Image status table + const imageRows = Object.values(data.images).map(img => { + const badge = img.update_available + ? `${t('monitoring.updateAvailable')}` + : `${t('monitoring.upToDate')}`; + const shortDigest = d => d ? d.substring(7, 19) + '…' : '-'; + return ` + ${esc(img.image)} + ${shortDigest(img.local_digest)} + ${shortDigest(img.hub_digest)} + ${badge} + `; + }).join(''); + + // Customer status table + const customerRows = data.customer_status.length === 0 + ? `${t('monitoring.noCustomers')}` + : data.customer_status.map(c => { + const badge = c.needs_update + ? `${t('monitoring.needsUpdate')}` + : `${t('monitoring.upToDate')}`; + const updateBtn = c.needs_update + ? `` + : ''; + return ` + ${c.customer_id} + ${esc(c.customer_name)} ${esc(c.subdomain)} + ${badge}${updateBtn} + `; + }).join(''); + + // Show "Update All" button if any customer needs update + const updateAllBtn = document.getElementById('btn-update-all'); + if (data.customer_status.some(c => c.needs_update)) { + updateAllBtn.classList.remove('d-none'); + } else { + updateAllBtn.classList.add('d-none'); + } + + body.innerHTML = ` +
${t('monitoring.imageStatusTitle')}
+
+ + + + + + + + + + ${imageRows} +
${t('monitoring.thImage')}${t('monitoring.thLocalDigest')}${t('monitoring.thHubDigest')}${t('monitoring.thStatus')}
+
+
${t('monitoring.customerImageTitle')}
+
+ + + + + + + + + ${customerRows} +
${t('monitoring.thId')}${t('monitoring.thName')}${t('monitoring.thStatus')}
+
`; + } catch (err) { + body.innerHTML = `
${err.message}
`; + } finally { + btn.disabled = false; + } +} + +async function pullAllImages() { + if (!confirm(t('monitoring.confirmPull'))) return; + const btn = document.getElementById('btn-pull-images'); + btn.disabled = true; + try { + await api('POST', '/monitoring/images/pull'); + showToast(t('monitoring.pullStarted')); + // Re-check after a few seconds to let pull finish + setTimeout(() => checkImageUpdates(), 5000); + } catch (err) { + showMonitoringAlert('danger', err.message); + } finally { + btn.disabled = false; + } +} + +async function updateCustomerImages(customerId) { + try { + await api('POST', `/customers/${customerId}/update-images`); + showToast(t('monitoring.updateDone')); + setTimeout(() => checkImageUpdates(), 2000); + } catch (err) { + showMonitoringAlert('danger', err.message); + } +} + +async function updateAllCustomers() { + if (!confirm(t('monitoring.confirmUpdateAll'))) return; + const btn = document.getElementById('btn-update-all'); + btn.disabled = true; + try { + const data = await api('POST', '/monitoring/customers/update-all'); + showToast(data.message || t('monitoring.updateAllStarted')); + setTimeout(() => checkImageUpdates(), 5000); + } catch (err) { + showMonitoringAlert('danger', err.message); + } finally { + btn.disabled = false; + } +} + +function showMonitoringAlert(type, msg) { + const body = document.getElementById('image-updates-body'); + const existing = body.querySelector('.alert'); + if (existing) existing.remove(); + const div = document.createElement('div'); + div.className = `alert alert-${type} mt-2`; + div.textContent = msg; + body.prepend(div); +} + // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- diff --git a/static/lang/de.json b/static/lang/de.json index fdebf0d..65995ff 100644 --- a/static/lang/de.json +++ b/static/lang/de.json @@ -373,6 +373,25 @@ "thDashboard": "Dashboard", "thRelayPort": "Relay-Port", "thContainers": "Container", - "noCustomers": "Keine Kunden." + "noCustomers": "Keine Kunden.", + "imageUpdates": "NetBird Container Updates", + "checkUpdates": "Auf Updates prüfen", + "pullImages": "Neueste Images laden", + "updateAll": "Alle aktualisieren", + "clickCheckUpdates": "Klicken Sie auf \"Auf Updates prüfen\" um lokale Images mit Docker Hub zu vergleichen.", + "updateAvailable": "Update verfügbar", + "upToDate": "Aktuell", + "needsUpdate": "Update erforderlich", + "updateCustomer": "Diesen Kunden aktualisieren", + "imageStatusTitle": "Image-Status (vs. Docker Hub)", + "customerImageTitle": "Kunden-Container Status", + "thImage": "Image", + "thLocalDigest": "Lokaler Digest", + "thHubDigest": "Hub Digest", + "confirmPull": "Neueste NetBird Images von Docker Hub laden? Dies kann einige Minuten dauern.", + "pullStarted": "Image-Download im Hintergrund gestartet. Prüfung in 5 Sekunden…", + "confirmUpdateAll": "Container aller Kunden mit veralteten Images neu erstellen? Laufende Dienste werden kurz neu gestartet.", + "updateAllStarted": "Aktualisierung im Hintergrund gestartet.", + "updateDone": "Kunden-Container aktualisiert." } } \ No newline at end of file diff --git a/static/lang/en.json b/static/lang/en.json index e15ba20..a4d4fce 100644 --- a/static/lang/en.json +++ b/static/lang/en.json @@ -280,7 +280,26 @@ "thDashboard": "Dashboard", "thRelayPort": "Relay Port", "thContainers": "Containers", - "noCustomers": "No customers." + "noCustomers": "No customers.", + "imageUpdates": "NetBird Container Updates", + "checkUpdates": "Check for Updates", + "pullImages": "Pull Latest Images", + "updateAll": "Update All", + "clickCheckUpdates": "Click \"Check for Updates\" to compare local images with Docker Hub.", + "updateAvailable": "Update available", + "upToDate": "Up to date", + "needsUpdate": "Needs update", + "updateCustomer": "Update this customer", + "imageStatusTitle": "Image Status (vs. Docker Hub)", + "customerImageTitle": "Customer Container Status", + "thImage": "Image", + "thLocalDigest": "Local Digest", + "thHubDigest": "Hub Digest", + "confirmPull": "Pull the latest NetBird images from Docker Hub? This may take a few minutes.", + "pullStarted": "Image pull started in background. Re-checking in 5 seconds…", + "confirmUpdateAll": "Recreate containers for all customers that have outdated images? Running services will briefly restart.", + "updateAllStarted": "Update started in background.", + "updateDone": "Customer containers updated." }, "userModal": { "title": "New User",