Files
NetBirdMSP-Appliance/app/services/update_service.py
Sascha Lustenberger | techlan gmbh 1d27226b6f fix(update): detect compose project name at runtime instead of hardcoding
The project name was hardcoded as 'netbirdmsp-appliance' but Docker Compose
derives the project name from the install directory name ('netbird-msp').
This caused Phase A to build an image under the wrong project name and
Phase B to start the replacement container under a mismatched project,
leaving the old container running indefinitely.

Fix: read the 'com.docker.compose.project' label from the running container
at update time. Both Phase A (build) and Phase B (docker compose up) now
use the detected project name. Falls back to SOURCE_DIR basename if the
inspect fails.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-24 10:51:25 +01:00

428 lines
16 KiB
Python

"""Update management — version check and in-place update via git + docker compose."""
import json
import logging
import os
import shutil
import subprocess
import httpx
from datetime import datetime
from pathlib import Path
from typing import Any
import httpx
# In-container path of the git checkout: used as the `git -C` directory, as the
# location of docker-compose.yml for builds, and as the mount destination whose
# host-side path is looked up before the container swap.
SOURCE_DIR = "/app-source"
# JSON file read by get_current_version() for the tag/commit/branch/date fields.
VERSION_FILE = "/app/version.json"
# Directory that receives timestamped database backups and the rebuild log.
BACKUP_DIR = "/app/backups"
# Container name handed to `docker inspect` when reading the compose project label.
CONTAINER_NAME = "netbird-msp-appliance"
# Compose service name handed to `docker compose build` / `up`; also the suffix
# of the image name built from "{project}-{service}".
SERVICE_NAME = "netbird-msp-appliance"
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)
def _get_compose_project_name() -> str:
    """Return the compose project name this appliance is running under.

    The name is taken from the ``com.docker.compose.project`` label of the
    container named ``CONTAINER_NAME`` (queried via ``docker inspect``). If
    the inspect call raises, exits non-zero, or prints nothing, the basename
    of ``SOURCE_DIR`` is returned instead.

    Returns:
        The detected project name, or the ``SOURCE_DIR`` basename as fallback.
    """
    label_template = '{{index .Config.Labels "com.docker.compose.project"}}'
    inspect_cmd = ["docker", "inspect", CONTAINER_NAME, "--format", label_template]

    detected = ""
    try:
        proc = subprocess.run(
            inspect_cmd,
            capture_output=True,
            text=True,
            timeout=10,
        )
    except Exception as exc:
        logger.warning("Could not detect compose project name: %s", exc)
    else:
        # Only trust the output when docker itself reported success.
        if proc.returncode == 0:
            detected = proc.stdout.strip()

    if detected:
        logger.info("Detected compose project name: %s", detected)
        return detected

    # Fallback: derive from SOURCE_DIR basename (mirrors Compose default behaviour)
    default_name = Path(SOURCE_DIR).name
    logger.warning("Using fallback compose project name: %s", default_name)
    return default_name
def get_current_version() -> dict:
    """Load the running build's version metadata from ``VERSION_FILE``.

    Returns:
        Dict with the keys ``tag``, ``commit``, ``branch`` and ``date``. Any
        key missing from the file is reported as ``"unknown"``; if the file
        cannot be read or parsed at all, every key is ``"unknown"``.
    """
    field_names = ("tag", "commit", "branch", "date")
    try:
        payload = json.loads(Path(VERSION_FILE).read_text())
        return {name: payload.get(name, "unknown") for name in field_names}
    except Exception:
        return dict.fromkeys(field_names, "unknown")
async def check_for_updates(config: Any) -> dict:
    """Query the Gitea API for the latest tag and commit on the configured branch.

    The repository URL is turned into a Gitea API endpoint::

        https://git.example.com/owner/repo
        -> https://git.example.com/api/v1/repos/owner/repo/...

    A trailing ``.git`` on the repository name (as found in clone URLs) is
    dropped before the API path is built. Tags are preferred for the version
    comparison; commit SHAs are the fallback when a tag is unknown on either
    side.

    Args:
        config: Object exposing ``git_repo_url``, ``git_branch`` and
            ``git_token``.

    Returns:
        Dict with ``current``, ``latest`` and ``needs_update``; on failure
        ``latest`` is ``None``, ``needs_update`` is ``False`` and an ``error``
        string is included.
    """
    current = get_current_version()

    def _failure(error: str) -> dict:
        """Build the uniform 'no update information available' result."""
        return {
            "current": current,
            "latest": None,
            "needs_update": False,
            "error": error,
        }

    if not config.git_repo_url:
        return _failure("git_repo_url not configured")

    repo_url = config.git_repo_url.rstrip("/")
    parts = repo_url.split("/")
    # "scheme:", "", host, owner, repo -> at least five segments.
    if len(parts) < 5:
        return _failure(f"Cannot parse repo URL: {repo_url}")

    base_url = "/".join(parts[:-2])
    owner = parts[-2]
    # Clone URLs usually end in ".git"; the API path uses the bare repo name.
    repo = parts[-1].removesuffix(".git") or parts[-1]
    branch = config.git_branch or "main"

    repo_api = f"{base_url}/api/v1/repos/{owner}/{repo}"
    branch_api = f"{repo_api}/branches/{branch}"
    tags_api = f"{repo_api}/tags?limit=1"

    headers = {}
    if config.git_token:
        headers["Authorization"] = f"token {config.git_token}"

    try:
        async with httpx.AsyncClient(timeout=10) as client:
            # Fetch branch info (latest commit)
            resp = await client.get(branch_api, headers=headers)
            if resp.status_code != 200:
                return _failure(f"Gitea API returned HTTP {resp.status_code}")

            data = resp.json()
            latest_commit = data.get("commit") or {}
            full_sha = latest_commit.get("id") or "unknown"
            short_sha = full_sha[:8] if full_sha != "unknown" else "unknown"

            # Fetch latest tag — best-effort, a failure must not abort the check.
            latest_tag = "unknown"
            try:
                tag_resp = await client.get(tags_api, headers=headers)
                if tag_resp.status_code == 200:
                    tags = tag_resp.json()
                    if tags:
                        latest_tag = tags[0].get("name", "unknown")
            except Exception as exc:
                logger.debug("Latest-tag lookup failed (ignored): %s", exc)

        # The commit subject may sit next to "id"/"timestamp" or inside a
        # nested "commit" object; accept either layout.
        nested_commit = latest_commit.get("commit") or {}
        message = latest_commit.get("message") or nested_commit.get("message") or ""

        latest = {
            "tag": latest_tag,
            "commit": short_sha,
            "commit_full": full_sha,
            "message": message.split("\n")[0],
            "date": latest_commit.get("timestamp", ""),
            "branch": branch,
        }

        # Determine if update is needed: prefer tag comparison, fallback to commit
        current_tag = current.get("tag", "unknown")
        current_sha = current.get("commit", "unknown")

        if current_tag == "unknown" and current_sha == "unknown":
            # We don't know what we are running; update if the remote has anything.
            needs_update = latest_tag != "unknown" or short_sha != "unknown"
        elif current_tag != "unknown" and latest_tag != "unknown":
            needs_update = current_tag != latest_tag
        else:
            # The local short SHA may be shorter than 8 characters, so also
            # treat it as equal when it is a prefix of the full remote SHA.
            needs_update = (
                current_sha != "unknown"
                and short_sha != "unknown"
                and current_sha != short_sha
                and not full_sha.startswith(current_sha)
            )

        return {"current": current, "latest": latest, "needs_update": needs_update}
    except Exception as exc:
        return _failure(str(exc))
async def get_remote_branches(config: Any) -> list[str]:
    """Query the Gitea API for the branches of the configured repository.

    A trailing ``.git`` on the repository name (as found in clone URLs) is
    dropped before the API path is built.

    Args:
        config: Object exposing ``git_repo_url`` and ``git_token``.

    Returns:
        List of branch names (e.g. ``['main', 'unstable', 'development']``).
        An empty list is returned when the repository URL is missing or
        cannot be parsed, when the API does not answer with HTTP 200 and a
        JSON list, or when the request fails.
    """
    if not config.git_repo_url:
        return []

    parts = config.git_repo_url.rstrip("/").split("/")
    # "scheme:", "", host, owner, repo -> at least five segments.
    if len(parts) < 5:
        return []

    base_url = "/".join(parts[:-2])
    owner = parts[-2]
    # Clone URLs usually end in ".git"; the API path uses the bare repo name.
    repo = parts[-1].removesuffix(".git") or parts[-1]
    branches_api = f"{base_url}/api/v1/repos/{owner}/{repo}/branches?limit=100"

    headers = {}
    if config.git_token:
        headers["Authorization"] = f"token {config.git_token}"

    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(branches_api, headers=headers)
        if resp.status_code != 200:
            logger.warning("Gitea branches API returned HTTP %s", resp.status_code)
            return []
        data = resp.json()
        if isinstance(data, list):
            # Skip malformed entries so the result really is a list of strings.
            return [
                entry["name"]
                for entry in data
                if isinstance(entry, dict) and isinstance(entry.get("name"), str)
            ]
    except Exception as exc:
        logger.error("Error fetching branches: %s", exc)
    return []
def backup_database(db_path: str) -> str:
    """Create a timestamped copy of the SQLite database in ``BACKUP_DIR``.

    Args:
        db_path: Path of the database file to copy.

    Returns:
        The path of the backup file that was written.

    Raises:
        OSError: If the backup directory cannot be created or the copy fails.
    """
    # Local import: datetime.utcnow() is deprecated since Python 3.12; an aware
    # UTC "now" formats to exactly the same timestamp string.
    from datetime import timezone

    Path(BACKUP_DIR).mkdir(parents=True, exist_ok=True)
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    backup_path = f"{BACKUP_DIR}/netbird_msp_{timestamp}.db"
    # NOTE(review): this is a plain file copy; if the database can be written
    # to concurrently, consider sqlite3's online backup API instead.
    shutil.copy2(db_path, backup_path)
    logger.info("Database backed up to %s", backup_path)
    return backup_path
def trigger_update(config: Any, db_path: str) -> dict:
    """Back up the DB, pull the latest code, rebuild the image and swap the container.

    The database backup, ``git pull`` and image build run synchronously (the
    build alone may take up to 600 seconds). Only the final container swap is
    fire-and-forget: it is delegated to a detached helper container, after
    which this function returns and the appliance container is recreated.

    Args:
        config: AppConfig with git_repo_url, git_branch, git_token.
        db_path: Absolute path to the SQLite database file.

    Returns:
        Dict with ``ok`` (bool), ``message`` and ``backup`` (backup path or
        ``None``); on success it additionally contains ``pulled_branch``.
    """
    # Local import: only needed to quote values placed into the helper's shell command.
    import shlex

    secret = str(config.git_token) if config.git_token else ""

    def _redact(text: str) -> str:
        """Mask the git token so it never reaches logs or API responses."""
        return text.replace(secret, "***") if secret else text

    # 1. Backup database before any changes
    try:
        backup_path = backup_database(db_path)
    except Exception as exc:
        logger.error("Database backup failed: %s", exc)
        return {"ok": False, "message": f"Database backup failed: {exc}", "backup": None}

    # 2. Build git pull command (embed token in URL if provided)
    branch = config.git_branch or "main"
    if config.git_token and config.git_repo_url:
        scheme_sep = config.git_repo_url.split("://", 1)
        if len(scheme_sep) == 2:
            auth_url = f"{scheme_sep[0]}://token:{config.git_token}@{scheme_sep[1]}"
        else:
            auth_url = config.git_repo_url
        pull_cmd = ["git", "-C", SOURCE_DIR, "pull", auth_url, branch]
    else:
        pull_cmd = ["git", "-C", SOURCE_DIR, "pull", "origin", branch]

    # 3. Git pull (synchronous — must complete before rebuild)
    try:
        result = subprocess.run(
            pull_cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        # Static message on purpose: str(TimeoutExpired) contains the full
        # command line, including the token-bearing URL.
        return {"ok": False, "message": "git pull timed out after 120s.", "backup": backup_path}
    except Exception as exc:
        return {"ok": False, "message": f"git pull error: {_redact(str(exc))}", "backup": backup_path}

    if result.returncode != 0:
        # Redact before truncating so no partial token survives the cut.
        stderr = _redact(result.stderr.strip())[:500]
        logger.error("git pull failed (exit %d): %s", result.returncode, stderr)
        return {
            "ok": False,
            "message": f"git pull failed: {stderr}",
            "backup": backup_path,
        }
    logger.info("git pull succeeded: %s", _redact(result.stdout.strip())[:200])

    # 4. Read version info from the freshly-pulled source
    build_env = os.environ.copy()
    try:
        build_env["GIT_COMMIT"] = subprocess.run(
            ["git", "-C", SOURCE_DIR, "rev-parse", "--short", "HEAD"],
            capture_output=True, text=True, timeout=10,
        ).stdout.strip() or "unknown"
        build_env["GIT_BRANCH"] = subprocess.run(
            ["git", "-C", SOURCE_DIR, "rev-parse", "--abbrev-ref", "HEAD"],
            capture_output=True, text=True, timeout=10,
        ).stdout.strip() or "unknown"
        build_env["GIT_COMMIT_DATE"] = subprocess.run(
            ["git", "-C", SOURCE_DIR, "log", "-1", "--format=%cI"],
            capture_output=True, text=True, timeout=10,
        ).stdout.strip() or "unknown"
        tag_result = subprocess.run(
            ["git", "-C", SOURCE_DIR, "describe", "--tags", "--abbrev=0"],
            capture_output=True, text=True, timeout=10,
        )
        build_env["GIT_TAG"] = tag_result.stdout.strip() if tag_result.returncode == 0 else "unknown"
    except Exception as exc:
        # Version metadata is informational; the rebuild proceeds without it.
        logger.warning("Could not read version info from source: %s", exc)

    logger.info(
        "Rebuilding with GIT_TAG=%s GIT_COMMIT=%s GIT_BRANCH=%s",
        build_env.get("GIT_TAG", "?"),
        build_env.get("GIT_COMMIT", "?"),
        build_env.get("GIT_BRANCH", "?"),
    )

    # 5. Two-phase rebuild: Build image first, then swap container.
    #    The swap will kill this process (we ARE the container), so we must
    #    ensure the compose-up runs detached on the Docker host via a wrapper.
    log_path = Path(BACKUP_DIR) / "update_rebuild.log"

    # Detect compose project name at runtime — avoids hard-coding a name that
    # may differ from the directory used at deploy time.
    project_name = _get_compose_project_name()
    # Image name follows Docker Compose convention: {project}-{service}
    service_image = f"{project_name}-{SERVICE_NAME}:latest"
    logger.info("Using project=%s image=%s", project_name, service_image)

    # Phase A — build the new image (does NOT stop anything)
    build_cmd = [
        "docker", "compose",
        "-p", project_name,
        "-f", f"{SOURCE_DIR}/docker-compose.yml",
        "build", "--no-cache",
        SERVICE_NAME,
    ]
    logger.info("Phase A: building new image …")
    try:
        build_result = subprocess.run(
            build_cmd,
            capture_output=True, text=True,
            timeout=600,
            env=build_env,
        )
    except subprocess.TimeoutExpired:
        return {"ok": False, "message": "Image build timed out after 600s.", "backup": backup_path}
    except Exception as exc:
        # e.g. docker CLI missing — report it like every other phase does
        # instead of letting the exception escape to the caller.
        logger.error("Image build could not be run: %s", exc)
        return {"ok": False, "message": f"Image build error: {exc}", "backup": backup_path}

    # The build log is a convenience; failing to write it must not abort the update.
    try:
        with open(log_path, "w") as f:
            f.write(build_result.stdout)
            f.write(build_result.stderr)
    except OSError as exc:
        logger.warning("Could not write rebuild log %s: %s", log_path, exc)

    if build_result.returncode != 0:
        logger.error("Image build failed: %s", build_result.stderr[:500])
        return {
            "ok": False,
            "message": f"Image build failed: {build_result.stderr[:300]}",
            "backup": backup_path,
        }
    logger.info("Phase A complete — image built successfully.")

    # Phase B — swap the container using a helper container.
    # When compose recreates our container, ALL processes inside die (PID namespace
    # is destroyed). So we launch a *separate* helper container via 'docker run -d'
    # that has access to the Docker socket and runs 'docker compose up -d'.
    # This helper lives outside our container and survives our restart.

    # Discover the host-side path of SOURCE_DIR (docker volumes use host paths)
    try:
        inspect_result = subprocess.run(
            ["docker", "inspect", CONTAINER_NAME,
             "--format", '{{range .Mounts}}{{if eq .Destination "/app-source"}}{{.Source}}{{end}}{{end}}'],
            capture_output=True, text=True, timeout=10,
        )
        host_source_dir = inspect_result.stdout.strip()
        if not host_source_dir:
            raise ValueError("Could not find /app-source mount")
    except Exception as exc:
        logger.error("Failed to discover host source path: %s", exc)
        return {"ok": False, "message": f"Could not find host source path: {exc}", "backup": backup_path}
    logger.info("Host source directory: %s", host_source_dir)

    env_flags = []
    for key in ("GIT_TAG", "GIT_COMMIT", "GIT_BRANCH", "GIT_COMMIT_DATE"):
        val = build_env.get(key, "unknown")
        env_flags.extend(["-e", f"{key}={val}"])

    # The command runs through `sh -c`, so quote every interpolated value;
    # shlex.quote leaves plain names and paths unchanged.
    compose_file = f"{host_source_dir}/docker-compose.yml"
    swap_script = (
        "sleep 3 && "
        f"docker compose -p {shlex.quote(project_name)} "
        f"-f {shlex.quote(compose_file)} "
        f"up --force-recreate --no-deps -d {shlex.quote(SERVICE_NAME)}"
    )
    helper_cmd = [
        "docker", "run", "--rm", "-d", "--privileged",
        "--name", "msp-updater",
        "-v", "/var/run/docker.sock:/var/run/docker.sock:z",
        "-v", f"{host_source_dir}:{host_source_dir}:ro,z",
        *env_flags,
        service_image,  # freshly built image — has docker CLI + compose plugin
        "sh", "-c",
        swap_script,
    ]
    try:
        # Remove stale updater container if any
        subprocess.run(
            ["docker", "rm", "-f", "msp-updater"],
            capture_output=True, timeout=10,
        )
        result = subprocess.run(
            helper_cmd,
            capture_output=True, text=True,
            timeout=30,
            env=build_env,
        )
        if result.returncode != 0:
            logger.error("Failed to start updater container: %s", result.stderr.strip())
            return {
                "ok": False,
                "message": f"Update-Container konnte nicht gestartet werden: {result.stderr.strip()[:200]}",
                "backup": backup_path,
            }
        logger.info("Phase B: updater container started — this container will restart in ~5s.")
    except Exception as exc:
        logger.error("Failed to launch updater: %s", exc)
        return {"ok": False, "message": f"Updater launch failed: {exc}", "backup": backup_path}

    return {
        "ok": True,
        "message": (
            "Update gestartet. Die App wird in ca. 60 Sekunden mit der neuen Version verfügbar sein."
        ),
        "backup": backup_path,
        "pulled_branch": branch,
    }