Add i18n, branding, user management, health checks, and cleanup for deployment

- Multi-language support (EN/DE) with i18n engine and language files
- Configurable branding (name, subtitle, logo) in Settings
- Global default language and per-user language preference
- User management router with CRUD endpoints
- Customer status sync on start/stop/restart
- Health check fixes: derive status from container state, remove broken wget healthcheck
- Caddy reverse proxy and dashboard env templates for customer stacks
- Updated README with real hardware specs, prerequisites, and new features
- Removed .claude settings (JWT tokens) and build artifacts from tracking
- Updated .gitignore for .claude/ and Windows artifacts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 17:24:05 +01:00
parent c4d68db2f4
commit 41ba835a99
28 changed files with 2550 additions and 661 deletions

View File

@@ -6,26 +6,33 @@ Coordinates the full customer deployment lifecycle:
3. Generate configs from Jinja2 templates
4. Create instance directory and write files
5. Start Docker containers
6. Wait for health checks
7. Create NPM proxy hosts
8. Update database
6. Create NPM proxy hosts (production only)
7. Update database
Uses NetBird's embedded IdP (built-in since v0.62) — no external
identity provider (Zitadel, Keycloak, etc.) required.
Includes comprehensive rollback on failure.
"""
import json
import logging
import os
import secrets
import shutil
import time
import urllib.request
import urllib.error
from datetime import datetime
from typing import Any
from jinja2 import Environment, FileSystemLoader
from sqlalchemy.orm import Session
from app.models import Customer, Deployment, DeploymentLog, SystemConfig
from app.models import Customer, Deployment, DeploymentLog
from app.services import docker_service, npm_service, port_manager
from app.utils.config import get_system_config
from app.utils.security import encrypt_value, generate_relay_secret
from app.utils.security import encrypt_value, generate_datastore_encryption_key, generate_relay_secret
logger = logging.getLogger(__name__)
@@ -41,19 +48,16 @@ def _get_jinja_env() -> Environment:
)
def _is_local_domain(base_domain: str) -> bool:
"""Check if the base domain is a local/test domain."""
local_suffixes = (".local", ".test", ".localhost", ".internal", ".example")
return base_domain == "localhost" or any(base_domain.endswith(s) for s in local_suffixes)
def _log_action(
db: Session, customer_id: int, action: str, status: str, message: str, details: str = ""
) -> None:
"""Write a deployment log entry.
Args:
db: Active session.
customer_id: The customer this log belongs to.
action: Action name (e.g. ``deploy``, ``stop``).
status: ``success``, ``error``, or ``info``.
message: Human-readable message.
details: Additional details (optional).
"""
"""Write a deployment log entry."""
log = DeploymentLog(
customer_id=customer_id,
action=action,
@@ -65,15 +69,20 @@ def _log_action(
db.commit()
def _render_template(jinja_env: Environment, template_name: str, output_path: str, **vars) -> None:
"""Render a Jinja2 template and write the output to a file."""
template = jinja_env.get_template(template_name)
content = template.render(**vars)
with open(output_path, "w") as f:
f.write(content)
async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Execute the full deployment workflow for a customer.
Args:
db: Active session.
customer_id: Customer to deploy.
Returns:
Dict with ``success``, ``setup_url``, or ``error``.
Uses NetBird's embedded IdP — no external identity provider needed.
After deployment, the admin opens the dashboard URL and completes
the initial setup wizard (/setup) to create the first user.
"""
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if not customer:
@@ -83,7 +92,6 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
if not config:
return {"success": False, "error": "System not configured. Please set up system settings first."}
# Update status to deploying
customer.status = "deploying"
db.commit()
@@ -92,103 +100,161 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
allocated_port = None
instance_dir = None
container_prefix = f"netbird-kunde{customer_id}"
local_mode = _is_local_domain(config.base_domain)
try:
# Step 1: Allocate relay UDP port
allocated_port = port_manager.allocate_port(db, config.relay_base_port)
_log_action(db, customer_id, "deploy", "info", f"Allocated UDP port {allocated_port}.")
# Step 2: Generate relay secret
# Step 2: Generate secrets
relay_secret = generate_relay_secret()
datastore_key = generate_datastore_encryption_key()
# Step 3: Create instance directory
# Step 3: Compute dashboard port and URLs
dashboard_port = config.dashboard_base_port + customer_id
netbird_domain = f"{customer.subdomain}.{config.base_domain}"
if local_mode:
external_url = f"http://localhost:{dashboard_port}"
netbird_protocol = "http"
netbird_port = str(dashboard_port)
else:
external_url = f"https://{netbird_domain}"
netbird_protocol = "https"
netbird_port = "443"
# Step 4: Create instance directory
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
os.makedirs(instance_dir, exist_ok=True)
os.makedirs(os.path.join(instance_dir, "data", "management"), exist_ok=True)
os.makedirs(os.path.join(instance_dir, "data", "signal"), exist_ok=True)
_log_action(db, customer_id, "deploy", "info", f"Created directory {instance_dir}.")
# Step 4: Render templates
# Step 5: Render all config files
jinja_env = _get_jinja_env()
template_vars = {
"customer_id": customer_id,
"subdomain": customer.subdomain,
"base_domain": config.base_domain,
"netbird_domain": netbird_domain,
"instance_dir": instance_dir,
"relay_udp_port": allocated_port,
"relay_secret": relay_secret,
"dashboard_port": dashboard_port,
"external_url": external_url,
"netbird_protocol": netbird_protocol,
"netbird_port": netbird_port,
"netbird_management_image": config.netbird_management_image,
"netbird_signal_image": config.netbird_signal_image,
"netbird_relay_image": config.netbird_relay_image,
"netbird_dashboard_image": config.netbird_dashboard_image,
"docker_network": config.docker_network,
"datastore_encryption_key": datastore_key,
}
# docker-compose.yml
dc_template = jinja_env.get_template("docker-compose.yml.j2")
dc_content = dc_template.render(**template_vars)
with open(os.path.join(instance_dir, "docker-compose.yml"), "w") as f:
f.write(dc_content)
# management.json
mgmt_template = jinja_env.get_template("management.json.j2")
mgmt_content = mgmt_template.render(**template_vars)
with open(os.path.join(instance_dir, "management.json"), "w") as f:
f.write(mgmt_content)
# relay.env
relay_template = jinja_env.get_template("relay.env.j2")
relay_content = relay_template.render(**template_vars)
with open(os.path.join(instance_dir, "relay.env"), "w") as f:
f.write(relay_content)
_render_template(jinja_env, "docker-compose.yml.j2",
os.path.join(instance_dir, "docker-compose.yml"), **template_vars)
_render_template(jinja_env, "management.json.j2",
os.path.join(instance_dir, "management.json"), **template_vars)
_render_template(jinja_env, "relay.env.j2",
os.path.join(instance_dir, "relay.env"), **template_vars)
_render_template(jinja_env, "Caddyfile.j2",
os.path.join(instance_dir, "Caddyfile"), **template_vars)
_render_template(jinja_env, "dashboard.env.j2",
os.path.join(instance_dir, "dashboard.env"), **template_vars)
_log_action(db, customer_id, "deploy", "info", "Configuration files generated.")
# Step 5: Start Docker containers
docker_service.compose_up(instance_dir, container_prefix)
# Step 6: Start all Docker containers
docker_service.compose_up(instance_dir, container_prefix, timeout=120)
_log_action(db, customer_id, "deploy", "info", "Docker containers started.")
# Step 6: Wait for containers to be healthy
healthy = docker_service.wait_for_healthy(container_prefix, timeout=60)
# Step 7: Wait for containers to be healthy
healthy = docker_service.wait_for_healthy(container_prefix, timeout=90)
if not healthy:
_log_action(
db, customer_id, "deploy", "error",
"Containers did not become healthy within 60 seconds."
db, customer_id, "deploy", "info",
"Not all containers healthy within 90s — may still be starting."
)
# Don't fail completely — containers might still come up
# Step 7: Create NPM proxy host
domain = f"{customer.subdomain}.{config.base_domain}"
dashboard_container = f"netbird-kunde{customer_id}-dashboard"
npm_result = await npm_service.create_proxy_host(
api_url=config.npm_api_url,
npm_email=config.npm_api_email,
npm_password=config.npm_api_password,
domain=domain,
forward_host=dashboard_container,
forward_port=80,
admin_email=config.admin_email,
subdomain=customer.subdomain,
customer_id=customer_id,
)
# Step 8: Auto-create admin user via NetBird setup API
admin_email = customer.email
admin_password = secrets.token_urlsafe(16)
management_container = f"netbird-kunde{customer_id}-management"
setup_api_url = f"http://{management_container}:80/api/setup"
setup_payload = json.dumps({
"name": customer.name,
"email": admin_email,
"password": admin_password,
}).encode("utf-8")
npm_proxy_id = npm_result.get("proxy_id")
if npm_result.get("error"):
_log_action(
db, customer_id, "deploy", "error",
f"NPM proxy creation failed: {npm_result['error']}",
setup_ok = False
for attempt in range(10):
try:
req = urllib.request.Request(
setup_api_url,
data=setup_payload,
headers={"Content-Type": "application/json"},
method="POST",
)
with urllib.request.urlopen(req, timeout=10) as resp:
if resp.status in (200, 201):
setup_ok = True
_log_action(db, customer_id, "deploy", "info",
f"Admin user created: {admin_email}")
break
except urllib.error.HTTPError as e:
body = e.read().decode("utf-8", errors="replace")
if e.code == 409 or "already" in body.lower():
_log_action(db, customer_id, "deploy", "info",
"Instance already set up — skipping admin creation.")
setup_ok = True
break
logger.info("Setup attempt %d failed (HTTP %d): %s", attempt + 1, e.code, body)
except Exception as e:
logger.info("Setup attempt %d failed: %s", attempt + 1, e)
time.sleep(5)
if not setup_ok:
_log_action(db, customer_id, "deploy", "info",
"Auto-setup failed — admin must complete setup manually.")
# Step 9: Create NPM proxy host (production only)
npm_proxy_id = None
if not local_mode:
caddy_container = f"netbird-kunde{customer_id}-caddy"
npm_result = await npm_service.create_proxy_host(
api_url=config.npm_api_url,
npm_email=config.npm_api_email,
npm_password=config.npm_api_password,
domain=netbird_domain,
forward_host=caddy_container,
forward_port=80,
admin_email=config.admin_email,
subdomain=customer.subdomain,
customer_id=customer_id,
)
# Continue — deployment works without NPM, admin can fix later
npm_proxy_id = npm_result.get("proxy_id")
if npm_result.get("error"):
_log_action(
db, customer_id, "deploy", "error",
f"NPM proxy creation failed: {npm_result['error']}",
)
# Step 9: Create deployment record
setup_url = external_url
# Step 8: Create deployment record
setup_url = f"https://{domain}"
deployment = Deployment(
customer_id=customer_id,
container_prefix=container_prefix,
relay_udp_port=allocated_port,
dashboard_port=dashboard_port,
npm_proxy_id=npm_proxy_id,
relay_secret=encrypt_value(relay_secret),
setup_url=setup_url,
netbird_admin_email=encrypt_value(admin_email) if setup_ok else None,
netbird_admin_password=encrypt_value(admin_password) if setup_ok else None,
deployment_status="running",
deployed_at=datetime.utcnow(),
)
@@ -197,7 +263,8 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
customer.status = "active"
db.commit()
_log_action(db, customer_id, "deploy", "success", f"Deployment complete. URL: {setup_url}")
_log_action(db, customer_id, "deploy", "success",
f"Deployment complete. Open {setup_url} to complete initial setup.")
return {"success": True, "setup_url": setup_url}
@@ -234,15 +301,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Remove all resources for a customer deployment.
Args:
db: Active session.
customer_id: Customer to undeploy.
Returns:
Dict with ``success`` bool.
"""
"""Remove all resources for a customer deployment."""
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if not customer:
return {"success": False, "error": "Customer not found."}
@@ -288,15 +347,7 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Stop containers for a customer.
Args:
db: Active session.
customer_id: Customer whose containers to stop.
Returns:
Dict with ``success`` bool.
"""
"""Stop containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db)
if not deployment or not config:
@@ -306,6 +357,9 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
ok = docker_service.compose_stop(instance_dir, deployment.container_prefix)
if ok:
deployment.deployment_status = "stopped"
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if customer:
customer.status = "inactive"
db.commit()
_log_action(db, customer_id, "stop", "success", "Containers stopped.")
else:
@@ -314,15 +368,7 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Start containers for a customer.
Args:
db: Active session.
customer_id: Customer whose containers to start.
Returns:
Dict with ``success`` bool.
"""
"""Start containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db)
if not deployment or not config:
@@ -332,6 +378,9 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
ok = docker_service.compose_start(instance_dir, deployment.container_prefix)
if ok:
deployment.deployment_status = "running"
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if customer:
customer.status = "active"
db.commit()
_log_action(db, customer_id, "start", "success", "Containers started.")
else:
@@ -340,15 +389,7 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Restart containers for a customer.
Args:
db: Active session.
customer_id: Customer whose containers to restart.
Returns:
Dict with ``success`` bool.
"""
"""Restart containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db)
if not deployment or not config:
@@ -358,6 +399,9 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
ok = docker_service.compose_restart(instance_dir, deployment.container_prefix)
if ok:
deployment.deployment_status = "running"
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if customer:
customer.status = "active"
db.commit()
_log_action(db, customer_id, "restart", "success", "Containers restarted.")
else:
@@ -366,15 +410,7 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
def get_customer_health(db: Session, customer_id: int) -> dict[str, Any]:
"""Check health of a customer's deployment.
Args:
db: Active session.
customer_id: Customer ID.
Returns:
Dict with container statuses and overall health.
"""
"""Check health of a customer's deployment."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
if not deployment:
return {"healthy": False, "error": "No deployment found.", "containers": []}
@@ -382,12 +418,16 @@ def get_customer_health(db: Session, customer_id: int) -> dict[str, Any]:
containers = docker_service.get_container_status(deployment.container_prefix)
all_running = all(c["status"] == "running" for c in containers) if containers else False
# Update last health check time
deployment.last_health_check = datetime.utcnow()
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if all_running:
deployment.deployment_status = "running"
if customer:
customer.status = "active"
elif containers:
deployment.deployment_status = "failed"
if customer:
customer.status = "error"
db.commit()
return {