Add i18n, branding, user management, health checks, and cleanup for deployment
- Multi-language support (EN/DE) with i18n engine and language files
- Configurable branding (name, subtitle, logo) in Settings
- Global default language and per-user language preference
- User management router with CRUD endpoints
- Customer status sync on start/stop/restart
- Health check fixes: derive status from container state, remove broken wget healthcheck
- Caddy reverse proxy and dashboard env templates for customer stacks
- Updated README with real hardware specs, prerequisites, and new features
- Removed .claude settings (JWT tokens) and build artifacts from tracking
- Updated .gitignore for .claude/ and Windows artifacts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -26,12 +26,20 @@ def _get_client() -> docker.DockerClient:
|
||||
return docker.from_env()
|
||||
|
||||
|
||||
def compose_up(instance_dir: str, project_name: str) -> bool:
|
||||
def compose_up(
|
||||
instance_dir: str,
|
||||
project_name: str,
|
||||
services: Optional[list[str]] = None,
|
||||
timeout: int = 300,
|
||||
) -> bool:
|
||||
"""Run ``docker compose up -d`` for a customer instance.
|
||||
|
||||
Args:
|
||||
instance_dir: Absolute path to the customer's instance directory.
|
||||
project_name: Docker Compose project name (e.g. ``netbird-kunde5``).
|
||||
services: Optional list of service names to start.
|
||||
If None, all services are started.
|
||||
timeout: Subprocess timeout in seconds (default 300).
|
||||
|
||||
Returns:
|
||||
True on success.
|
||||
@@ -47,16 +55,22 @@ def compose_up(instance_dir: str, project_name: str) -> bool:
|
||||
"docker", "compose",
|
||||
"-f", compose_file,
|
||||
"-p", project_name,
|
||||
"up", "-d", "--remove-orphans",
|
||||
"up", "-d",
|
||||
]
|
||||
if not services:
|
||||
cmd.append("--remove-orphans")
|
||||
if services:
|
||||
cmd.extend(services)
|
||||
|
||||
logger.info("Running: %s", " ".join(cmd))
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
|
||||
|
||||
if result.returncode != 0:
|
||||
logger.error("docker compose up failed: %s", result.stderr)
|
||||
raise RuntimeError(f"docker compose up failed: {result.stderr}")
|
||||
|
||||
logger.info("docker compose up succeeded for %s", project_name)
|
||||
svc_info = f" (services: {', '.join(services)})" if services else ""
|
||||
logger.info("docker compose up succeeded for %s%s", project_name, svc_info)
|
||||
return True
|
||||
|
||||
|
||||
@@ -169,9 +183,13 @@ def get_container_status(container_prefix: str) -> list[dict[str, Any]]:
|
||||
try:
|
||||
containers = client.containers.list(all=True, filters={"name": container_prefix})
|
||||
for c in containers:
|
||||
health = "N/A"
|
||||
if c.attrs.get("State", {}).get("Health"):
|
||||
health = c.attrs["State"]["Health"].get("Status", "N/A")
|
||||
# Derive health from container status.
|
||||
# Docker HEALTHCHECK is unreliable (e.g. netbirdio/management
|
||||
# defines a wget-based check but wget is not installed).
|
||||
if c.status == "running":
|
||||
health = "healthy"
|
||||
else:
|
||||
health = "unhealthy"
|
||||
results.append({
|
||||
"name": c.name,
|
||||
"status": c.status,
|
||||
|
||||
@@ -6,26 +6,33 @@ Coordinates the full customer deployment lifecycle:
|
||||
3. Generate configs from Jinja2 templates
|
||||
4. Create instance directory and write files
|
||||
5. Start Docker containers
|
||||
6. Wait for health checks
|
||||
7. Create NPM proxy hosts
|
||||
8. Update database
|
||||
6. Create NPM proxy hosts (production only)
|
||||
7. Update database
|
||||
|
||||
Uses NetBird's embedded IdP (built-in since v0.62) — no external
|
||||
identity provider (Zitadel, Keycloak, etc.) required.
|
||||
|
||||
Includes comprehensive rollback on failure.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import secrets
|
||||
import shutil
|
||||
import time
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from app.models import Customer, Deployment, DeploymentLog, SystemConfig
|
||||
from app.models import Customer, Deployment, DeploymentLog
|
||||
from app.services import docker_service, npm_service, port_manager
|
||||
from app.utils.config import get_system_config
|
||||
from app.utils.security import encrypt_value, generate_relay_secret
|
||||
from app.utils.security import encrypt_value, generate_datastore_encryption_key, generate_relay_secret
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -41,19 +48,16 @@ def _get_jinja_env() -> Environment:
|
||||
)
|
||||
|
||||
|
||||
def _is_local_domain(base_domain: str) -> bool:
|
||||
"""Check if the base domain is a local/test domain."""
|
||||
local_suffixes = (".local", ".test", ".localhost", ".internal", ".example")
|
||||
return base_domain == "localhost" or any(base_domain.endswith(s) for s in local_suffixes)
|
||||
|
||||
|
||||
def _log_action(
|
||||
db: Session, customer_id: int, action: str, status: str, message: str, details: str = ""
|
||||
) -> None:
|
||||
"""Write a deployment log entry.
|
||||
|
||||
Args:
|
||||
db: Active session.
|
||||
customer_id: The customer this log belongs to.
|
||||
action: Action name (e.g. ``deploy``, ``stop``).
|
||||
status: ``success``, ``error``, or ``info``.
|
||||
message: Human-readable message.
|
||||
details: Additional details (optional).
|
||||
"""
|
||||
"""Write a deployment log entry."""
|
||||
log = DeploymentLog(
|
||||
customer_id=customer_id,
|
||||
action=action,
|
||||
@@ -65,15 +69,20 @@ def _log_action(
|
||||
db.commit()
|
||||
|
||||
|
||||
def _render_template(jinja_env: Environment, template_name: str, output_path: str, **template_vars) -> None:
    """Render a Jinja2 template and write the output to a file.

    Args:
        jinja_env: Environment used to look up the template.
        template_name: Name of the template to load from ``jinja_env``.
        output_path: Path of the file to write; an existing file is
            overwritten.
        **template_vars: Variables passed through to ``template.render``.
    """
    template = jinja_env.get_template(template_name)
    content = template.render(**template_vars)
    # Write with an explicit encoding so the generated file does not depend
    # on the platform's locale default (e.g. cp1252 on Windows).
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)
|
||||
|
||||
|
||||
async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
"""Execute the full deployment workflow for a customer.
|
||||
|
||||
Args:
|
||||
db: Active session.
|
||||
customer_id: Customer to deploy.
|
||||
|
||||
Returns:
|
||||
Dict with ``success``, ``setup_url``, or ``error``.
|
||||
Uses NetBird's embedded IdP — no external identity provider needed.
|
||||
After deployment, the admin opens the dashboard URL and completes
|
||||
the initial setup wizard (/setup) to create the first user.
|
||||
"""
|
||||
customer = db.query(Customer).filter(Customer.id == customer_id).first()
|
||||
if not customer:
|
||||
@@ -83,7 +92,6 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
if not config:
|
||||
return {"success": False, "error": "System not configured. Please set up system settings first."}
|
||||
|
||||
# Update status to deploying
|
||||
customer.status = "deploying"
|
||||
db.commit()
|
||||
|
||||
@@ -92,103 +100,161 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
allocated_port = None
|
||||
instance_dir = None
|
||||
container_prefix = f"netbird-kunde{customer_id}"
|
||||
local_mode = _is_local_domain(config.base_domain)
|
||||
|
||||
try:
|
||||
# Step 1: Allocate relay UDP port
|
||||
allocated_port = port_manager.allocate_port(db, config.relay_base_port)
|
||||
_log_action(db, customer_id, "deploy", "info", f"Allocated UDP port {allocated_port}.")
|
||||
|
||||
# Step 2: Generate relay secret
|
||||
# Step 2: Generate secrets
|
||||
relay_secret = generate_relay_secret()
|
||||
datastore_key = generate_datastore_encryption_key()
|
||||
|
||||
# Step 3: Create instance directory
|
||||
# Step 3: Compute dashboard port and URLs
|
||||
dashboard_port = config.dashboard_base_port + customer_id
|
||||
netbird_domain = f"{customer.subdomain}.{config.base_domain}"
|
||||
|
||||
if local_mode:
|
||||
external_url = f"http://localhost:{dashboard_port}"
|
||||
netbird_protocol = "http"
|
||||
netbird_port = str(dashboard_port)
|
||||
else:
|
||||
external_url = f"https://{netbird_domain}"
|
||||
netbird_protocol = "https"
|
||||
netbird_port = "443"
|
||||
|
||||
# Step 4: Create instance directory
|
||||
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
|
||||
os.makedirs(instance_dir, exist_ok=True)
|
||||
os.makedirs(os.path.join(instance_dir, "data", "management"), exist_ok=True)
|
||||
os.makedirs(os.path.join(instance_dir, "data", "signal"), exist_ok=True)
|
||||
_log_action(db, customer_id, "deploy", "info", f"Created directory {instance_dir}.")
|
||||
|
||||
# Step 4: Render templates
|
||||
# Step 5: Render all config files
|
||||
jinja_env = _get_jinja_env()
|
||||
template_vars = {
|
||||
"customer_id": customer_id,
|
||||
"subdomain": customer.subdomain,
|
||||
"base_domain": config.base_domain,
|
||||
"netbird_domain": netbird_domain,
|
||||
"instance_dir": instance_dir,
|
||||
"relay_udp_port": allocated_port,
|
||||
"relay_secret": relay_secret,
|
||||
"dashboard_port": dashboard_port,
|
||||
"external_url": external_url,
|
||||
"netbird_protocol": netbird_protocol,
|
||||
"netbird_port": netbird_port,
|
||||
"netbird_management_image": config.netbird_management_image,
|
||||
"netbird_signal_image": config.netbird_signal_image,
|
||||
"netbird_relay_image": config.netbird_relay_image,
|
||||
"netbird_dashboard_image": config.netbird_dashboard_image,
|
||||
"docker_network": config.docker_network,
|
||||
"datastore_encryption_key": datastore_key,
|
||||
}
|
||||
|
||||
# docker-compose.yml
|
||||
dc_template = jinja_env.get_template("docker-compose.yml.j2")
|
||||
dc_content = dc_template.render(**template_vars)
|
||||
with open(os.path.join(instance_dir, "docker-compose.yml"), "w") as f:
|
||||
f.write(dc_content)
|
||||
|
||||
# management.json
|
||||
mgmt_template = jinja_env.get_template("management.json.j2")
|
||||
mgmt_content = mgmt_template.render(**template_vars)
|
||||
with open(os.path.join(instance_dir, "management.json"), "w") as f:
|
||||
f.write(mgmt_content)
|
||||
|
||||
# relay.env
|
||||
relay_template = jinja_env.get_template("relay.env.j2")
|
||||
relay_content = relay_template.render(**template_vars)
|
||||
with open(os.path.join(instance_dir, "relay.env"), "w") as f:
|
||||
f.write(relay_content)
|
||||
_render_template(jinja_env, "docker-compose.yml.j2",
|
||||
os.path.join(instance_dir, "docker-compose.yml"), **template_vars)
|
||||
_render_template(jinja_env, "management.json.j2",
|
||||
os.path.join(instance_dir, "management.json"), **template_vars)
|
||||
_render_template(jinja_env, "relay.env.j2",
|
||||
os.path.join(instance_dir, "relay.env"), **template_vars)
|
||||
_render_template(jinja_env, "Caddyfile.j2",
|
||||
os.path.join(instance_dir, "Caddyfile"), **template_vars)
|
||||
_render_template(jinja_env, "dashboard.env.j2",
|
||||
os.path.join(instance_dir, "dashboard.env"), **template_vars)
|
||||
|
||||
_log_action(db, customer_id, "deploy", "info", "Configuration files generated.")
|
||||
|
||||
# Step 5: Start Docker containers
|
||||
docker_service.compose_up(instance_dir, container_prefix)
|
||||
# Step 6: Start all Docker containers
|
||||
docker_service.compose_up(instance_dir, container_prefix, timeout=120)
|
||||
_log_action(db, customer_id, "deploy", "info", "Docker containers started.")
|
||||
|
||||
# Step 6: Wait for containers to be healthy
|
||||
healthy = docker_service.wait_for_healthy(container_prefix, timeout=60)
|
||||
# Step 7: Wait for containers to be healthy
|
||||
healthy = docker_service.wait_for_healthy(container_prefix, timeout=90)
|
||||
if not healthy:
|
||||
_log_action(
|
||||
db, customer_id, "deploy", "error",
|
||||
"Containers did not become healthy within 60 seconds."
|
||||
db, customer_id, "deploy", "info",
|
||||
"Not all containers healthy within 90s — may still be starting."
|
||||
)
|
||||
# Don't fail completely — containers might still come up
|
||||
|
||||
# Step 7: Create NPM proxy host
|
||||
domain = f"{customer.subdomain}.{config.base_domain}"
|
||||
dashboard_container = f"netbird-kunde{customer_id}-dashboard"
|
||||
npm_result = await npm_service.create_proxy_host(
|
||||
api_url=config.npm_api_url,
|
||||
npm_email=config.npm_api_email,
|
||||
npm_password=config.npm_api_password,
|
||||
domain=domain,
|
||||
forward_host=dashboard_container,
|
||||
forward_port=80,
|
||||
admin_email=config.admin_email,
|
||||
subdomain=customer.subdomain,
|
||||
customer_id=customer_id,
|
||||
)
|
||||
# Step 8: Auto-create admin user via NetBird setup API
|
||||
admin_email = customer.email
|
||||
admin_password = secrets.token_urlsafe(16)
|
||||
management_container = f"netbird-kunde{customer_id}-management"
|
||||
setup_api_url = f"http://{management_container}:80/api/setup"
|
||||
setup_payload = json.dumps({
|
||||
"name": customer.name,
|
||||
"email": admin_email,
|
||||
"password": admin_password,
|
||||
}).encode("utf-8")
|
||||
|
||||
npm_proxy_id = npm_result.get("proxy_id")
|
||||
if npm_result.get("error"):
|
||||
_log_action(
|
||||
db, customer_id, "deploy", "error",
|
||||
f"NPM proxy creation failed: {npm_result['error']}",
|
||||
setup_ok = False
|
||||
for attempt in range(10):
|
||||
try:
|
||||
req = urllib.request.Request(
|
||||
setup_api_url,
|
||||
data=setup_payload,
|
||||
headers={"Content-Type": "application/json"},
|
||||
method="POST",
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=10) as resp:
|
||||
if resp.status in (200, 201):
|
||||
setup_ok = True
|
||||
_log_action(db, customer_id, "deploy", "info",
|
||||
f"Admin user created: {admin_email}")
|
||||
break
|
||||
except urllib.error.HTTPError as e:
|
||||
body = e.read().decode("utf-8", errors="replace")
|
||||
if e.code == 409 or "already" in body.lower():
|
||||
_log_action(db, customer_id, "deploy", "info",
|
||||
"Instance already set up — skipping admin creation.")
|
||||
setup_ok = True
|
||||
break
|
||||
logger.info("Setup attempt %d failed (HTTP %d): %s", attempt + 1, e.code, body)
|
||||
except Exception as e:
|
||||
logger.info("Setup attempt %d failed: %s", attempt + 1, e)
|
||||
time.sleep(5)
|
||||
|
||||
if not setup_ok:
|
||||
_log_action(db, customer_id, "deploy", "info",
|
||||
"Auto-setup failed — admin must complete setup manually.")
|
||||
|
||||
# Step 9: Create NPM proxy host (production only)
|
||||
npm_proxy_id = None
|
||||
if not local_mode:
|
||||
caddy_container = f"netbird-kunde{customer_id}-caddy"
|
||||
npm_result = await npm_service.create_proxy_host(
|
||||
api_url=config.npm_api_url,
|
||||
npm_email=config.npm_api_email,
|
||||
npm_password=config.npm_api_password,
|
||||
domain=netbird_domain,
|
||||
forward_host=caddy_container,
|
||||
forward_port=80,
|
||||
admin_email=config.admin_email,
|
||||
subdomain=customer.subdomain,
|
||||
customer_id=customer_id,
|
||||
)
|
||||
# Continue — deployment works without NPM, admin can fix later
|
||||
npm_proxy_id = npm_result.get("proxy_id")
|
||||
if npm_result.get("error"):
|
||||
_log_action(
|
||||
db, customer_id, "deploy", "error",
|
||||
f"NPM proxy creation failed: {npm_result['error']}",
|
||||
)
|
||||
|
||||
# Step 10: Create deployment record
|
||||
setup_url = external_url
|
||||
|
||||
# Step 8: Create deployment record
|
||||
setup_url = f"https://{domain}"
|
||||
deployment = Deployment(
|
||||
customer_id=customer_id,
|
||||
container_prefix=container_prefix,
|
||||
relay_udp_port=allocated_port,
|
||||
dashboard_port=dashboard_port,
|
||||
npm_proxy_id=npm_proxy_id,
|
||||
relay_secret=encrypt_value(relay_secret),
|
||||
setup_url=setup_url,
|
||||
netbird_admin_email=encrypt_value(admin_email) if setup_ok else None,
|
||||
netbird_admin_password=encrypt_value(admin_password) if setup_ok else None,
|
||||
deployment_status="running",
|
||||
deployed_at=datetime.utcnow(),
|
||||
)
|
||||
@@ -197,7 +263,8 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
customer.status = "active"
|
||||
db.commit()
|
||||
|
||||
_log_action(db, customer_id, "deploy", "success", f"Deployment complete. URL: {setup_url}")
|
||||
_log_action(db, customer_id, "deploy", "success",
|
||||
f"Deployment complete. Open {setup_url} to complete initial setup.")
|
||||
|
||||
return {"success": True, "setup_url": setup_url}
|
||||
|
||||
@@ -234,15 +301,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
|
||||
|
||||
async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
"""Remove all resources for a customer deployment.
|
||||
|
||||
Args:
|
||||
db: Active session.
|
||||
customer_id: Customer to undeploy.
|
||||
|
||||
Returns:
|
||||
Dict with ``success`` bool.
|
||||
"""
|
||||
"""Remove all resources for a customer deployment."""
|
||||
customer = db.query(Customer).filter(Customer.id == customer_id).first()
|
||||
if not customer:
|
||||
return {"success": False, "error": "Customer not found."}
|
||||
@@ -288,15 +347,7 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
|
||||
|
||||
def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
"""Stop containers for a customer.
|
||||
|
||||
Args:
|
||||
db: Active session.
|
||||
customer_id: Customer whose containers to stop.
|
||||
|
||||
Returns:
|
||||
Dict with ``success`` bool.
|
||||
"""
|
||||
"""Stop containers for a customer."""
|
||||
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
|
||||
config = get_system_config(db)
|
||||
if not deployment or not config:
|
||||
@@ -306,6 +357,9 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
ok = docker_service.compose_stop(instance_dir, deployment.container_prefix)
|
||||
if ok:
|
||||
deployment.deployment_status = "stopped"
|
||||
customer = db.query(Customer).filter(Customer.id == customer_id).first()
|
||||
if customer:
|
||||
customer.status = "inactive"
|
||||
db.commit()
|
||||
_log_action(db, customer_id, "stop", "success", "Containers stopped.")
|
||||
else:
|
||||
@@ -314,15 +368,7 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
|
||||
|
||||
def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
"""Start containers for a customer.
|
||||
|
||||
Args:
|
||||
db: Active session.
|
||||
customer_id: Customer whose containers to start.
|
||||
|
||||
Returns:
|
||||
Dict with ``success`` bool.
|
||||
"""
|
||||
"""Start containers for a customer."""
|
||||
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
|
||||
config = get_system_config(db)
|
||||
if not deployment or not config:
|
||||
@@ -332,6 +378,9 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
ok = docker_service.compose_start(instance_dir, deployment.container_prefix)
|
||||
if ok:
|
||||
deployment.deployment_status = "running"
|
||||
customer = db.query(Customer).filter(Customer.id == customer_id).first()
|
||||
if customer:
|
||||
customer.status = "active"
|
||||
db.commit()
|
||||
_log_action(db, customer_id, "start", "success", "Containers started.")
|
||||
else:
|
||||
@@ -340,15 +389,7 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
|
||||
|
||||
def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
"""Restart containers for a customer.
|
||||
|
||||
Args:
|
||||
db: Active session.
|
||||
customer_id: Customer whose containers to restart.
|
||||
|
||||
Returns:
|
||||
Dict with ``success`` bool.
|
||||
"""
|
||||
"""Restart containers for a customer."""
|
||||
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
|
||||
config = get_system_config(db)
|
||||
if not deployment or not config:
|
||||
@@ -358,6 +399,9 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
ok = docker_service.compose_restart(instance_dir, deployment.container_prefix)
|
||||
if ok:
|
||||
deployment.deployment_status = "running"
|
||||
customer = db.query(Customer).filter(Customer.id == customer_id).first()
|
||||
if customer:
|
||||
customer.status = "active"
|
||||
db.commit()
|
||||
_log_action(db, customer_id, "restart", "success", "Containers restarted.")
|
||||
else:
|
||||
@@ -366,15 +410,7 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
|
||||
|
||||
def get_customer_health(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
"""Check health of a customer's deployment.
|
||||
|
||||
Args:
|
||||
db: Active session.
|
||||
customer_id: Customer ID.
|
||||
|
||||
Returns:
|
||||
Dict with container statuses and overall health.
|
||||
"""
|
||||
"""Check health of a customer's deployment."""
|
||||
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
|
||||
if not deployment:
|
||||
return {"healthy": False, "error": "No deployment found.", "containers": []}
|
||||
@@ -382,12 +418,16 @@ def get_customer_health(db: Session, customer_id: int) -> dict[str, Any]:
|
||||
containers = docker_service.get_container_status(deployment.container_prefix)
|
||||
all_running = all(c["status"] == "running" for c in containers) if containers else False
|
||||
|
||||
# Update last health check time
|
||||
deployment.last_health_check = datetime.utcnow()
|
||||
customer = db.query(Customer).filter(Customer.id == customer_id).first()
|
||||
if all_running:
|
||||
deployment.deployment_status = "running"
|
||||
if customer:
|
||||
customer.status = "active"
|
||||
elif containers:
|
||||
deployment.deployment_status = "failed"
|
||||
if customer:
|
||||
customer.status = "error"
|
||||
db.commit()
|
||||
|
||||
return {
|
||||
|
||||
Reference in New Issue
Block a user