Add i18n, branding, user management, health checks, and cleanup for deployment

- Multi-language support (EN/DE) with i18n engine and language files
- Configurable branding (name, subtitle, logo) in Settings
- Global default language and per-user language preference
- User management router with CRUD endpoints
- Customer status sync on start/stop/restart
- Health check fixes: derive status from container state, remove broken wget healthcheck
- Caddy reverse proxy and dashboard env templates for customer stacks
- Updated README with real hardware specs, prerequisites, and new features
- Removed .claude settings (JWT tokens) and build artifacts from tracking
- Updated .gitignore for .claude/ and Windows artifacts

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 17:24:05 +01:00
parent c4d68db2f4
commit 41ba835a99
28 changed files with 2550 additions and 661 deletions

View File

@@ -39,7 +39,7 @@ def get_db() -> Generator[Session, None, None]:
def init_db() -> None:
"""Create all database tables."""
"""Create all database tables and run lightweight migrations."""
from app.models import ( # noqa: F401
Customer,
Deployment,
@@ -49,6 +49,43 @@ def init_db() -> None:
)
Base.metadata.create_all(bind=engine)
_run_migrations()
def _run_migrations() -> None:
"""Add columns that may be missing from older database versions."""
import sqlite3
conn = sqlite3.connect(DATABASE_PATH)
cursor = conn.cursor()
def _has_column(table: str, column: str) -> bool:
cursor.execute(f"PRAGMA table_info({table})")
return any(row[1] == column for row in cursor.fetchall())
migrations = [
("deployments", "dashboard_port", "INTEGER"),
("system_config", "dashboard_base_port", "INTEGER DEFAULT 9000"),
("deployments", "netbird_admin_email", "TEXT"),
("deployments", "netbird_admin_password", "TEXT"),
("system_config", "branding_name", "TEXT DEFAULT 'NetBird MSP Appliance'"),
("system_config", "branding_logo_path", "TEXT"),
("users", "role", "TEXT DEFAULT 'admin'"),
("users", "auth_provider", "TEXT DEFAULT 'local'"),
("system_config", "azure_enabled", "BOOLEAN DEFAULT 0"),
("system_config", "azure_tenant_id", "TEXT"),
("system_config", "azure_client_id", "TEXT"),
("system_config", "azure_client_secret_encrypted", "TEXT"),
("system_config", "branding_subtitle", "TEXT DEFAULT 'Multi-Tenant Management Platform'"),
("system_config", "default_language", "TEXT DEFAULT 'en'"),
("users", "default_language", "TEXT"),
]
for table, column, col_type in migrations:
if not _has_column(table, column):
cursor.execute(f"ALTER TABLE {table} ADD COLUMN {column} {col_type}")
conn.commit()
conn.close()
if __name__ == "__main__":

View File

@@ -9,7 +9,7 @@ from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from app.database import init_db
from app.routers import auth, customers, deployments, monitoring, settings
from app.routers import auth, customers, deployments, monitoring, settings, users
# ---------------------------------------------------------------------------
# Logging
@@ -50,6 +50,7 @@ app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
app.include_router(customers.router, prefix="/api/customers", tags=["Customers"])
app.include_router(deployments.router, prefix="/api/customers", tags=["Deployments"])
app.include_router(monitoring.router, prefix="/api/monitoring", tags=["Monitoring"])
app.include_router(users.router, prefix="/api/users", tags=["Users"])
# ---------------------------------------------------------------------------
# Static files — serve the frontend SPA

View File

@@ -81,9 +81,12 @@ class Deployment(Base):
)
container_prefix: Mapped[str] = mapped_column(String(100), nullable=False)
relay_udp_port: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)
dashboard_port: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
npm_proxy_id: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
relay_secret: Mapped[str] = mapped_column(Text, nullable=False)
setup_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
netbird_admin_email: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
netbird_admin_password: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
deployment_status: Mapped[str] = mapped_column(
String(20), default="pending", nullable=False
)
@@ -106,9 +109,11 @@ class Deployment(Base):
"customer_id": self.customer_id,
"container_prefix": self.container_prefix,
"relay_udp_port": self.relay_udp_port,
"dashboard_port": self.dashboard_port,
"npm_proxy_id": self.npm_proxy_id,
"relay_secret": "***", # Never expose secrets
"setup_url": self.setup_url,
"has_credentials": bool(self.netbird_admin_email and self.netbird_admin_password),
"deployment_status": self.deployment_status,
"deployed_at": self.deployed_at.isoformat() if self.deployed_at else None,
"last_health_check": (
@@ -145,6 +150,19 @@ class SystemConfig(Base):
data_dir: Mapped[str] = mapped_column(String(500), default="/opt/netbird-instances")
docker_network: Mapped[str] = mapped_column(String(100), default="npm-network")
relay_base_port: Mapped[int] = mapped_column(Integer, default=3478)
dashboard_base_port: Mapped[int] = mapped_column(Integer, default=9000)
branding_name: Mapped[Optional[str]] = mapped_column(
String(255), default="NetBird MSP Appliance"
)
branding_subtitle: Mapped[Optional[str]] = mapped_column(
String(255), default="Multi-Tenant Management Platform"
)
branding_logo_path: Mapped[Optional[str]] = mapped_column(String(500), nullable=True)
default_language: Mapped[Optional[str]] = mapped_column(String(10), default="en")
azure_enabled: Mapped[bool] = mapped_column(Boolean, default=False)
azure_tenant_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
azure_client_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
azure_client_secret_encrypted: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(
DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
@@ -168,6 +186,15 @@ class SystemConfig(Base):
"data_dir": self.data_dir,
"docker_network": self.docker_network,
"relay_base_port": self.relay_base_port,
"dashboard_base_port": self.dashboard_base_port,
"branding_name": self.branding_name or "NetBird MSP Appliance",
"branding_subtitle": self.branding_subtitle or "Multi-Tenant Management Platform",
"branding_logo_path": self.branding_logo_path,
"default_language": self.default_language or "en",
"azure_enabled": bool(self.azure_enabled),
"azure_tenant_id": self.azure_tenant_id or "",
"azure_client_id": self.azure_client_id or "",
"azure_client_secret_set": bool(self.azure_client_secret_encrypted),
"created_at": self.created_at.isoformat() if self.created_at else None,
"updated_at": self.updated_at.isoformat() if self.updated_at else None,
}
@@ -220,6 +247,9 @@ class User(Base):
password_hash: Mapped[str] = mapped_column(Text, nullable=False)
email: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
role: Mapped[str] = mapped_column(String(20), default="admin")
auth_provider: Mapped[str] = mapped_column(String(20), default="local")
default_language: Mapped[Optional[str]] = mapped_column(String(10), nullable=True, default=None)
created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
def to_dict(self) -> dict:
@@ -229,5 +259,8 @@ class User(Base):
"username": self.username,
"email": self.email,
"is_active": self.is_active,
"role": self.role or "admin",
"auth_provider": self.auth_provider or "local",
"default_language": self.default_language,
"created_at": self.created_at.isoformat() if self.created_at else None,
}

View File

@@ -1,15 +1,17 @@
"""Authentication API endpoints — login, logout, current user, password change."""
"""Authentication API endpoints — login, logout, current user, password change, Azure AD."""
import logging
import secrets
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies import create_access_token, get_current_user
from app.models import User
from app.utils.security import hash_password, verify_password
from app.models import SystemConfig, User
from app.utils.security import decrypt_value, hash_password, verify_password
from app.utils.validators import ChangePasswordRequest, LoginRequest
logger = logging.getLogger(__name__)
@@ -95,3 +97,115 @@ async def change_password(
db.commit()
logger.info("Password changed for user %s.", current_user.username)
return {"message": "Password changed successfully."}
class AzureCallbackRequest(BaseModel):
"""Azure AD auth code callback payload."""
code: str
redirect_uri: str
@router.get("/azure/config")
async def get_azure_config(db: Session = Depends(get_db)):
"""Public endpoint — returns Azure AD config for the login page."""
config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not config or not config.azure_enabled:
return {"azure_enabled": False}
return {
"azure_enabled": True,
"azure_tenant_id": config.azure_tenant_id,
"azure_client_id": config.azure_client_id,
}
@router.post("/azure/callback")
async def azure_callback(
payload: AzureCallbackRequest,
db: Session = Depends(get_db),
):
"""Exchange Azure AD authorization code for tokens and authenticate."""
config = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not config or not config.azure_enabled:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Azure AD authentication is not enabled.",
)
if not config.azure_tenant_id or not config.azure_client_id or not config.azure_client_secret_encrypted:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Azure AD is not fully configured.",
)
try:
import msal
client_secret = decrypt_value(config.azure_client_secret_encrypted)
authority = f"https://login.microsoftonline.com/{config.azure_tenant_id}"
app = msal.ConfidentialClientApplication(
config.azure_client_id,
authority=authority,
client_credential=client_secret,
)
result = app.acquire_token_by_authorization_code(
payload.code,
scopes=["User.Read"],
redirect_uri=payload.redirect_uri,
)
if "error" in result:
logger.warning("Azure AD token exchange failed: %s", result.get("error_description", result["error"]))
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail=result.get("error_description", "Azure AD authentication failed."),
)
id_token_claims = result.get("id_token_claims", {})
email = id_token_claims.get("preferred_username") or id_token_claims.get("email", "")
display_name = id_token_claims.get("name", email)
if not email:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Could not determine email from Azure AD token.",
)
# Find or create user
user = db.query(User).filter(User.username == email).first()
if not user:
user = User(
username=email,
password_hash=hash_password(secrets.token_urlsafe(32)),
email=email,
is_active=True,
role="admin",
auth_provider="azure",
)
db.add(user)
db.commit()
db.refresh(user)
logger.info("Azure AD user '%s' auto-created.", email)
elif not user.is_active:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail="Account is disabled.",
)
token = create_access_token(user.username)
logger.info("Azure AD user '%s' logged in.", user.username)
return {
"access_token": token,
"token_type": "bearer",
"user": user.to_dict(),
}
except HTTPException:
raise
except Exception as exc:
logger.exception("Azure AD authentication error")
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=f"Azure AD authentication error: {exc}",
)

View File

@@ -9,6 +9,7 @@ from app.database import SessionLocal, get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, User
from app.services import docker_service, netbird_service
from app.utils.security import decrypt_value
logger = logging.getLogger(__name__)
router = APIRouter()
@@ -174,6 +175,38 @@ async def check_customer_health(
return netbird_service.get_customer_health(db, customer_id)
@router.get("/{customer_id}/credentials")
async def get_customer_credentials(
customer_id: int,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Get the NetBird admin credentials for a customer's deployment.
Args:
customer_id: Customer ID.
Returns:
Dict with email and password.
"""
_require_customer(db, customer_id)
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
if not deployment:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No deployment found for this customer.",
)
if not deployment.netbird_admin_email or not deployment.netbird_admin_password:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="No credentials available. Admin must complete setup manually.",
)
return {
"email": decrypt_value(deployment.netbird_admin_email),
"password": decrypt_value(deployment.netbird_admin_password),
}
def _require_customer(db: Session, customer_id: int) -> Customer:
"""Helper to fetch a customer or raise 404.

View File

@@ -71,6 +71,7 @@ async def all_customers_status(
entry["deployment_status"] = c.deployment.deployment_status
entry["containers"] = containers
entry["relay_udp_port"] = c.deployment.relay_udp_port
entry["dashboard_port"] = c.deployment.dashboard_port
entry["setup_url"] = c.deployment.setup_url
else:
entry["deployment_status"] = None

View File

@@ -5,9 +5,11 @@ There is no .env file. Every setting lives in the ``system_config`` table
"""
import logging
import os
import shutil
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, status
from sqlalchemy.orm import Session
from app.database import get_db
@@ -21,6 +23,10 @@ from app.utils.validators import SystemConfigUpdate
logger = logging.getLogger(__name__)
router = APIRouter()
UPLOAD_DIR = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "static", "uploads")
MAX_LOGO_SIZE = 512 * 1024 # 500 KB
ALLOWED_LOGO_TYPES = {"image/png", "image/jpeg", "image/svg+xml"}
@router.get("/system")
async def get_settings(
@@ -75,6 +81,11 @@ async def update_settings(
raw_password = update_data.pop("npm_api_password")
row.npm_api_password_encrypted = encrypt_value(raw_password)
# Handle Azure client secret encryption
if "azure_client_secret" in update_data:
raw_secret = update_data.pop("azure_client_secret")
row.azure_client_secret_encrypted = encrypt_value(raw_secret)
for field, value in update_data.items():
if hasattr(row, field):
setattr(row, field, value)
@@ -116,3 +127,85 @@ async def test_npm(
config.npm_api_url, config.npm_api_email, config.npm_api_password
)
return result
@router.get("/branding")
async def get_branding(db: Session = Depends(get_db)):
"""Public endpoint — returns branding info for the login page (no auth required)."""
row = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not row:
return {
"branding_name": "NetBird MSP Appliance",
"branding_subtitle": "Multi-Tenant Management Platform",
"branding_logo_path": None,
"default_language": "en",
}
return {
"branding_name": row.branding_name or "NetBird MSP Appliance",
"branding_subtitle": row.branding_subtitle or "Multi-Tenant Management Platform",
"branding_logo_path": row.branding_logo_path,
"default_language": row.default_language or "en",
}
@router.post("/branding/logo")
async def upload_logo(
file: UploadFile = File(...),
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Upload a branding logo image (PNG, JPG, SVG, max 500KB)."""
if file.content_type not in ALLOWED_LOGO_TYPES:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"File type '{file.content_type}' not allowed. Use PNG, JPG, or SVG.",
)
content = await file.read()
if len(content) > MAX_LOGO_SIZE:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"File too large ({len(content)} bytes). Maximum is {MAX_LOGO_SIZE} bytes.",
)
os.makedirs(UPLOAD_DIR, exist_ok=True)
ext_map = {"image/png": ".png", "image/jpeg": ".jpg", "image/svg+xml": ".svg"}
ext = ext_map.get(file.content_type, ".png")
filename = f"logo{ext}"
filepath = os.path.join(UPLOAD_DIR, filename)
with open(filepath, "wb") as f:
f.write(content)
logo_url = f"/static/uploads/{filename}"
row = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if row:
row.branding_logo_path = logo_url
row.updated_at = datetime.utcnow()
db.commit()
logger.info("Logo uploaded by %s: %s", current_user.username, logo_url)
return {"branding_logo_path": logo_url}
@router.delete("/branding/logo")
async def delete_logo(
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Remove the branding logo and reset to default icon."""
row = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if row and row.branding_logo_path:
old_path = os.path.join(
os.path.dirname(os.path.dirname(os.path.dirname(__file__))),
row.branding_logo_path.lstrip("/"),
)
if os.path.isfile(old_path):
os.remove(old_path)
row.branding_logo_path = None
row.updated_at = datetime.utcnow()
db.commit()
return {"branding_logo_path": None}

131
app/routers/users.py Normal file
View File

@@ -0,0 +1,131 @@
"""User management API — CRUD operations for local users."""
import logging
import secrets
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies import get_current_user
from app.models import User
from app.utils.security import hash_password
from app.utils.validators import UserCreate, UserUpdate
logger = logging.getLogger(__name__)
router = APIRouter()
@router.get("")
async def list_users(
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""List all users."""
users = db.query(User).order_by(User.id).all()
return [u.to_dict() for u in users]
@router.post("")
async def create_user(
payload: UserCreate,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Create a new local user."""
existing = db.query(User).filter(User.username == payload.username).first()
if existing:
raise HTTPException(
status_code=status.HTTP_409_CONFLICT,
detail=f"Username '{payload.username}' already exists.",
)
user = User(
username=payload.username,
password_hash=hash_password(payload.password),
email=payload.email,
is_active=True,
role="admin",
auth_provider="local",
default_language=payload.default_language,
)
db.add(user)
db.commit()
db.refresh(user)
logger.info("User '%s' created by '%s'.", user.username, current_user.username)
return user.to_dict()
@router.put("/{user_id}")
async def update_user(
user_id: int,
payload: UserUpdate,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Update an existing user (email, is_active, role)."""
user = db.query(User).filter(User.id == user_id).first()
if not user:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found.")
update_data = payload.model_dump(exclude_none=True)
for field, value in update_data.items():
if hasattr(user, field):
setattr(user, field, value)
db.commit()
db.refresh(user)
logger.info("User '%s' updated by '%s'.", user.username, current_user.username)
return user.to_dict()
@router.delete("/{user_id}")
async def delete_user(
user_id: int,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Delete a user (cannot delete yourself)."""
if user_id == current_user.id:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="You cannot delete your own account.",
)
user = db.query(User).filter(User.id == user_id).first()
if not user:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found.")
username = user.username
db.delete(user)
db.commit()
logger.info("User '%s' deleted by '%s'.", username, current_user.username)
return {"message": f"User '{username}' deleted."}
@router.post("/{user_id}/reset-password")
async def reset_password(
user_id: int,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Generate a new random password for a user."""
user = db.query(User).filter(User.id == user_id).first()
if not user:
raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found.")
if user.auth_provider != "local":
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="Cannot reset password for Azure AD users.",
)
new_password = secrets.token_urlsafe(16)
user.password_hash = hash_password(new_password)
db.commit()
logger.info("Password reset for user '%s' by '%s'.", user.username, current_user.username)
return {"message": "Password reset successfully.", "new_password": new_password}

View File

@@ -26,12 +26,20 @@ def _get_client() -> docker.DockerClient:
return docker.from_env()
def compose_up(instance_dir: str, project_name: str) -> bool:
def compose_up(
instance_dir: str,
project_name: str,
services: Optional[list[str]] = None,
timeout: int = 300,
) -> bool:
"""Run ``docker compose up -d`` for a customer instance.
Args:
instance_dir: Absolute path to the customer's instance directory.
project_name: Docker Compose project name (e.g. ``netbird-kunde5``).
services: Optional list of service names to start.
If None, all services are started.
timeout: Subprocess timeout in seconds (default 300).
Returns:
True on success.
@@ -47,16 +55,22 @@ def compose_up(instance_dir: str, project_name: str) -> bool:
"docker", "compose",
"-f", compose_file,
"-p", project_name,
"up", "-d", "--remove-orphans",
"up", "-d",
]
if not services:
cmd.append("--remove-orphans")
if services:
cmd.extend(services)
logger.info("Running: %s", " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
if result.returncode != 0:
logger.error("docker compose up failed: %s", result.stderr)
raise RuntimeError(f"docker compose up failed: {result.stderr}")
logger.info("docker compose up succeeded for %s", project_name)
svc_info = f" (services: {', '.join(services)})" if services else ""
logger.info("docker compose up succeeded for %s%s", project_name, svc_info)
return True
@@ -169,9 +183,13 @@ def get_container_status(container_prefix: str) -> list[dict[str, Any]]:
try:
containers = client.containers.list(all=True, filters={"name": container_prefix})
for c in containers:
health = "N/A"
if c.attrs.get("State", {}).get("Health"):
health = c.attrs["State"]["Health"].get("Status", "N/A")
# Derive health from container status.
# Docker HEALTHCHECK is unreliable (e.g. netbirdio/management
# defines a wget-based check but wget is not installed).
if c.status == "running":
health = "healthy"
else:
health = "unhealthy"
results.append({
"name": c.name,
"status": c.status,

View File

@@ -6,26 +6,33 @@ Coordinates the full customer deployment lifecycle:
3. Generate configs from Jinja2 templates
4. Create instance directory and write files
5. Start Docker containers
6. Wait for health checks
7. Create NPM proxy hosts
8. Update database
6. Create NPM proxy hosts (production only)
7. Update database
Uses NetBird's embedded IdP (built-in since v0.62) — no external
identity provider (Zitadel, Keycloak, etc.) required.
Includes comprehensive rollback on failure.
"""
import json
import logging
import os
import secrets
import shutil
import time
import urllib.request
import urllib.error
from datetime import datetime
from typing import Any
from jinja2 import Environment, FileSystemLoader
from sqlalchemy.orm import Session
from app.models import Customer, Deployment, DeploymentLog, SystemConfig
from app.models import Customer, Deployment, DeploymentLog
from app.services import docker_service, npm_service, port_manager
from app.utils.config import get_system_config
from app.utils.security import encrypt_value, generate_relay_secret
from app.utils.security import encrypt_value, generate_datastore_encryption_key, generate_relay_secret
logger = logging.getLogger(__name__)
@@ -41,19 +48,16 @@ def _get_jinja_env() -> Environment:
)
def _is_local_domain(base_domain: str) -> bool:
"""Check if the base domain is a local/test domain."""
local_suffixes = (".local", ".test", ".localhost", ".internal", ".example")
return base_domain == "localhost" or any(base_domain.endswith(s) for s in local_suffixes)
def _log_action(
db: Session, customer_id: int, action: str, status: str, message: str, details: str = ""
) -> None:
"""Write a deployment log entry.
Args:
db: Active session.
customer_id: The customer this log belongs to.
action: Action name (e.g. ``deploy``, ``stop``).
status: ``success``, ``error``, or ``info``.
message: Human-readable message.
details: Additional details (optional).
"""
"""Write a deployment log entry."""
log = DeploymentLog(
customer_id=customer_id,
action=action,
@@ -65,15 +69,20 @@ def _log_action(
db.commit()
def _render_template(jinja_env: Environment, template_name: str, output_path: str, **vars) -> None:
"""Render a Jinja2 template and write the output to a file."""
template = jinja_env.get_template(template_name)
content = template.render(**vars)
with open(output_path, "w") as f:
f.write(content)
async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Execute the full deployment workflow for a customer.
Args:
db: Active session.
customer_id: Customer to deploy.
Returns:
Dict with ``success``, ``setup_url``, or ``error``.
Uses NetBird's embedded IdP — no external identity provider needed.
After deployment, the admin opens the dashboard URL and completes
the initial setup wizard (/setup) to create the first user.
"""
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if not customer:
@@ -83,7 +92,6 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
if not config:
return {"success": False, "error": "System not configured. Please set up system settings first."}
# Update status to deploying
customer.status = "deploying"
db.commit()
@@ -92,103 +100,161 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
allocated_port = None
instance_dir = None
container_prefix = f"netbird-kunde{customer_id}"
local_mode = _is_local_domain(config.base_domain)
try:
# Step 1: Allocate relay UDP port
allocated_port = port_manager.allocate_port(db, config.relay_base_port)
_log_action(db, customer_id, "deploy", "info", f"Allocated UDP port {allocated_port}.")
# Step 2: Generate relay secret
# Step 2: Generate secrets
relay_secret = generate_relay_secret()
datastore_key = generate_datastore_encryption_key()
# Step 3: Create instance directory
# Step 3: Compute dashboard port and URLs
dashboard_port = config.dashboard_base_port + customer_id
netbird_domain = f"{customer.subdomain}.{config.base_domain}"
if local_mode:
external_url = f"http://localhost:{dashboard_port}"
netbird_protocol = "http"
netbird_port = str(dashboard_port)
else:
external_url = f"https://{netbird_domain}"
netbird_protocol = "https"
netbird_port = "443"
# Step 4: Create instance directory
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
os.makedirs(instance_dir, exist_ok=True)
os.makedirs(os.path.join(instance_dir, "data", "management"), exist_ok=True)
os.makedirs(os.path.join(instance_dir, "data", "signal"), exist_ok=True)
_log_action(db, customer_id, "deploy", "info", f"Created directory {instance_dir}.")
# Step 4: Render templates
# Step 5: Render all config files
jinja_env = _get_jinja_env()
template_vars = {
"customer_id": customer_id,
"subdomain": customer.subdomain,
"base_domain": config.base_domain,
"netbird_domain": netbird_domain,
"instance_dir": instance_dir,
"relay_udp_port": allocated_port,
"relay_secret": relay_secret,
"dashboard_port": dashboard_port,
"external_url": external_url,
"netbird_protocol": netbird_protocol,
"netbird_port": netbird_port,
"netbird_management_image": config.netbird_management_image,
"netbird_signal_image": config.netbird_signal_image,
"netbird_relay_image": config.netbird_relay_image,
"netbird_dashboard_image": config.netbird_dashboard_image,
"docker_network": config.docker_network,
"datastore_encryption_key": datastore_key,
}
# docker-compose.yml
dc_template = jinja_env.get_template("docker-compose.yml.j2")
dc_content = dc_template.render(**template_vars)
with open(os.path.join(instance_dir, "docker-compose.yml"), "w") as f:
f.write(dc_content)
# management.json
mgmt_template = jinja_env.get_template("management.json.j2")
mgmt_content = mgmt_template.render(**template_vars)
with open(os.path.join(instance_dir, "management.json"), "w") as f:
f.write(mgmt_content)
# relay.env
relay_template = jinja_env.get_template("relay.env.j2")
relay_content = relay_template.render(**template_vars)
with open(os.path.join(instance_dir, "relay.env"), "w") as f:
f.write(relay_content)
_render_template(jinja_env, "docker-compose.yml.j2",
os.path.join(instance_dir, "docker-compose.yml"), **template_vars)
_render_template(jinja_env, "management.json.j2",
os.path.join(instance_dir, "management.json"), **template_vars)
_render_template(jinja_env, "relay.env.j2",
os.path.join(instance_dir, "relay.env"), **template_vars)
_render_template(jinja_env, "Caddyfile.j2",
os.path.join(instance_dir, "Caddyfile"), **template_vars)
_render_template(jinja_env, "dashboard.env.j2",
os.path.join(instance_dir, "dashboard.env"), **template_vars)
_log_action(db, customer_id, "deploy", "info", "Configuration files generated.")
# Step 5: Start Docker containers
docker_service.compose_up(instance_dir, container_prefix)
# Step 6: Start all Docker containers
docker_service.compose_up(instance_dir, container_prefix, timeout=120)
_log_action(db, customer_id, "deploy", "info", "Docker containers started.")
# Step 6: Wait for containers to be healthy
healthy = docker_service.wait_for_healthy(container_prefix, timeout=60)
# Step 7: Wait for containers to be healthy
healthy = docker_service.wait_for_healthy(container_prefix, timeout=90)
if not healthy:
_log_action(
db, customer_id, "deploy", "error",
"Containers did not become healthy within 60 seconds."
db, customer_id, "deploy", "info",
"Not all containers healthy within 90s — may still be starting."
)
# Don't fail completely — containers might still come up
# Step 7: Create NPM proxy host
domain = f"{customer.subdomain}.{config.base_domain}"
dashboard_container = f"netbird-kunde{customer_id}-dashboard"
npm_result = await npm_service.create_proxy_host(
api_url=config.npm_api_url,
npm_email=config.npm_api_email,
npm_password=config.npm_api_password,
domain=domain,
forward_host=dashboard_container,
forward_port=80,
admin_email=config.admin_email,
subdomain=customer.subdomain,
customer_id=customer_id,
)
# Step 8: Auto-create admin user via NetBird setup API
admin_email = customer.email
admin_password = secrets.token_urlsafe(16)
management_container = f"netbird-kunde{customer_id}-management"
setup_api_url = f"http://{management_container}:80/api/setup"
setup_payload = json.dumps({
"name": customer.name,
"email": admin_email,
"password": admin_password,
}).encode("utf-8")
npm_proxy_id = npm_result.get("proxy_id")
if npm_result.get("error"):
_log_action(
db, customer_id, "deploy", "error",
f"NPM proxy creation failed: {npm_result['error']}",
setup_ok = False
for attempt in range(10):
try:
req = urllib.request.Request(
setup_api_url,
data=setup_payload,
headers={"Content-Type": "application/json"},
method="POST",
)
with urllib.request.urlopen(req, timeout=10) as resp:
if resp.status in (200, 201):
setup_ok = True
_log_action(db, customer_id, "deploy", "info",
f"Admin user created: {admin_email}")
break
except urllib.error.HTTPError as e:
body = e.read().decode("utf-8", errors="replace")
if e.code == 409 or "already" in body.lower():
_log_action(db, customer_id, "deploy", "info",
"Instance already set up — skipping admin creation.")
setup_ok = True
break
logger.info("Setup attempt %d failed (HTTP %d): %s", attempt + 1, e.code, body)
except Exception as e:
logger.info("Setup attempt %d failed: %s", attempt + 1, e)
time.sleep(5)
if not setup_ok:
_log_action(db, customer_id, "deploy", "info",
"Auto-setup failed — admin must complete setup manually.")
# Step 9: Create NPM proxy host (production only)
npm_proxy_id = None
if not local_mode:
caddy_container = f"netbird-kunde{customer_id}-caddy"
npm_result = await npm_service.create_proxy_host(
api_url=config.npm_api_url,
npm_email=config.npm_api_email,
npm_password=config.npm_api_password,
domain=netbird_domain,
forward_host=caddy_container,
forward_port=80,
admin_email=config.admin_email,
subdomain=customer.subdomain,
customer_id=customer_id,
)
# Continue — deployment works without NPM, admin can fix later
npm_proxy_id = npm_result.get("proxy_id")
if npm_result.get("error"):
_log_action(
db, customer_id, "deploy", "error",
f"NPM proxy creation failed: {npm_result['error']}",
)
# Step 9: Create deployment record
setup_url = external_url
# Step 8: Create deployment record
setup_url = f"https://{domain}"
deployment = Deployment(
customer_id=customer_id,
container_prefix=container_prefix,
relay_udp_port=allocated_port,
dashboard_port=dashboard_port,
npm_proxy_id=npm_proxy_id,
relay_secret=encrypt_value(relay_secret),
setup_url=setup_url,
netbird_admin_email=encrypt_value(admin_email) if setup_ok else None,
netbird_admin_password=encrypt_value(admin_password) if setup_ok else None,
deployment_status="running",
deployed_at=datetime.utcnow(),
)
@@ -197,7 +263,8 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
customer.status = "active"
db.commit()
_log_action(db, customer_id, "deploy", "success", f"Deployment complete. URL: {setup_url}")
_log_action(db, customer_id, "deploy", "success",
f"Deployment complete. Open {setup_url} to complete initial setup.")
return {"success": True, "setup_url": setup_url}
@@ -234,15 +301,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Remove all resources for a customer deployment.
Args:
db: Active session.
customer_id: Customer to undeploy.
Returns:
Dict with ``success`` bool.
"""
"""Remove all resources for a customer deployment."""
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if not customer:
return {"success": False, "error": "Customer not found."}
@@ -288,15 +347,7 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Stop containers for a customer.
Args:
db: Active session.
customer_id: Customer whose containers to stop.
Returns:
Dict with ``success`` bool.
"""
"""Stop containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db)
if not deployment or not config:
@@ -306,6 +357,9 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
ok = docker_service.compose_stop(instance_dir, deployment.container_prefix)
if ok:
deployment.deployment_status = "stopped"
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if customer:
customer.status = "inactive"
db.commit()
_log_action(db, customer_id, "stop", "success", "Containers stopped.")
else:
@@ -314,15 +368,7 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Start containers for a customer.
Args:
db: Active session.
customer_id: Customer whose containers to start.
Returns:
Dict with ``success`` bool.
"""
"""Start containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db)
if not deployment or not config:
@@ -332,6 +378,9 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
ok = docker_service.compose_start(instance_dir, deployment.container_prefix)
if ok:
deployment.deployment_status = "running"
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if customer:
customer.status = "active"
db.commit()
_log_action(db, customer_id, "start", "success", "Containers started.")
else:
@@ -340,15 +389,7 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Restart containers for a customer.
Args:
db: Active session.
customer_id: Customer whose containers to restart.
Returns:
Dict with ``success`` bool.
"""
"""Restart containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db)
if not deployment or not config:
@@ -358,6 +399,9 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
ok = docker_service.compose_restart(instance_dir, deployment.container_prefix)
if ok:
deployment.deployment_status = "running"
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if customer:
customer.status = "active"
db.commit()
_log_action(db, customer_id, "restart", "success", "Containers restarted.")
else:
@@ -366,15 +410,7 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
def get_customer_health(db: Session, customer_id: int) -> dict[str, Any]:
"""Check health of a customer's deployment.
Args:
db: Active session.
customer_id: Customer ID.
Returns:
Dict with container statuses and overall health.
"""
"""Check health of a customer's deployment."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
if not deployment:
return {"healthy": False, "error": "No deployment found.", "containers": []}
@@ -382,12 +418,16 @@ def get_customer_health(db: Session, customer_id: int) -> dict[str, Any]:
containers = docker_service.get_container_status(deployment.container_prefix)
all_running = all(c["status"] == "running" for c in containers) if containers else False
# Update last health check time
deployment.last_health_check = datetime.utcnow()
customer = db.query(Customer).filter(Customer.id == customer_id).first()
if all_running:
deployment.deployment_status = "running"
if customer:
customer.status = "active"
elif containers:
deployment.deployment_status = "failed"
if customer:
customer.status = "error"
db.commit()
return {

View File

@@ -30,6 +30,7 @@ class AppConfig:
data_dir: str
docker_network: str
relay_base_port: int
dashboard_base_port: int
# Environment-level settings (not stored in DB)
@@ -77,4 +78,5 @@ def get_system_config(db: Session) -> Optional[AppConfig]:
data_dir=row.data_dir,
docker_network=row.docker_network,
relay_base_port=row.relay_base_port,
dashboard_base_port=getattr(row, "dashboard_base_port", 9000) or 9000,
)

View File

@@ -89,3 +89,16 @@ def generate_relay_secret() -> str:
A 32-character hex string.
"""
return secrets.token_hex(16)
def generate_datastore_encryption_key() -> str:
"""Generate a base64-encoded 32-byte key for NetBird DataStoreEncryptionKey.
NetBird management (Go) expects standard base64 decoding to exactly 32 bytes.
Returns:
A standard base64-encoded string representing 32 random bytes.
"""
import base64
return base64.b64encode(secrets.token_bytes(32)).decode()

View File

@@ -109,6 +109,14 @@ class SystemConfigUpdate(BaseModel):
data_dir: Optional[str] = Field(None, max_length=500)
docker_network: Optional[str] = Field(None, max_length=100)
relay_base_port: Optional[int] = Field(None, ge=1024, le=65535)
dashboard_base_port: Optional[int] = Field(None, ge=1024, le=65535)
branding_name: Optional[str] = Field(None, max_length=255)
branding_subtitle: Optional[str] = Field(None, max_length=255)
default_language: Optional[str] = Field(None, max_length=10)
azure_enabled: Optional[bool] = None
azure_tenant_id: Optional[str] = Field(None, max_length=255)
azure_client_id: Optional[str] = Field(None, max_length=255)
azure_client_secret: Optional[str] = None # encrypted before storage
@field_validator("base_domain")
@classmethod
@@ -143,6 +151,54 @@ class SystemConfigUpdate(BaseModel):
return v.lower().strip()
# ---------------------------------------------------------------------------
# Users
# ---------------------------------------------------------------------------
class UserCreate(BaseModel):
"""Payload to create a new local user."""
username: str = Field(..., min_length=3, max_length=100)
password: str = Field(..., min_length=8, max_length=128)
email: Optional[str] = Field(None, max_length=255)
default_language: Optional[str] = Field(None, max_length=10)
@field_validator("username")
@classmethod
def validate_username(cls, v: str) -> str:
if not re.match(r"^[a-zA-Z0-9_.-]+$", v):
raise ValueError("Username may only contain letters, digits, dots, hyphens, and underscores.")
return v.strip()
@field_validator("email")
@classmethod
def validate_user_email(cls, v: Optional[str]) -> Optional[str]:
if v is None:
return v
pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
if not re.match(pattern, v):
raise ValueError("Invalid email address.")
return v.lower().strip()
class UserUpdate(BaseModel):
"""Payload to update an existing user."""
email: Optional[str] = Field(None, max_length=255)
is_active: Optional[bool] = None
role: Optional[str] = Field(None, max_length=20)
default_language: Optional[str] = Field(None, max_length=10)
@field_validator("email")
@classmethod
def validate_user_email(cls, v: Optional[str]) -> Optional[str]:
if v is None:
return v
pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
if not re.match(pattern, v):
raise ValueError("Invalid email address.")
return v.lower().strip()
# ---------------------------------------------------------------------------
# Query params
# ---------------------------------------------------------------------------