First Build alpha 0.1

This commit is contained in:
2026-02-07 12:18:20 +01:00
parent 29e83436b2
commit 42a3cc9d9f
36 changed files with 4982 additions and 51 deletions

1
app/__init__.py Normal file
View File

@@ -0,0 +1 @@
# NetBird MSP Appliance

57
app/database.py Normal file
View File

@@ -0,0 +1,57 @@
"""Database setup and session management for NetBird MSP Appliance."""
import os
import sys
from typing import Generator
from sqlalchemy import create_engine, event
from sqlalchemy.orm import Session, sessionmaker, declarative_base
# Path to the SQLite file; overridable via env for tests / alternate installs.
DATABASE_PATH = os.environ.get("DATABASE_PATH", "/app/data/netbird_msp.db")
DATABASE_URL = f"sqlite:///{DATABASE_PATH}"
engine = create_engine(
    DATABASE_URL,
    # SQLite restricts connections to their creating thread by default;
    # the web server may hand requests to different threads, so disable it.
    connect_args={"check_same_thread": False},
    echo=False,  # set True to log all emitted SQL when debugging
)
# Enable WAL mode and foreign keys for SQLite.
# PRAGMAs are per-connection, so this listener runs on every new
# DB-API connection the pool opens.
@event.listens_for(engine, "connect")
def _set_sqlite_pragma(dbapi_connection, connection_record) -> None:
    """Apply WAL journaling and foreign-key enforcement to a new connection."""
    cursor = dbapi_connection.cursor()
    cursor.execute("PRAGMA journal_mode=WAL")
    cursor.execute("PRAGMA foreign_keys=ON")
    cursor.close()
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
Base = declarative_base()
def get_db() -> Generator[Session, None, None]:
    """Provide a request-scoped SQLAlchemy session.

    Yields a fresh session and guarantees it is closed when the
    consumer finishes, even if an exception escapes.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
def init_db() -> None:
    """Create all database tables.

    The models are imported here (not at module top) so that every
    mapped class is registered on ``Base.metadata`` before
    ``create_all`` runs, while avoiding a circular import — ``app.models``
    itself imports ``Base`` from this module.
    """
    from app.models import (  # noqa: F401
        Customer,
        Deployment,
        DeploymentLog,
        SystemConfig,
        User,
    )
    # create_all is idempotent: existing tables are left untouched.
    Base.metadata.create_all(bind=engine)
if __name__ == "__main__":
    # CLI entry point: `python -m app.database init` creates the schema.
    if sys.argv[1:2] == ["init"]:
        init_db()
        print("Database initialized successfully.")

77
app/dependencies.py Normal file
View File

@@ -0,0 +1,77 @@
"""FastAPI dependencies — JWT authentication, database session, rate limiting."""
from datetime import datetime, timedelta
from typing import Optional
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jose import JWTError, jwt
from sqlalchemy.orm import Session
from app.database import get_db
from app.models import User
from app.utils.config import JWT_ALGORITHM, JWT_EXPIRE_MINUTES, SECRET_KEY
# auto_error=False lets us return our own 401 (with WWW-Authenticate)
# instead of FastAPI's default 403 when the header is absent.
security_scheme = HTTPBearer(auto_error=False)
def create_access_token(username: str, expires_delta: Optional[timedelta] = None) -> str:
    """Create a signed JWT access token.

    Args:
        username: The user identity stored in the ``sub`` claim.
        expires_delta: Custom lifetime; defaults to JWT_EXPIRE_MINUTES.

    Returns:
        Encoded JWT string.
    """
    # datetime.utcnow() is deprecated and yields a naive timestamp; use an
    # explicitly UTC-aware datetime so the exp claim is unambiguous.
    from datetime import timezone

    expire = datetime.now(timezone.utc) + (
        expires_delta or timedelta(minutes=JWT_EXPIRE_MINUTES)
    )
    payload = {"sub": username, "exp": expire}
    return jwt.encode(payload, SECRET_KEY, algorithm=JWT_ALGORITHM)
def get_current_user(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(security_scheme),
    db: Session = Depends(get_db),
) -> User:
    """Validate the JWT bearer token and return the authenticated user.

    Args:
        credentials: Bearer token from the Authorization header.
        db: Database session.

    Returns:
        The authenticated User ORM object.

    Raises:
        HTTPException: 401 if the token is missing or invalid, or if the
            user is unknown or inactive.
    """
    # RFC 7235: every 401 response must carry a WWW-Authenticate header
    # (the original code only set it on the missing-credentials branch).
    auth_headers = {"WWW-Authenticate": "Bearer"}
    if credentials is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Authentication required.",
            headers=auth_headers,
        )
    token = credentials.credentials
    try:
        payload = jwt.decode(token, SECRET_KEY, algorithms=[JWT_ALGORITHM])
    except JWTError as exc:
        # Chain the original error so server logs retain the root cause.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid or expired token.",
            headers=auth_headers,
        ) from exc
    username: Optional[str] = payload.get("sub")
    if username is None:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token payload.",
            headers=auth_headers,
        )
    user = db.query(User).filter(User.username == username).first()
    if user is None or not user.is_active:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="User not found or inactive.",
            headers=auth_headers,
        )
    return user

90
app/main.py Normal file
View File

@@ -0,0 +1,90 @@
"""FastAPI entry point for NetBird MSP Appliance."""
import logging
import os
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from app.database import init_db
from app.routers import auth, customers, deployments, monitoring, settings
# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# Log level is configurable at runtime via LOG_LEVEL; unknown values
# fall back to INFO through the getattr default.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO").upper()
logging.basicConfig(
    level=getattr(logging, LOG_LEVEL, logging.INFO),
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
)
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Application
# ---------------------------------------------------------------------------
app = FastAPI(
    title="NetBird MSP Appliance",
    description="Multi-tenant NetBird management platform for MSPs",
    version="1.0.0",
    # Docs live under /api/* so the SPA can own the root path.
    docs_url="/api/docs",
    redoc_url="/api/redoc",
    openapi_url="/api/openapi.json",
)
# CORS — allow same-origin; adjust if needed
# NOTE(review): per the CORS spec, browsers reject a wildcard origin when
# allow_credentials=True; list explicit origins if cross-origin credentialed
# requests are actually needed — confirm intended deployment topology.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# ---------------------------------------------------------------------------
# Routers
# ---------------------------------------------------------------------------
app.include_router(auth.router, prefix="/api/auth", tags=["Authentication"])
app.include_router(settings.router, prefix="/api/settings", tags=["Settings"])
app.include_router(customers.router, prefix="/api/customers", tags=["Customers"])
# Deployments deliberately shares the /api/customers prefix — its routes are
# sub-resources such as /api/customers/{id}/deploy.
app.include_router(deployments.router, prefix="/api/customers", tags=["Deployments"])
app.include_router(monitoring.router, prefix="/api/monitoring", tags=["Monitoring"])
# ---------------------------------------------------------------------------
# Static files — serve the frontend SPA
# ---------------------------------------------------------------------------
STATIC_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "static")
# Static assets and the SPA entry point are only wired when the directory
# exists, so the API can run headless (e.g. in development).
if os.path.isdir(STATIC_DIR):
    app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
    # Serve index.html at root
    from fastapi.responses import FileResponse
    @app.get("/", include_in_schema=False)
    async def serve_index():
        """Serve the SPA entry point, or a JSON banner if index.html is missing."""
        index_path = os.path.join(STATIC_DIR, "index.html")
        if os.path.isfile(index_path):
            return FileResponse(index_path)
        return JSONResponse({"message": "NetBird MSP Appliance API is running."})
# ---------------------------------------------------------------------------
# Health endpoint (unauthenticated)
# ---------------------------------------------------------------------------
@app.get("/api/health", tags=["Health"])
async def health_check():
    """Liveness probe consumed by the Docker HEALTHCHECK directive."""
    payload = {"status": "ok", "service": "netbird-msp-appliance"}
    return payload
# ---------------------------------------------------------------------------
# Startup event
# ---------------------------------------------------------------------------
@app.on_event("startup")
async def startup_event():
    """Create database tables once at application startup.

    NOTE(review): ``@app.on_event`` is deprecated in recent FastAPI in
    favour of lifespan context managers — migrate when touching app setup.
    """
    logger.info("Starting NetBird MSP Appliance...")
    init_db()
    logger.info("Database initialized.")

232
app/models.py Normal file
View File

@@ -0,0 +1,232 @@
"""SQLAlchemy ORM models for NetBird MSP Appliance."""
from datetime import datetime
from typing import Optional
from sqlalchemy import (
Boolean,
CheckConstraint,
DateTime,
ForeignKey,
Integer,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.database import Base
class Customer(Base):
    """Customer model representing an MSP client.

    Owns at most one :class:`Deployment` (1:1) and many
    :class:`DeploymentLog` rows; both are deleted along with the
    customer via the delete-orphan cascades below.
    """
    __tablename__ = "customers"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    name: Mapped[str] = mapped_column(String(255), nullable=False)
    company: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    # 63 chars == the maximum length of a single DNS label.
    subdomain: Mapped[str] = mapped_column(String(63), unique=True, nullable=False)
    email: Mapped[str] = mapped_column(String(255), nullable=False)
    max_devices: Mapped[int] = mapped_column(Integer, default=20)
    notes: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    # Lifecycle state; constrained to ck_customer_status values below.
    status: Mapped[str] = mapped_column(
        String(20),
        default="active",
        nullable=False,
    )
    # NOTE(review): datetime.utcnow stores naive UTC timestamps — consistent
    # within these models, but mixing with aware datetimes elsewhere would break.
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )
    __table_args__ = (
        CheckConstraint(
            "status IN ('active', 'inactive', 'deploying', 'error')",
            name="ck_customer_status",
        ),
    )
    deployment: Mapped[Optional["Deployment"]] = relationship(
        "Deployment", back_populates="customer", uselist=False, cascade="all, delete-orphan"
    )
    logs: Mapped[list["DeploymentLog"]] = relationship(
        "DeploymentLog", back_populates="customer", cascade="all, delete-orphan"
    )
    def to_dict(self) -> dict:
        """Serialize customer to a JSON-safe dict (datetimes as ISO 8601)."""
        return {
            "id": self.id,
            "name": self.name,
            "company": self.company,
            "subdomain": self.subdomain,
            "email": self.email,
            "max_devices": self.max_devices,
            "notes": self.notes,
            "status": self.status,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
        }
class Deployment(Base):
    """Deployment model tracking a customer's NetBird instance.

    Exactly one row per customer (unique FK); removed automatically when
    the owning customer row is deleted (ON DELETE CASCADE).
    """
    __tablename__ = "deployments"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    customer_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("customers.id", ondelete="CASCADE"), unique=True, nullable=False
    )
    # Name prefix shared by all containers of this instance.
    container_prefix: Mapped[str] = mapped_column(String(100), nullable=False)
    # Host UDP port for the relay; unique so instances never collide.
    relay_udp_port: Mapped[int] = mapped_column(Integer, unique=True, nullable=False)
    # ID of the proxy host created in Nginx Proxy Manager, if any.
    npm_proxy_id: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
    # Stored in clear here; to_dict always masks it.
    relay_secret: Mapped[str] = mapped_column(Text, nullable=False)
    setup_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    # Container lifecycle state; constrained to ck_deployment_status below.
    deployment_status: Mapped[str] = mapped_column(
        String(20), default="pending", nullable=False
    )
    deployed_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    last_health_check: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
    __table_args__ = (
        CheckConstraint(
            "deployment_status IN ('pending', 'running', 'stopped', 'failed')",
            name="ck_deployment_status",
        ),
    )
    customer: Mapped["Customer"] = relationship("Customer", back_populates="deployment")
    def to_dict(self) -> dict:
        """Serialize deployment to a dict; the relay secret is always masked."""
        return {
            "id": self.id,
            "customer_id": self.customer_id,
            "container_prefix": self.container_prefix,
            "relay_udp_port": self.relay_udp_port,
            "npm_proxy_id": self.npm_proxy_id,
            "relay_secret": "***",  # Never expose secrets
            "setup_url": self.setup_url,
            "deployment_status": self.deployment_status,
            "deployed_at": self.deployed_at.isoformat() if self.deployed_at else None,
            "last_health_check": (
                self.last_health_check.isoformat() if self.last_health_check else None
            ),
        }
class SystemConfig(Base):
    """Singleton system configuration — always id=1.

    The single-row invariant is enforced twice: ``default=1`` on the
    primary key and the ``ck_system_config_singleton`` CHECK below.
    """
    __tablename__ = "system_config"
    id: Mapped[int] = mapped_column(
        Integer, primary_key=True, default=1
    )
    base_domain: Mapped[str] = mapped_column(String(255), nullable=False)
    admin_email: Mapped[str] = mapped_column(String(255), nullable=False)
    npm_api_url: Mapped[str] = mapped_column(String(500), nullable=False)
    # Encrypted at rest; to_dict only reports whether it is set.
    npm_api_token_encrypted: Mapped[str] = mapped_column(Text, nullable=False)
    # Container images used for new customer stacks (overridable per install).
    netbird_management_image: Mapped[str] = mapped_column(
        String(255), default="netbirdio/management:latest"
    )
    netbird_signal_image: Mapped[str] = mapped_column(
        String(255), default="netbirdio/signal:latest"
    )
    netbird_relay_image: Mapped[str] = mapped_column(
        String(255), default="netbirdio/relay:latest"
    )
    netbird_dashboard_image: Mapped[str] = mapped_column(
        String(255), default="netbirdio/dashboard:latest"
    )
    data_dir: Mapped[str] = mapped_column(String(500), default="/opt/netbird-instances")
    docker_network: Mapped[str] = mapped_column(String(100), default="npm-network")
    # First relay UDP port; per-customer ports are presumably allocated from
    # here upward — confirm against the deployment service.
    relay_base_port: Mapped[int] = mapped_column(Integer, default=3478)
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    updated_at: Mapped[datetime] = mapped_column(
        DateTime, default=datetime.utcnow, onupdate=datetime.utcnow
    )
    __table_args__ = (
        CheckConstraint("id = 1", name="ck_system_config_singleton"),
    )
    def to_dict(self) -> dict:
        """Serialize config to dictionary (token masked to a set/unset flag)."""
        return {
            "base_domain": self.base_domain,
            "admin_email": self.admin_email,
            "npm_api_url": self.npm_api_url,
            "npm_api_token_set": bool(self.npm_api_token_encrypted),
            "netbird_management_image": self.netbird_management_image,
            "netbird_signal_image": self.netbird_signal_image,
            "netbird_relay_image": self.netbird_relay_image,
            "netbird_dashboard_image": self.netbird_dashboard_image,
            "data_dir": self.data_dir,
            "docker_network": self.docker_network,
            "relay_base_port": self.relay_base_port,
            "created_at": self.created_at.isoformat() if self.created_at else None,
            "updated_at": self.updated_at.isoformat() if self.updated_at else None,
        }
class DeploymentLog(Base):
    """Log entries for deployment actions.

    Append-only audit trail per customer; rows are removed with the
    customer (ON DELETE CASCADE).
    """
    __tablename__ = "deployment_logs"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    customer_id: Mapped[int] = mapped_column(
        Integer, ForeignKey("customers.id", ondelete="CASCADE"), nullable=False
    )
    # Short machine-readable action name (e.g. the operation performed).
    action: Mapped[str] = mapped_column(String(100), nullable=False)
    # Outcome; constrained to ck_log_status values below.
    status: Mapped[str] = mapped_column(String(20), nullable=False)
    message: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    details: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    __table_args__ = (
        CheckConstraint(
            "status IN ('success', 'error', 'info')",
            name="ck_log_status",
        ),
    )
    customer: Mapped["Customer"] = relationship("Customer", back_populates="logs")
    def to_dict(self) -> dict:
        """Serialize log entry to a JSON-safe dictionary."""
        return {
            "id": self.id,
            "customer_id": self.customer_id,
            "action": self.action,
            "status": self.status,
            "message": self.message,
            "details": self.details,
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }
class User(Base):
    """Admin user model.

    Stores only a password hash (never the plaintext); ``is_active``
    gates login and token validation.
    """
    __tablename__ = "users"
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
    username: Mapped[str] = mapped_column(String(100), unique=True, nullable=False)
    password_hash: Mapped[str] = mapped_column(Text, nullable=False)
    email: Mapped[Optional[str]] = mapped_column(String(255), nullable=True)
    is_active: Mapped[bool] = mapped_column(Boolean, default=True)
    created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow)
    def to_dict(self) -> dict:
        """Serialize user to a dictionary; the password hash is never included."""
        return {
            "id": self.id,
            "username": self.username,
            "email": self.email,
            "is_active": self.is_active,
            "created_at": self.created_at.isoformat() if self.created_at else None,
        }

0
app/routers/__init__.py Normal file
View File

97
app/routers/auth.py Normal file
View File

@@ -0,0 +1,97 @@
"""Authentication API endpoints — login, logout, current user, password change."""
import logging
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies import create_access_token, get_current_user
from app.models import User
from app.utils.security import hash_password, verify_password
from app.utils.validators import ChangePasswordRequest, LoginRequest
logger = logging.getLogger(__name__)
router = APIRouter()
@router.post("/login")
async def login(payload: LoginRequest, db: Session = Depends(get_db)):
    """Authenticate a user and issue a JWT.

    Args:
        payload: Username and password.
        db: Database session.

    Returns:
        JSON with ``access_token``, ``token_type`` and the user profile.

    Raises:
        HTTPException: 401 on bad credentials, 403 on a disabled account.
    """
    account = db.query(User).filter(User.username == payload.username).first()
    credentials_ok = bool(account) and verify_password(payload.password, account.password_hash)
    if not credentials_ok:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid username or password.",
        )
    if not account.is_active:
        raise HTTPException(
            status_code=status.HTTP_403_FORBIDDEN,
            detail="Account is disabled.",
        )
    token = create_access_token(account.username)
    logger.info("User %s logged in.", account.username)
    return {
        "access_token": token,
        "token_type": "bearer",
        "user": account.to_dict(),
    }
@router.post("/logout")
async def logout(current_user: User = Depends(get_current_user)):
    """Log the user out.

    Tokens are stateless JWTs, so logout is purely a client-side
    discard; the server only records the event.

    Returns:
        Confirmation message.
    """
    logger.info("User %s logged out.", current_user.username)
    return {"message": "Logged out successfully."}
@router.get("/me")
async def get_me(current_user: User = Depends(get_current_user)):
    """Return the authenticated user's profile (password hash excluded)."""
    profile = current_user.to_dict()
    return profile
@router.post("/change-password")
async def change_password(
    payload: ChangePasswordRequest,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Change the current user's password.

    Args:
        payload: Current and new password.
        current_user: Authenticated user.
        db: Database session.

    Returns:
        Confirmation message.

    Raises:
        HTTPException: 400 when the current password does not match.
    """
    current_ok = verify_password(payload.current_password, current_user.password_hash)
    if not current_ok:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Current password is incorrect.",
        )
    current_user.password_hash = hash_password(payload.new_password)
    db.commit()
    logger.info("Password changed for user %s.", current_user.username)
    return {"message": "Password changed successfully."}

231
app/routers/customers.py Normal file
View File

@@ -0,0 +1,231 @@
"""Customer CRUD API endpoints with automatic deployment on create."""
import logging
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, status
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, DeploymentLog, User
from app.services import netbird_service
from app.utils.validators import CustomerCreate, CustomerUpdate
logger = logging.getLogger(__name__)
router = APIRouter()
@router.post("", status_code=status.HTTP_201_CREATED)
async def create_customer(
    payload: CustomerCreate,
    background_tasks: BackgroundTasks,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Create a new customer and deploy their NetBird stack.

    Validates that the subdomain is unique, creates the customer record,
    then runs the deployment inline — the request does not return until
    deployment finishes.

    NOTE(review): despite the ``background_tasks`` parameter, deployment
    is awaited synchronously below; the parameter is currently unused.
    Either wire the deployment through it or drop the claim of
    background execution.

    Args:
        payload: Customer creation data.
        background_tasks: FastAPI background task runner (unused).

    Returns:
        Created customer dict with the deployment result attached.

    Raises:
        HTTPException: 409 when the subdomain is already taken.
    """
    # Check subdomain uniqueness up front (also enforced by the DB's
    # UNIQUE constraint on customers.subdomain).
    existing = db.query(Customer).filter(Customer.subdomain == payload.subdomain).first()
    if existing:
        raise HTTPException(
            status_code=status.HTTP_409_CONFLICT,
            detail=f"Subdomain '{payload.subdomain}' is already in use.",
        )
    customer = Customer(
        name=payload.name,
        company=payload.company,
        subdomain=payload.subdomain,
        email=payload.email,
        max_devices=payload.max_devices,
        notes=payload.notes,
        status="deploying",
    )
    db.add(customer)
    db.commit()
    db.refresh(customer)
    logger.info("Customer %d (%s) created by %s.", customer.id, customer.subdomain, current_user.username)
    # Deploy synchronously; the result is folded into the response.
    result = await netbird_service.deploy_customer(db, customer.id)
    response = customer.to_dict()
    response["deployment"] = result
    return response
@router.get("")
async def list_customers(
    page: int = Query(default=1, ge=1),
    per_page: int = Query(default=25, ge=1, le=100),
    search: Optional[str] = Query(default=None),
    status_filter: Optional[str] = Query(default=None, alias="status"),
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """List customers with pagination, search, and status filter.

    Args:
        page: Page number (1-indexed).
        per_page: Items per page (max 100).
        search: Case-insensitive substring match on name, subdomain,
            email, or company.
        status_filter: Filter by status (``?status=`` query parameter).

    Returns:
        Paginated customer list with metadata.
    """
    query = db.query(Customer)
    if search:
        pattern = f"%{search}%"
        query = query.filter(
            Customer.name.ilike(pattern)
            | Customer.subdomain.ilike(pattern)
            | Customer.email.ilike(pattern)
            | Customer.company.ilike(pattern)
        )
    if status_filter:
        query = query.filter(Customer.status == status_filter)
    total = query.count()
    offset = (page - 1) * per_page
    rows = (
        query.order_by(Customer.created_at.desc())
        .offset(offset)
        .limit(per_page)
        .all()
    )

    def _serialize(record: Customer) -> dict:
        # Attach the (possibly absent) deployment to each row.
        data = record.to_dict()
        data["deployment"] = record.deployment.to_dict() if record.deployment else None
        return data

    return {
        "items": [_serialize(record) for record in rows],
        "total": total,
        "page": page,
        "per_page": per_page,
        # An empty result set still reports one (empty) page.
        "pages": (total + per_page - 1) // per_page if total > 0 else 1,
    }
@router.get("/{customer_id}")
async def get_customer(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Get detailed customer information including deployment and logs.

    Args:
        customer_id: Customer ID.

    Returns:
        Customer dict with deployment info and the 50 most recent logs.

    Raises:
        HTTPException: 404 when the customer does not exist.
    """
    record = db.query(Customer).filter(Customer.id == customer_id).first()
    if record is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Customer not found.",
        )
    recent_logs = (
        db.query(DeploymentLog)
        .filter(DeploymentLog.customer_id == customer_id)
        .order_by(DeploymentLog.created_at.desc())
        .limit(50)
        .all()
    )
    data = record.to_dict()
    data["deployment"] = record.deployment.to_dict() if record.deployment else None
    data["logs"] = [entry.to_dict() for entry in recent_logs]
    return data
@router.put("/{customer_id}")
async def update_customer(
    customer_id: int,
    payload: CustomerUpdate,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Update customer information.

    Only fields present (non-None) in the payload are applied.

    Args:
        customer_id: Customer ID.
        payload: Fields to update.

    Returns:
        Updated customer dict.

    Raises:
        HTTPException: 404 when the customer does not exist.
    """
    record = db.query(Customer).filter(Customer.id == customer_id).first()
    if record is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Customer not found.",
        )
    changes = payload.model_dump(exclude_none=True)
    for attr, new_value in changes.items():
        # Unknown attribute names are silently skipped rather than erroring.
        if hasattr(record, attr):
            setattr(record, attr, new_value)
    record.updated_at = datetime.utcnow()
    db.commit()
    db.refresh(record)
    logger.info("Customer %d updated by %s.", customer_id, current_user.username)
    return record.to_dict()
@router.delete("/{customer_id}")
async def delete_customer(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Delete a customer and clean up all resources.

    Tears down containers, the NPM proxy, and the instance directory,
    then removes the database record (deployment and logs cascade).

    Args:
        customer_id: Customer ID.

    Returns:
        Confirmation message.

    Raises:
        HTTPException: 404 when the customer does not exist.
    """
    record = db.query(Customer).filter(Customer.id == customer_id).first()
    if record is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Customer not found.",
        )
    # Infrastructure cleanup first; DB rows go last so a failed teardown
    # leaves the record visible for retry.
    await netbird_service.undeploy_customer(db, customer_id)
    db.delete(record)
    db.commit()
    logger.info("Customer %d deleted by %s.", customer_id, current_user.username)
    return {"message": f"Customer {customer_id} deleted successfully."}

185
app/routers/deployments.py Normal file
View File

@@ -0,0 +1,185 @@
"""Deployment management API — start, stop, restart, logs, health for customers."""
import logging
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, User
from app.services import docker_service, netbird_service
logger = logging.getLogger(__name__)
router = APIRouter()
@router.post("/{customer_id}/deploy")
async def manual_deploy(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Manually trigger deployment for a customer.

    Use this to re-deploy a customer whose previous deployment failed.
    Any existing deployment is torn down first.

    Args:
        customer_id: Customer ID.

    Returns:
        Deployment result dict.

    Raises:
        HTTPException: 404 if the customer does not exist, 500 if the
            deployment fails.
    """
    # Consistent with the other endpoints in this module: 404 via helper
    # instead of a duplicated inline lookup.
    _require_customer(db, customer_id)
    # Remove existing deployment if present
    existing = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
    if existing:
        await netbird_service.undeploy_customer(db, customer_id)
    result = await netbird_service.deploy_customer(db, customer_id)
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=result.get("error", "Deployment failed."),
        )
    return result
def _lifecycle_action(db: Session, customer_id: int, action, fallback_error: str) -> dict:
    """Shared body for the start/stop/restart endpoints.

    Verifies the customer exists, invokes *action(db, customer_id)*, and
    converts a non-success result into an HTTP 500 carrying the
    service's error message (or *fallback_error* when none is given).
    """
    _require_customer(db, customer_id)
    result = action(db, customer_id)
    if not result.get("success"):
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=result.get("error", fallback_error),
        )
    return result
@router.post("/{customer_id}/start")
async def start_customer(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Start containers for a customer.

    Args:
        customer_id: Customer ID.

    Returns:
        Result dict.
    """
    return _lifecycle_action(
        db, customer_id, netbird_service.start_customer, "Failed to start containers."
    )
@router.post("/{customer_id}/stop")
async def stop_customer(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Stop containers for a customer.

    Args:
        customer_id: Customer ID.

    Returns:
        Result dict.
    """
    return _lifecycle_action(
        db, customer_id, netbird_service.stop_customer, "Failed to stop containers."
    )
@router.post("/{customer_id}/restart")
async def restart_customer(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Restart containers for a customer.

    Args:
        customer_id: Customer ID.

    Returns:
        Result dict.
    """
    return _lifecycle_action(
        db, customer_id, netbird_service.restart_customer, "Failed to restart containers."
    )
@router.get("/{customer_id}/logs")
async def get_customer_logs(
    customer_id: int,
    tail: int = 200,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Get container logs for a customer.

    Args:
        customer_id: Customer ID.
        tail: Number of log lines per container.

    Returns:
        Dict mapping container name to log text.

    Raises:
        HTTPException: 404 when the customer or its deployment is missing.
    """
    _require_customer(db, customer_id)
    deployment = (
        db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
    )
    if deployment is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="No deployment found for this customer.",
        )
    container_logs = docker_service.get_all_container_logs(
        deployment.container_prefix, tail=tail
    )
    return {"logs": container_logs}
@router.get("/{customer_id}/health")
async def check_customer_health(
    customer_id: int,
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Run a health check on a customer's deployment.

    Args:
        customer_id: Customer ID.

    Returns:
        Health check results from the NetBird service layer.
    """
    _require_customer(db, customer_id)
    health = netbird_service.get_customer_health(db, customer_id)
    return health
def _require_customer(db: Session, customer_id: int) -> Customer:
    """Fetch a customer row or abort the request with 404.

    Args:
        db: Database session.
        customer_id: Customer ID.

    Returns:
        Customer ORM object.

    Raises:
        HTTPException: If no customer with that ID exists.
    """
    found = db.query(Customer).filter(Customer.id == customer_id).first()
    if found is None:
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Customer not found.")
    return found

116
app/routers/monitoring.py Normal file
View File

@@ -0,0 +1,116 @@
"""Monitoring API — system overview, customer statuses, host resources."""
import logging
import platform
from typing import Any
import psutil
from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies import get_current_user
from app.models import Customer, Deployment, User
from app.services import docker_service
logger = logging.getLogger(__name__)
router = APIRouter()
@router.get("/status")
async def system_status(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> dict[str, Any]:
    """System overview with aggregated customer statistics.

    Returns:
        Counts by status and total customers.
    """
    from sqlalchemy import func

    # One GROUP BY round-trip instead of five separate COUNT queries.
    counts = dict(
        db.query(Customer.status, func.count(Customer.id))
        .group_by(Customer.status)
        .all()
    )
    return {
        # Sum over all statuses equals the full table count.
        "total_customers": sum(counts.values()),
        "active": counts.get("active", 0),
        "inactive": counts.get("inactive", 0),
        "deploying": counts.get("deploying", 0),
        "error": counts.get("error", 0),
    }
@router.get("/customers")
async def all_customers_status(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
) -> list[dict[str, Any]]:
    """Get deployment status for every customer.

    NOTE: issues one Docker status lookup per deployed customer, so the
    response time grows linearly with the customer count.

    Returns:
        List of dicts with customer info and container statuses.
    """
    results: list[dict[str, Any]] = []
    for record in db.query(Customer).order_by(Customer.id).all():
        entry: dict[str, Any] = {
            "id": record.id,
            "name": record.name,
            "subdomain": record.subdomain,
            "status": record.status,
        }
        deployment = record.deployment
        if deployment:
            entry["deployment_status"] = deployment.deployment_status
            entry["containers"] = docker_service.get_container_status(deployment.container_prefix)
            entry["relay_udp_port"] = deployment.relay_udp_port
            entry["setup_url"] = deployment.setup_url
        else:
            entry["deployment_status"] = None
            entry["containers"] = []
        results.append(entry)
    return results
@router.get("/resources")
async def host_resources(
    current_user: User = Depends(get_current_user),
) -> dict[str, Any]:
    """Return host system resource usage.

    Returns:
        CPU, memory, and disk information.
    """
    import asyncio

    # psutil.cpu_percent(interval=1) sleeps for a full second; run it in a
    # worker thread so the async event loop is not blocked for that long.
    cpu_percent = await asyncio.to_thread(psutil.cpu_percent, 1)
    cpu_count = psutil.cpu_count()
    mem = psutil.virtual_memory()
    disk = psutil.disk_usage("/")
    return {
        "hostname": platform.node(),
        "os": f"{platform.system()} {platform.release()}",
        "cpu": {
            "percent": cpu_percent,
            "count": cpu_count,
        },
        "memory": {
            "total_gb": round(mem.total / (1024 ** 3), 1),
            "used_gb": round(mem.used / (1024 ** 3), 1),
            "available_gb": round(mem.available / (1024 ** 3), 1),
            "percent": mem.percent,
        },
        "disk": {
            "total_gb": round(disk.total / (1024 ** 3), 1),
            "used_gb": round(disk.used / (1024 ** 3), 1),
            "free_gb": round(disk.free / (1024 ** 3), 1),
            "percent": disk.percent,
        },
    }

113
app/routers/settings.py Normal file
View File

@@ -0,0 +1,113 @@
"""System configuration API — read/write all settings from the database.
There is no .env file. Every setting lives in the ``system_config`` table
(singleton row with id=1) and is editable via the Web UI settings page.
"""
import logging
from datetime import datetime
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.orm import Session
from app.database import get_db
from app.dependencies import get_current_user
from app.models import SystemConfig, User
from app.services import npm_service
from app.utils.config import get_system_config
from app.utils.security import encrypt_value
from app.utils.validators import SystemConfigUpdate
logger = logging.getLogger(__name__)
router = APIRouter()
@router.get("/system")
async def get_settings(
    current_user: User = Depends(get_current_user),
    db: Session = Depends(get_db),
):
    """Return all system configuration values (token masked).

    Returns:
        System config dict.

    Raises:
        HTTPException: 404 when the singleton config row is absent.
    """
    config_row = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if config_row is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="System configuration not initialized. Run install.sh first.",
        )
    return config_row.to_dict()
@router.put("/system")
async def update_settings(
payload: SystemConfigUpdate,
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Update system configuration values.
Only provided (non-None) fields are updated. The NPM API token is
encrypted before storage.
Args:
payload: Fields to update.
Returns:
Updated system config dict.
"""
row = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
if not row:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="System configuration not initialized.",
)
update_data = payload.model_dump(exclude_none=True)
# Handle NPM token encryption
if "npm_api_token" in update_data:
raw_token = update_data.pop("npm_api_token")
row.npm_api_token_encrypted = encrypt_value(raw_token)
for field, value in update_data.items():
if hasattr(row, field):
setattr(row, field, value)
row.updated_at = datetime.utcnow()
db.commit()
db.refresh(row)
logger.info("System configuration updated by %s.", current_user.username)
return row.to_dict()
@router.get("/test-npm")
async def test_npm(
current_user: User = Depends(get_current_user),
db: Session = Depends(get_db),
):
"""Test connectivity to the Nginx Proxy Manager API.
Loads the NPM URL and decrypted token from the database and attempts
to list proxy hosts.
Returns:
Dict with ``ok`` and ``message``.
"""
config = get_system_config(db)
if not config:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND,
detail="System configuration not initialized.",
)
if not config.npm_api_url or not config.npm_api_token:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail="NPM API URL or token not configured.",
)
result = await npm_service.test_npm_connection(config.npm_api_url, config.npm_api_token)
return result

0
app/services/__init__.py Normal file
View File

View File

@@ -0,0 +1,334 @@
"""Docker container management via the Python Docker SDK.
Responsible for creating, starting, stopping, restarting, and removing
per-customer Docker Compose stacks. Also provides log retrieval and
container health/status information.
"""
import logging
import os
import subprocess
import time
from typing import Any, Optional
import docker
from docker.errors import DockerException, NotFound
logger = logging.getLogger(__name__)
def _get_client() -> docker.DockerClient:
    """Return a Docker client configured from the environment.

    Uses the standard environment/Unix-socket discovery of the SDK.

    Returns:
        docker.DockerClient instance.
    """
    client = docker.from_env()
    return client
def compose_up(instance_dir: str, project_name: str) -> bool:
    """Run ``docker compose up -d`` for a customer instance.

    Args:
        instance_dir: Absolute path to the customer's instance directory.
        project_name: Docker Compose project name (e.g. ``netbird-kunde5``).

    Returns:
        True on success.

    Raises:
        FileNotFoundError: If the instance has no docker-compose.yml.
        RuntimeError: If ``docker compose up`` exits non-zero.
    """
    compose_path = os.path.join(instance_dir, "docker-compose.yml")
    if not os.path.isfile(compose_path):
        raise FileNotFoundError(f"docker-compose.yml not found at {compose_path}")
    command = [
        "docker", "compose",
        "-f", compose_path,
        "-p", project_name,
        "up", "-d", "--remove-orphans",
    ]
    logger.info("Running: %s", " ".join(command))
    completed = subprocess.run(command, capture_output=True, text=True, timeout=120)
    if completed.returncode != 0:
        logger.error("docker compose up failed: %s", completed.stderr)
        raise RuntimeError(f"docker compose up failed: {completed.stderr}")
    logger.info("docker compose up succeeded for %s", project_name)
    return True
def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = False) -> bool:
    """Run ``docker compose down`` for a customer instance.

    Args:
        instance_dir: Absolute path to the customer's instance directory.
        project_name: Docker Compose project name.
        remove_volumes: Whether to also remove volumes.

    Returns:
        True on success (a non-zero exit is logged but not treated as fatal).
    """
    command = [
        "docker", "compose",
        "-f", os.path.join(instance_dir, "docker-compose.yml"),
        "-p", project_name,
        "down",
    ] + (["-v"] if remove_volumes else [])
    logger.info("Running: %s", " ".join(command))
    completed = subprocess.run(command, capture_output=True, text=True, timeout=120)
    if completed.returncode != 0:
        # Teardown is best-effort: containers may already be gone.
        logger.warning("docker compose down returned non-zero: %s", completed.stderr)
    return True
def compose_stop(instance_dir: str, project_name: str) -> bool:
    """Run ``docker compose stop`` for a customer instance.

    Args:
        instance_dir: Absolute path to the customer's instance directory.
        project_name: Docker Compose project name.

    Returns:
        True on success (exit code 0).
    """
    command = [
        "docker", "compose",
        "-f", os.path.join(instance_dir, "docker-compose.yml"),
        "-p", project_name,
        "stop",
    ]
    logger.info("Running: %s", " ".join(command))
    completed = subprocess.run(command, capture_output=True, text=True, timeout=120)
    return completed.returncode == 0
def compose_start(instance_dir: str, project_name: str) -> bool:
    """Run ``docker compose start`` for a customer instance.

    Args:
        instance_dir: Absolute path to the customer's instance directory.
        project_name: Docker Compose project name.

    Returns:
        True on success (exit code 0).
    """
    command = [
        "docker", "compose",
        "-f", os.path.join(instance_dir, "docker-compose.yml"),
        "-p", project_name,
        "start",
    ]
    logger.info("Running: %s", " ".join(command))
    completed = subprocess.run(command, capture_output=True, text=True, timeout=120)
    return completed.returncode == 0
def compose_restart(instance_dir: str, project_name: str) -> bool:
    """Run ``docker compose restart`` for a customer instance.

    Args:
        instance_dir: Absolute path to the customer's instance directory.
        project_name: Docker Compose project name.

    Returns:
        True on success (exit code 0).
    """
    command = [
        "docker", "compose",
        "-f", os.path.join(instance_dir, "docker-compose.yml"),
        "-p", project_name,
        "restart",
    ]
    logger.info("Running: %s", " ".join(command))
    completed = subprocess.run(command, capture_output=True, text=True, timeout=120)
    return completed.returncode == 0
def get_container_status(container_prefix: str) -> list[dict[str, Any]]:
    """Get the status of all containers matching a prefix.

    Args:
        container_prefix: Container name prefix (e.g. ``netbird-kunde5``).

    Returns:
        List of dicts with container name, status, health, image, and
        creation timestamp. Empty list on Docker errors.
    """
    client = _get_client()
    statuses: list[dict[str, Any]] = []
    try:
        for container in client.containers.list(all=True, filters={"name": container_prefix}):
            health_info = container.attrs.get("State", {}).get("Health")
            tags = container.image.tags
            statuses.append({
                "name": container.name,
                "status": container.status,
                "health": health_info.get("Status", "N/A") if health_info else "N/A",
                "image": str(tags[0]) if tags else str(container.image.id[:12]),
                "created": container.attrs.get("Created", ""),
            })
    except DockerException as exc:
        logger.error("Failed to get container status: %s", exc)
    return statuses
def get_container_logs(container_name: str, tail: int = 200) -> str:
    """Retrieve recent logs from a container.

    Args:
        container_name: Full container name.
        tail: Number of log lines to retrieve.

    Returns:
        Log text, or a human-readable error string on failure.
    """
    try:
        target = _get_client().containers.get(container_name)
        raw = target.logs(tail=tail, timestamps=True)
    except NotFound:
        return f"Container {container_name} not found."
    except DockerException as exc:
        return f"Error retrieving logs: {exc}"
    return raw.decode("utf-8", errors="replace")
def get_all_container_logs(container_prefix: str, tail: int = 100) -> dict[str, str]:
    """Get logs for all containers matching a prefix.

    Args:
        container_prefix: Container name prefix.
        tail: Lines per container.

    Returns:
        Dict mapping container name to log text (or an error marker).
    """
    collected: dict[str, str] = {}
    client = _get_client()
    try:
        for container in client.containers.list(all=True, filters={"name": container_prefix}):
            try:
                raw = container.logs(tail=tail, timestamps=True)
                collected[container.name] = raw.decode("utf-8", errors="replace")
            except DockerException:
                collected[container.name] = "Error retrieving logs."
    except DockerException as exc:
        logger.error("Failed to list containers: %s", exc)
    return collected
def wait_for_healthy(container_prefix: str, timeout: int = 60) -> bool:
    """Wait until all containers with the given prefix are running.

    Note: despite the name, this polls Docker's "running" state rather
    than healthcheck results.

    Args:
        container_prefix: Container name prefix.
        timeout: Maximum seconds to wait.

    Returns:
        True if all containers started within the timeout.
    """
    client = _get_client()
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            found = client.containers.list(
                all=True, filters={"name": container_prefix}
            )
            if not found:
                # Nothing created yet — short poll and re-check.
                time.sleep(2)
                continue
            if all(c.status == "running" for c in found):
                logger.info("All containers for %s are running.", container_prefix)
                return True
        except DockerException as exc:
            logger.warning("Health check error: %s", exc)
        time.sleep(3)
    logger.warning("Timeout waiting for %s containers to start.", container_prefix)
    return False
def get_docker_stats(container_prefix: str) -> list[dict[str, Any]]:
    """Retrieve resource usage stats for containers matching a prefix.

    CPU percentage follows the Docker CLI formula: the usage delta between
    the current and previous readings, scaled by the CPU count and 100.

    Args:
        container_prefix: Container name prefix.

    Returns:
        List of dicts with CPU and memory stats; a per-container ``error``
        entry is appended for containers whose stats could not be read.
    """
    client = _get_client()
    stats_list: list[dict[str, Any]] = []
    try:
        containers = client.containers.list(filters={"name": container_prefix})
        for c in containers:
            try:
                raw = c.stats(stream=False)
                cpu_stats = raw.get("cpu_stats", {})
                precpu_stats = raw.get("precpu_stats", {})
                cpu_delta = (
                    cpu_stats.get("cpu_usage", {}).get("total_usage", 0)
                    - precpu_stats.get("cpu_usage", {}).get("total_usage", 0)
                )
                system_delta = (
                    cpu_stats.get("system_cpu_usage", 0)
                    - precpu_stats.get("system_cpu_usage", 0)
                )
                # Fix: on cgroup v2 hosts "percpu_usage" is absent, which made
                # the old len(...) default collapse to 1 CPU. Prefer the
                # "online_cpus" field the stats API provides, then fall back.
                num_cpus = cpu_stats.get("online_cpus") or len(
                    cpu_stats.get("cpu_usage", {}).get("percpu_usage", [1])
                ) or 1
                cpu_pct = 0.0
                if system_delta > 0:
                    cpu_pct = (cpu_delta / system_delta) * num_cpus * 100
                mem_usage = raw.get("memory_stats", {}).get("usage", 0)
                mem_limit = raw.get("memory_stats", {}).get("limit", 1)
                stats_list.append({
                    "name": c.name,
                    "cpu_percent": round(cpu_pct, 2),
                    "memory_usage_mb": round(mem_usage / 1024 / 1024, 1),
                    "memory_limit_mb": round(mem_limit / 1024 / 1024, 1),
                    "memory_percent": round((mem_usage / mem_limit) * 100, 1) if mem_limit else 0,
                })
            except DockerException:
                stats_list.append({"name": c.name, "error": "Failed to get stats"})
    except DockerException as exc:
        logger.error("Failed to get docker stats: %s", exc)
    return stats_list
def remove_instance_containers(container_prefix: str) -> bool:
    """Force-remove all containers matching a prefix.

    Args:
        container_prefix: Container name prefix.

    Returns:
        True if every matching container was removed.
    """
    client = _get_client()
    try:
        for container in client.containers.list(all=True, filters={"name": container_prefix}):
            logger.info("Removing container %s", container.name)
            container.remove(force=True)
    except DockerException as exc:
        logger.error("Failed to remove containers: %s", exc)
        return False
    return True

View File

@@ -0,0 +1,396 @@
"""NetBird deployment orchestration service.
Coordinates the full customer deployment lifecycle:
1. Validate inputs
2. Allocate ports
3. Generate configs from Jinja2 templates
4. Create instance directory and write files
5. Start Docker containers
6. Wait for health checks
7. Create NPM proxy hosts
8. Update database
Includes comprehensive rollback on failure.
"""
import logging
import os
import shutil
from datetime import datetime
from typing import Any
from jinja2 import Environment, FileSystemLoader
from sqlalchemy.orm import Session
from app.models import Customer, Deployment, DeploymentLog, SystemConfig
from app.services import docker_service, npm_service, port_manager
from app.utils.config import get_system_config
from app.utils.security import encrypt_value, generate_relay_secret
logger = logging.getLogger(__name__)
# Path to the Jinja2 templates directory (three levels up from this module).
TEMPLATE_DIR = os.path.join(
    os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "templates"
)


def _get_jinja_env() -> Environment:
    """Create a Jinja2 environment for template rendering."""
    env = Environment(
        loader=FileSystemLoader(TEMPLATE_DIR),
        keep_trailing_newline=True,
    )
    return env
def _log_action(
    db: Session, customer_id: int, action: str, status: str, message: str, details: str = ""
) -> None:
    """Write a deployment log entry and commit it immediately.

    Args:
        db: Active session.
        customer_id: The customer this log belongs to.
        action: Action name (e.g. ``deploy``, ``stop``).
        status: ``success``, ``error``, or ``info``.
        message: Human-readable message.
        details: Additional details (optional).
    """
    db.add(
        DeploymentLog(
            customer_id=customer_id,
            action=action,
            status=status,
            message=message,
            details=details,
        )
    )
    db.commit()
async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
    """Execute the full deployment workflow for a customer.

    Sequence: allocate a relay UDP port, generate a relay secret, create
    the instance directory, render docker-compose/management/relay
    templates, start the stack, wait for containers, create the NPM proxy
    host, and persist a Deployment row. On any exception, containers and
    the instance directory are rolled back and the customer is marked
    ``error``. NPM failure and slow container startup are logged but do
    NOT abort the deployment.

    Args:
        db: Active session.
        customer_id: Customer to deploy.

    Returns:
        Dict with ``success`` and ``setup_url``, or ``error``.
    """
    customer = db.query(Customer).filter(Customer.id == customer_id).first()
    if not customer:
        return {"success": False, "error": "Customer not found."}
    config = get_system_config(db)
    if not config:
        return {"success": False, "error": "System not configured. Please set up system settings first."}
    # Update status to deploying
    customer.status = "deploying"
    db.commit()
    _log_action(db, customer_id, "deploy", "info", "Deployment started.")
    # Tracked so the except-block knows what to roll back.
    allocated_port = None
    instance_dir = None
    container_prefix = f"netbird-kunde{customer_id}"
    try:
        # Step 1: Allocate relay UDP port
        allocated_port = port_manager.allocate_port(db, config.relay_base_port)
        _log_action(db, customer_id, "deploy", "info", f"Allocated UDP port {allocated_port}.")
        # Step 2: Generate relay secret
        relay_secret = generate_relay_secret()
        # Step 3: Create instance directory (plus per-service data dirs)
        instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
        os.makedirs(instance_dir, exist_ok=True)
        os.makedirs(os.path.join(instance_dir, "data", "management"), exist_ok=True)
        os.makedirs(os.path.join(instance_dir, "data", "signal"), exist_ok=True)
        _log_action(db, customer_id, "deploy", "info", f"Created directory {instance_dir}.")
        # Step 4: Render templates (shared variable dict for all three files)
        jinja_env = _get_jinja_env()
        template_vars = {
            "customer_id": customer_id,
            "subdomain": customer.subdomain,
            "base_domain": config.base_domain,
            "instance_dir": instance_dir,
            "relay_udp_port": allocated_port,
            "relay_secret": relay_secret,
            "netbird_management_image": config.netbird_management_image,
            "netbird_signal_image": config.netbird_signal_image,
            "netbird_relay_image": config.netbird_relay_image,
            "netbird_dashboard_image": config.netbird_dashboard_image,
            "docker_network": config.docker_network,
        }
        # docker-compose.yml
        dc_template = jinja_env.get_template("docker-compose.yml.j2")
        dc_content = dc_template.render(**template_vars)
        with open(os.path.join(instance_dir, "docker-compose.yml"), "w") as f:
            f.write(dc_content)
        # management.json
        mgmt_template = jinja_env.get_template("management.json.j2")
        mgmt_content = mgmt_template.render(**template_vars)
        with open(os.path.join(instance_dir, "management.json"), "w") as f:
            f.write(mgmt_content)
        # relay.env
        relay_template = jinja_env.get_template("relay.env.j2")
        relay_content = relay_template.render(**template_vars)
        with open(os.path.join(instance_dir, "relay.env"), "w") as f:
            f.write(relay_content)
        _log_action(db, customer_id, "deploy", "info", "Configuration files generated.")
        # Step 5: Start Docker containers
        docker_service.compose_up(instance_dir, container_prefix)
        _log_action(db, customer_id, "deploy", "info", "Docker containers started.")
        # Step 6: Wait for containers to be healthy
        healthy = docker_service.wait_for_healthy(container_prefix, timeout=60)
        if not healthy:
            _log_action(
                db, customer_id, "deploy", "error",
                "Containers did not become healthy within 60 seconds."
            )
            # Don't fail completely — containers might still come up
        # Step 7: Create NPM proxy host
        domain = f"{customer.subdomain}.{config.base_domain}"
        dashboard_container = f"netbird-kunde{customer_id}-dashboard"
        npm_result = await npm_service.create_proxy_host(
            api_url=config.npm_api_url,
            api_token=config.npm_api_token,
            domain=domain,
            forward_host=dashboard_container,
            forward_port=80,
            admin_email=config.admin_email,
            subdomain=customer.subdomain,
            customer_id=customer_id,
        )
        npm_proxy_id = npm_result.get("proxy_id")
        if npm_result.get("error"):
            _log_action(
                db, customer_id, "deploy", "error",
                f"NPM proxy creation failed: {npm_result['error']}",
            )
            # Continue — deployment works without NPM, admin can fix later
        # Step 8: Create deployment record (relay secret stored encrypted)
        setup_url = f"https://{domain}"
        deployment = Deployment(
            customer_id=customer_id,
            container_prefix=container_prefix,
            relay_udp_port=allocated_port,
            npm_proxy_id=npm_proxy_id,
            relay_secret=encrypt_value(relay_secret),
            setup_url=setup_url,
            deployment_status="running",
            deployed_at=datetime.utcnow(),
        )
        db.add(deployment)
        customer.status = "active"
        db.commit()
        _log_action(db, customer_id, "deploy", "success", f"Deployment complete. URL: {setup_url}")
        return {"success": True, "setup_url": setup_url}
    except Exception as exc:
        logger.exception("Deployment failed for customer %d", customer_id)
        # Rollback: stop containers if they were started
        try:
            docker_service.compose_down(
                instance_dir or os.path.join(config.data_dir, f"kunde{customer_id}"),
                container_prefix,
                remove_volumes=True,
            )
        except Exception:
            pass
        # Rollback: remove instance directory
        if instance_dir and os.path.isdir(instance_dir):
            try:
                shutil.rmtree(instance_dir)
            except Exception:
                pass
        customer.status = "error"
        db.commit()
        _log_action(
            db, customer_id, "deploy", "error",
            f"Deployment failed: {exc}",
            details=str(exc),
        )
        return {"success": False, "error": str(exc)}
async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
    """Remove all resources for a customer deployment.

    Tears down, best-effort and in order: Docker containers/volumes, the
    NPM proxy host, the on-disk instance directory, and finally the
    Deployment database row. Each step logs its own success or failure;
    a failing step does not abort the remaining cleanup.

    Args:
        db: Active session.
        customer_id: Customer to undeploy.

    Returns:
        Dict with ``success`` bool (plus ``error`` when the customer is
        unknown).
    """
    customer = db.query(Customer).filter(Customer.id == customer_id).first()
    if not customer:
        return {"success": False, "error": "Customer not found."}
    deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
    config = get_system_config(db)
    if deployment and config:
        instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
        # Stop and remove containers
        try:
            docker_service.compose_down(instance_dir, deployment.container_prefix, remove_volumes=True)
            _log_action(db, customer_id, "undeploy", "info", "Containers removed.")
        except Exception as exc:
            _log_action(db, customer_id, "undeploy", "error", f"Container removal error: {exc}")
        # Remove NPM proxy host
        if deployment.npm_proxy_id and config.npm_api_token:
            try:
                await npm_service.delete_proxy_host(
                    config.npm_api_url, config.npm_api_token, deployment.npm_proxy_id
                )
                _log_action(db, customer_id, "undeploy", "info", "NPM proxy host removed.")
            except Exception as exc:
                _log_action(db, customer_id, "undeploy", "error", f"NPM removal error: {exc}")
        # Remove instance directory
        if os.path.isdir(instance_dir):
            try:
                shutil.rmtree(instance_dir)
                _log_action(db, customer_id, "undeploy", "info", "Instance directory removed.")
            except Exception as exc:
                _log_action(db, customer_id, "undeploy", "error", f"Directory removal error: {exc}")
    # Fix: always drop the Deployment row when it exists — previously a
    # missing system config left a stale record behind (blocking any
    # re-deployment) while still reporting success.
    if deployment:
        db.delete(deployment)
        db.commit()
    _log_action(db, customer_id, "undeploy", "success", "Undeployment complete.")
    return {"success": True}
def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
    """Stop containers for a customer.

    Args:
        db: Active session.
        customer_id: Customer whose containers to stop.

    Returns:
        Dict with ``success`` bool.
    """
    deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
    config = get_system_config(db)
    if deployment is None or config is None:
        return {"success": False, "error": "Deployment or config not found."}
    instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
    stopped = docker_service.compose_stop(instance_dir, deployment.container_prefix)
    if not stopped:
        _log_action(db, customer_id, "stop", "error", "Failed to stop containers.")
        return {"success": False}
    deployment.deployment_status = "stopped"
    db.commit()
    _log_action(db, customer_id, "stop", "success", "Containers stopped.")
    return {"success": True}
def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
    """Start containers for a customer.

    Args:
        db: Active session.
        customer_id: Customer whose containers to start.

    Returns:
        Dict with ``success`` bool.
    """
    deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
    config = get_system_config(db)
    if deployment is None or config is None:
        return {"success": False, "error": "Deployment or config not found."}
    instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
    started = docker_service.compose_start(instance_dir, deployment.container_prefix)
    if not started:
        _log_action(db, customer_id, "start", "error", "Failed to start containers.")
        return {"success": False}
    deployment.deployment_status = "running"
    db.commit()
    _log_action(db, customer_id, "start", "success", "Containers started.")
    return {"success": True}
def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
    """Restart containers for a customer.

    Args:
        db: Active session.
        customer_id: Customer whose containers to restart.

    Returns:
        Dict with ``success`` bool.
    """
    deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
    config = get_system_config(db)
    if deployment is None or config is None:
        return {"success": False, "error": "Deployment or config not found."}
    instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
    restarted = docker_service.compose_restart(instance_dir, deployment.container_prefix)
    if not restarted:
        _log_action(db, customer_id, "restart", "error", "Failed to restart containers.")
        return {"success": False}
    deployment.deployment_status = "running"
    db.commit()
    _log_action(db, customer_id, "restart", "success", "Containers restarted.")
    return {"success": True}
def get_customer_health(db: Session, customer_id: int) -> dict[str, Any]:
    """Check health of a customer's deployment.

    Updates ``last_health_check`` and ``deployment_status`` on the
    Deployment row as a side effect.

    Args:
        db: Active session.
        customer_id: Customer ID.

    Returns:
        Dict with container statuses and overall health.
    """
    deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
    if deployment is None:
        return {"healthy": False, "error": "No deployment found.", "containers": []}
    containers = docker_service.get_container_status(deployment.container_prefix)
    healthy = bool(containers) and all(c["status"] == "running" for c in containers)
    deployment.last_health_check = datetime.utcnow()
    if healthy:
        deployment.deployment_status = "running"
    elif containers:
        # Containers exist but at least one is not running.
        deployment.deployment_status = "failed"
    db.commit()
    return {
        "healthy": healthy,
        "containers": containers,
        "deployment_status": deployment.deployment_status,
        "last_check": deployment.last_health_check.isoformat(),
    }

234
app/services/npm_service.py Normal file
View File

@@ -0,0 +1,234 @@
"""Nginx Proxy Manager API integration.
Creates, updates, and deletes proxy host entries so each customer's NetBird
dashboard is accessible at ``{subdomain}.{base_domain}`` with automatic
Let's Encrypt SSL certificates.
"""
import logging
from typing import Any, Optional
import httpx
logger = logging.getLogger(__name__)
# Timeout for NPM API calls (seconds)
NPM_TIMEOUT = 30
async def test_npm_connection(api_url: str, api_token: str) -> dict[str, Any]:
    """Test connectivity to the Nginx Proxy Manager API.

    Args:
        api_url: NPM API base URL (e.g. ``http://npm:81/api``).
        api_token: Bearer token for authentication.

    Returns:
        Dict with ``ok`` (bool) and ``message`` (str).
    """
    auth = {"Authorization": f"Bearer {api_token}"}
    try:
        async with httpx.AsyncClient(timeout=NPM_TIMEOUT) as client:
            resp = await client.get(f"{api_url}/nginx/proxy-hosts", headers=auth)
            if resp.status_code != 200:
                return {
                    "ok": False,
                    "message": f"NPM returned status {resp.status_code}: {resp.text[:200]}",
                }
            count = len(resp.json())
            return {"ok": True, "message": f"Connected. {count} proxy hosts found."}
    except httpx.ConnectError:
        return {"ok": False, "message": "Connection refused. Is NPM running?"}
    except httpx.TimeoutException:
        return {"ok": False, "message": "Connection timed out."}
    except Exception as exc:
        return {"ok": False, "message": f"Unexpected error: {exc}"}
async def create_proxy_host(
    api_url: str,
    api_token: str,
    domain: str,
    forward_host: str,
    forward_port: int = 80,
    admin_email: str = "",
    subdomain: str = "",
    customer_id: int = 0,
) -> dict[str, Any]:
    """Create a proxy host entry in NPM with SSL for a customer.

    The proxy routes traffic as follows:
    - ``/`` -> dashboard container (port 80)
    - ``/api`` -> management container (port 80)
    - ``/signalexchange.*`` -> signal container (port 80)
    - ``/relay`` -> relay container (port 80)

    After the proxy host is created, a Let's Encrypt certificate request
    is issued via :func:`_request_ssl` (best-effort).

    Args:
        api_url: NPM API base URL.
        api_token: Bearer token.
        domain: Full domain (e.g. ``kunde1.example.com``).
        forward_host: Container name for the dashboard.
        forward_port: Port to forward to (default 80).
        admin_email: Email for Let's Encrypt.
        subdomain: Customer subdomain for building container names.
        customer_id: Customer ID for building container names.

    Returns:
        Dict with ``proxy_id`` on success or ``error`` on failure.
    """
    headers = {
        "Authorization": f"Bearer {api_token}",
        "Content-Type": "application/json",
    }
    # Build advanced Nginx config to route sub-paths to different containers.
    # Container names must match the naming scheme used by the deployment
    # service (``netbird-kunde{id}-<service>``).
    mgmt_container = f"netbird-kunde{customer_id}-management"
    signal_container = f"netbird-kunde{customer_id}-signal"
    relay_container = f"netbird-kunde{customer_id}-relay"
    advanced_config = f"""
# NetBird Management API
location /api {{
    proxy_pass http://{mgmt_container}:80;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    proxy_set_header X-Forwarded-Proto $scheme;
}}
# NetBird Signal (gRPC-Web)
location /signalexchange. {{
    grpc_pass grpc://{signal_container}:80;
    grpc_set_header Host $host;
}}
# NetBird Relay (WebSocket)
location /relay {{
    proxy_pass http://{relay_container}:80;
    proxy_http_version 1.1;
    proxy_set_header Upgrade $http_upgrade;
    proxy_set_header Connection "upgrade";
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;
}}
"""
    payload = {
        "domain_names": [domain],
        "forward_scheme": "http",
        "forward_host": forward_host,
        "forward_port": forward_port,
        # certificate_id 0 = no certificate yet; assigned by _request_ssl.
        "certificate_id": 0,
        "ssl_forced": True,
        "hsts_enabled": True,
        "hsts_subdomains": False,
        "http2_support": True,
        "block_exploits": True,
        "allow_websocket_upgrade": True,
        "access_list_id": 0,
        "advanced_config": advanced_config.strip(),
        "meta": {
            "letsencrypt_agree": True,
            "letsencrypt_email": admin_email,
            "dns_challenge": False,
        },
    }
    try:
        async with httpx.AsyncClient(timeout=NPM_TIMEOUT) as client:
            resp = await client.post(
                f"{api_url}/nginx/proxy-hosts", json=payload, headers=headers
            )
            if resp.status_code in (200, 201):
                data = resp.json()
                proxy_id = data.get("id")
                logger.info("Created NPM proxy host %s (id=%s)", domain, proxy_id)
                # Request SSL certificate (failures logged, not raised)
                await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email)
                return {"proxy_id": proxy_id}
            else:
                error_msg = f"NPM returned {resp.status_code}: {resp.text[:300]}"
                logger.error("Failed to create proxy host: %s", error_msg)
                return {"error": error_msg}
    except Exception as exc:
        logger.error("NPM API error: %s", exc)
        return {"error": str(exc)}
async def _request_ssl(
    client: httpx.AsyncClient,
    api_url: str,
    headers: dict,
    proxy_id: int,
    domain: str,
    admin_email: str,
) -> None:
    """Request a Let's Encrypt SSL certificate for a proxy host.

    Best-effort: any failure is logged as a warning and swallowed so the
    proxy host itself survives even without a certificate.

    Args:
        client: httpx client.
        api_url: NPM API base URL.
        headers: Auth headers.
        proxy_id: The proxy host ID.
        domain: The domain to certify.
        admin_email: Contact email for LE.
    """
    ssl_payload = {
        "domain_names": [domain],
        "meta": {
            "letsencrypt_agree": True,
            "letsencrypt_email": admin_email,
            "dns_challenge": False,
        },
    }
    try:
        resp = await client.post(
            f"{api_url}/nginx/certificates", json=ssl_payload, headers=headers
        )
        if resp.status_code in (200, 201):
            cert_id = resp.json().get("id")
            # Assign certificate to proxy host.
            # NOTE(review): this PUT sends only certificate_id — confirm the
            # NPM API treats it as a partial update and does not blank out
            # the proxy host's other fields.
            await client.put(
                f"{api_url}/nginx/proxy-hosts/{proxy_id}",
                json={"certificate_id": cert_id},
                headers=headers,
            )
            logger.info("SSL certificate assigned to proxy host %s", proxy_id)
        else:
            logger.warning("SSL request returned %s: %s", resp.status_code, resp.text[:200])
    except Exception as exc:
        logger.warning("SSL certificate request failed: %s", exc)
async def delete_proxy_host(
    api_url: str, api_token: str, proxy_id: int
) -> bool:
    """Delete a proxy host from NPM.

    Args:
        api_url: NPM API base URL.
        api_token: Bearer token.
        proxy_id: The proxy host ID to delete.

    Returns:
        True on success.
    """
    auth = {"Authorization": f"Bearer {api_token}"}
    try:
        async with httpx.AsyncClient(timeout=NPM_TIMEOUT) as client:
            resp = await client.delete(
                f"{api_url}/nginx/proxy-hosts/{proxy_id}", headers=auth
            )
            if resp.status_code not in (200, 204):
                logger.warning(
                    "Failed to delete proxy host %d: %s %s",
                    proxy_id, resp.status_code, resp.text[:200],
                )
                return False
            logger.info("Deleted NPM proxy host %d", proxy_id)
            return True
    except Exception as exc:
        logger.error("NPM delete error: %s", exc)
        return False

View File

@@ -0,0 +1,110 @@
"""UDP port allocation service for NetBird relay/STUN ports.
Manages the range starting at relay_base_port (default 3478). Each customer
gets one unique UDP port. The manager checks both the database and the OS
to avoid collisions.
"""
import logging
import socket
from typing import Optional
from sqlalchemy.orm import Session
from app.models import Deployment
logger = logging.getLogger(__name__)
def _is_udp_port_in_use(port: int) -> bool:
"""Check whether a UDP port is currently bound on the host.
Args:
port: UDP port number to probe.
Returns:
True if the port is in use.
"""
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
sock.bind(("0.0.0.0", port))
return False
except OSError:
return True
finally:
sock.close()
def get_allocated_ports(db: Session) -> set[int]:
    """Return the set of relay UDP ports already assigned in the database.

    Args:
        db: Active SQLAlchemy session.

    Returns:
        Set of port numbers.
    """
    return {port for (port,) in db.query(Deployment.relay_udp_port).all()}
def allocate_port(db: Session, base_port: int = 3478, max_ports: int = 100) -> int:
    """Find and return the next available relay UDP port.

    Scans from *base_port* to *base_port + max_ports - 1*, skipping ports
    that are either already recorded in the database or currently bound
    on the host.

    Args:
        db: Active SQLAlchemy session.
        base_port: Start of the port range.
        max_ports: Number of ports in the range.

    Returns:
        An available port number.

    Raises:
        RuntimeError: If no port in the range is available.
    """
    allocated = get_allocated_ports(db)
    for port in range(base_port, base_port + max_ports):
        if port in allocated:
            continue
        if _is_udp_port_in_use(port):
            logger.warning("Port %d is in use on the host, skipping.", port)
            continue
        logger.info("Allocated relay UDP port %d.", port)
        return port
    # Fix: the previous message hard-coded "All 100 ports" even when a
    # caller passed a different max_ports.
    raise RuntimeError(
        f"No available relay ports in range {base_port}-{base_port + max_ports - 1}. "
        f"All {max_ports} ports are allocated."
    )
def release_port(db: Session, port: int) -> None:
    """Log that a relay UDP port has been released.

    The actual release happens when the Deployment row is deleted; this
    helper exists purely for explicit logging in rollback scenarios.

    Args:
        db: Active SQLAlchemy session.
        port: The port to release.
    """
    logger.info("Released relay UDP port %d.", port)
def validate_port_available(db: Session, port: int) -> bool:
    """Check if a specific port is available both in the DB and on the host.

    Args:
        db: Active SQLAlchemy session.
        port: Port number to check.

    Returns:
        True if the port is available.
    """
    if port in get_allocated_ports(db):
        return False
    return not _is_udp_port_in_use(port)

0
app/utils/__init__.py Normal file
View File

74
app/utils/config.py Normal file
View File

@@ -0,0 +1,74 @@
"""Configuration management — loads all settings from the database (system_config table).
There is NO .env file for application config. The install.sh script collects values
interactively and seeds them into the database. The Web UI settings page allows
editing every value at runtime.
"""
import os
from dataclasses import dataclass
from typing import Optional
from sqlalchemy.orm import Session
from app.utils.security import decrypt_value
@dataclass
class AppConfig:
    """In-memory snapshot of system configuration.

    Built by ``get_system_config`` from the singleton ``system_config``
    database row; field values mirror that row, except ``npm_api_token``,
    which is held here already decrypted.
    """

    base_domain: str  # base domain from system_config (customer subdomains presumably hang off it — confirm)
    admin_email: str
    npm_api_url: str  # NPM API endpoint (validated to start with http(s):// elsewhere)
    npm_api_token: str  # decrypted
    netbird_management_image: str  # Docker image references for the NetBird components
    netbird_signal_image: str
    netbird_relay_image: str
    netbird_dashboard_image: str
    data_dir: str
    docker_network: str
    relay_base_port: int  # first port of the relay UDP allocation range
# Environment-level settings (not stored in DB).
# NOTE(review): SECRET_KEY is also the basis for the Fernet key derived in
# app.utils.security, so rotating it invalidates previously encrypted values.
SECRET_KEY: str = os.environ.get("SECRET_KEY", "change-me-in-production")
DATABASE_PATH: str = os.environ.get("DATABASE_PATH", "/app/data/netbird_msp.db")
LOG_LEVEL: str = os.environ.get("LOG_LEVEL", "INFO")
JWT_ALGORITHM: str = "HS256"
JWT_EXPIRE_MINUTES: int = 480  # 8 hours
def get_system_config(db: Session) -> Optional[AppConfig]:
    """Load the singleton SystemConfig row and return it as an AppConfig.

    Args:
        db: Active SQLAlchemy session.

    Returns:
        AppConfig snapshot, or None when the system_config row does not
        exist yet (e.g. before install.sh has seeded the database).
    """
    from app.models import SystemConfig

    config_row = db.query(SystemConfig).filter(SystemConfig.id == 1).first()
    if config_row is None:
        return None

    # Best effort: an undecryptable NPM token degrades to an empty string
    # rather than making configuration loading fail entirely.
    try:
        decrypted_token = decrypt_value(config_row.npm_api_token_encrypted)
    except Exception:
        decrypted_token = ""

    return AppConfig(
        base_domain=config_row.base_domain,
        admin_email=config_row.admin_email,
        npm_api_url=config_row.npm_api_url,
        npm_api_token=decrypted_token,
        netbird_management_image=config_row.netbird_management_image,
        netbird_signal_image=config_row.netbird_signal_image,
        netbird_relay_image=config_row.netbird_relay_image,
        netbird_dashboard_image=config_row.netbird_dashboard_image,
        data_dir=config_row.data_dir,
        docker_network=config_row.docker_network,
        relay_base_port=config_row.relay_base_port,
    )

91
app/utils/security.py Normal file
View File

@@ -0,0 +1,91 @@
"""Security utilities — password hashing (bcrypt) and token encryption (Fernet)."""
import os
import secrets
from cryptography.fernet import Fernet
from passlib.context import CryptContext
# ---------------------------------------------------------------------------
# Password hashing (bcrypt)
# ---------------------------------------------------------------------------

# Shared passlib context used by hash_password/verify_password below.
# deprecated="auto" marks any non-default scheme added later as deprecated.
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
def hash_password(plain: str) -> str:
    """Return the bcrypt hash of *plain*.

    Args:
        plain: The plaintext password.

    Returns:
        Bcrypt hash string suitable for storage.
    """
    hashed: str = pwd_context.hash(plain)
    return hashed
def verify_password(plain: str, hashed: str) -> bool:
    """Check a plaintext password against its stored bcrypt hash.

    Args:
        plain: The plaintext password to check.
        hashed: The stored bcrypt hash.

    Returns:
        True when *plain* matches *hashed*.
    """
    is_match: bool = pwd_context.verify(plain, hashed)
    return is_match
# ---------------------------------------------------------------------------
# Fernet encryption for secrets (NPM token, relay secrets, etc.)
# ---------------------------------------------------------------------------
def _get_fernet() -> Fernet:
    """Build a Fernet instance keyed from the application SECRET_KEY.

    Fernet requires a 32-byte URL-safe-base64 key, so the environment
    SECRET_KEY is hashed with SHA-256 (a stable 32-byte digest) and then
    base64-encoded. Re-reads the environment on every call.
    """
    import base64
    import hashlib

    secret = os.environ.get("SECRET_KEY", "change-me-in-production")
    digest = hashlib.sha256(secret.encode()).digest()
    return Fernet(base64.urlsafe_b64encode(digest))
def encrypt_value(plaintext: str) -> str:
    """Encrypt *plaintext* with the app-derived Fernet key.

    Args:
        plaintext: Value to encrypt.

    Returns:
        Encrypted string (base64-encoded Fernet token).
    """
    token = _get_fernet().encrypt(plaintext.encode())
    return token.decode()
def decrypt_value(ciphertext: str) -> str:
    """Decrypt a Fernet token produced by encrypt_value.

    Args:
        ciphertext: Encrypted value.

    Returns:
        The original plaintext string.
    """
    recovered = _get_fernet().decrypt(ciphertext.encode())
    return recovered.decode()
def generate_relay_secret() -> str:
    """Generate a cryptographically secure relay secret.

    Returns:
        A 32-character lowercase hex string (16 random bytes).
    """
    return secrets.token_bytes(16).hex()

165
app/utils/validators.py Normal file
View File

@@ -0,0 +1,165 @@
"""Input validation with Pydantic models for all API endpoints."""
import re
from typing import Optional
from pydantic import BaseModel, Field, field_validator
# ---------------------------------------------------------------------------
# Auth
# ---------------------------------------------------------------------------
class LoginRequest(BaseModel):
    """Login credentials."""

    # Required, non-empty, capped at 100 characters.
    username: str = Field(..., min_length=1, max_length=100)
    # Required, non-empty; no upper bound enforced here.
    password: str = Field(..., min_length=1)
class ChangePasswordRequest(BaseModel):
    """Password change payload."""

    # Existing password; presumably verified by the auth route — confirm.
    current_password: str = Field(..., min_length=1)
    # Policy: new password must be 12-128 characters.
    new_password: str = Field(..., min_length=12, max_length=128)
# ---------------------------------------------------------------------------
# Customer
# ---------------------------------------------------------------------------
class CustomerCreate(BaseModel):
    """Payload to create a new customer."""

    name: str = Field(..., min_length=1, max_length=255)
    company: Optional[str] = Field(None, max_length=255)
    subdomain: str = Field(..., min_length=1, max_length=63)
    email: str = Field(..., max_length=255)
    max_devices: int = Field(default=20, ge=1, le=10000)
    notes: Optional[str] = None

    @field_validator("subdomain")
    @classmethod
    def validate_subdomain(cls, v: str) -> str:
        """Normalize then validate a DNS-label-style subdomain.

        Lowercase alphanumeric plus interior hyphens, 1-63 characters, no
        leading/trailing hyphen (RFC 1035 label shape).
        """
        v = v.lower().strip()
        if not re.match(r"^[a-z0-9]([a-z0-9-]{0,61}[a-z0-9])?$", v):
            # Message fixed to match the regex, which permits single-char labels.
            raise ValueError(
                "Subdomain must be lowercase, alphanumeric with hyphens, "
                "1-63 chars, no leading/trailing hyphen."
            )
        return v

    @field_validator("email")
    @classmethod
    def validate_email(cls, v: str) -> str:
        """Normalize (lowercase, strip) then apply a basic email format check.

        Normalizing *before* matching mirrors validate_subdomain and prevents
        inputs like " User@Example.com " from being rejected for whitespace.
        """
        v = v.lower().strip()
        pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
        if not re.match(pattern, v):
            raise ValueError("Invalid email address.")
        return v
class CustomerUpdate(BaseModel):
    """Payload to update an existing customer (all fields optional)."""

    name: Optional[str] = Field(None, min_length=1, max_length=255)
    company: Optional[str] = Field(None, max_length=255)
    email: Optional[str] = Field(None, max_length=255)
    max_devices: Optional[int] = Field(None, ge=1, le=10000)
    notes: Optional[str] = None
    status: Optional[str] = None

    @field_validator("email")
    @classmethod
    def validate_email(cls, v: Optional[str]) -> Optional[str]:
        """Normalize (lowercase, strip) then apply a basic email format check.

        Normalization happens before matching so whitespace/uppercase inputs
        are canonicalized instead of rejected by the regex.
        """
        if v is None:
            return v
        v = v.lower().strip()
        pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
        if not re.match(pattern, v):
            raise ValueError("Invalid email address.")
        return v

    @field_validator("status")
    @classmethod
    def validate_status(cls, v: Optional[str]) -> Optional[str]:
        """Status must be one of the allowed lifecycle values."""
        if v is None:
            return v
        allowed = {"active", "inactive", "deploying", "error"}
        if v not in allowed:
            raise ValueError(f"Status must be one of: {', '.join(sorted(allowed))}")
        return v
# ---------------------------------------------------------------------------
# Settings
# ---------------------------------------------------------------------------
class SystemConfigUpdate(BaseModel):
    """Payload to update system configuration (all fields optional)."""

    base_domain: Optional[str] = Field(None, min_length=1, max_length=255)
    admin_email: Optional[str] = Field(None, max_length=255)
    npm_api_url: Optional[str] = Field(None, max_length=500)
    npm_api_token: Optional[str] = None  # plaintext, will be encrypted before storage
    netbird_management_image: Optional[str] = Field(None, max_length=255)
    netbird_signal_image: Optional[str] = Field(None, max_length=255)
    netbird_relay_image: Optional[str] = Field(None, max_length=255)
    netbird_dashboard_image: Optional[str] = Field(None, max_length=255)
    data_dir: Optional[str] = Field(None, max_length=500)
    docker_network: Optional[str] = Field(None, max_length=100)
    relay_base_port: Optional[int] = Field(None, ge=1024, le=65535)

    @field_validator("base_domain")
    @classmethod
    def validate_domain(cls, v: Optional[str]) -> Optional[str]:
        """Normalize (lowercase, strip) then validate domain format.

        Normalizing before matching means stray whitespace or uppercase is
        canonicalized instead of failing the regex — consistent with the
        subdomain validator in CustomerCreate.
        """
        if v is None:
            return v
        v = v.lower().strip()
        pattern = r"^[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
        if not re.match(pattern, v):
            raise ValueError("Invalid domain format.")
        return v

    @field_validator("npm_api_url")
    @classmethod
    def validate_npm_url(cls, v: Optional[str]) -> Optional[str]:
        """NPM URL must start with http(s)://; surrounding whitespace and trailing slashes are removed."""
        if v is None:
            return v
        # Strip first so " https://npm " is accepted and canonicalized.
        v = v.strip()
        if not re.match(r"^https?://", v):
            raise ValueError("NPM API URL must start with http:// or https://")
        return v.rstrip("/")

    @field_validator("admin_email")
    @classmethod
    def validate_email(cls, v: Optional[str]) -> Optional[str]:
        """Normalize (lowercase, strip) then validate the admin email address."""
        if v is None:
            return v
        v = v.lower().strip()
        pattern = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
        if not re.match(pattern, v):
            raise ValueError("Invalid email address.")
        return v
# ---------------------------------------------------------------------------
# Query params
# ---------------------------------------------------------------------------
class CustomerListParams(BaseModel):
    """Query parameters for listing customers."""

    page: int = Field(default=1, ge=1)  # 1-based page index
    per_page: int = Field(default=25, ge=1, le=100)
    search: Optional[str] = None
    status: Optional[str] = None

    @field_validator("status")
    @classmethod
    def validate_status(cls, v: Optional[str]) -> Optional[str]:
        """Treat a missing/empty filter as None; otherwise require an allowed status."""
        if not v:  # catches both None and ""
            return None
        allowed = {"active", "inactive", "deploying", "error"}
        if v not in allowed:
            raise ValueError(f"Status must be one of: {', '.join(sorted(allowed))}")
        return v