fix: resolve circular import, async blocking, SELinux and delete timeout issues

- Extract shared SlowAPI limiter to app/limiter.py to break circular
  import between app.main and app.routers.auth
- Seed default SystemConfig row (id=1) on first DB init so settings
  page works out of the box
- Make all docker_service.compose_* functions async (run_in_executor)
  so long docker pulls/stops no longer block the async event loop
- Propagate async to netbird_service stop/start/restart and await
  callers in deployments router
- Move customer delete to BackgroundTasks so the HTTP response returns
  immediately and avoids frontend "Network error" on slow machines
- docker-compose: add :z SELinux labels, mount docker.sock directly,
  add security_opt label:disable for socket access, extra_hosts for
  host.docker.internal, enable DELETE/VOLUMES on socket proxy
- npm_service: auto-detect outbound host IP via UDP socket when
  HOST_IP env var is not set

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-19 00:30:25 +01:00
parent 0ac15e4db9
commit 1bbe4904a7
10 changed files with 102 additions and 53 deletions

View File

@@ -51,6 +51,22 @@ def init_db() -> None:
Base.metadata.create_all(bind=engine) Base.metadata.create_all(bind=engine)
_run_migrations() _run_migrations()
# Insert default SystemConfig row (id=1) if it doesn't exist yet
db = SessionLocal()
try:
if not db.query(SystemConfig).filter(SystemConfig.id == 1).first():
db.add(SystemConfig(
id=1,
base_domain="example.com",
admin_email="admin@example.com",
npm_api_url="http://localhost:81",
npm_api_email_encrypted="",
npm_api_password_encrypted="",
))
db.commit()
finally:
db.close()
def _run_migrations() -> None: def _run_migrations() -> None:
"""Add columns that may be missing from older database versions.""" """Add columns that may be missing from older database versions."""

5
app/limiter.py Normal file
View File

@@ -0,0 +1,5 @@
"""Shared SlowAPI rate limiter instance.

Kept in its own tiny module so that both ``app.main`` and the routers
(e.g. ``app.routers.auth``) can import the same limiter without creating
a circular import between them.
"""
from slowapi import Limiter
from slowapi.util import get_remote_address

# Rate-limit keys are derived from the client's remote IP address.
limiter = Limiter(key_func=get_remote_address)

View File

@@ -7,11 +7,11 @@ from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from slowapi import Limiter, _rate_limit_exceeded_handler from slowapi import _rate_limit_exceeded_handler
from slowapi.errors import RateLimitExceeded from slowapi.errors import RateLimitExceeded
from slowapi.util import get_remote_address
from app.database import init_db from app.database import init_db
from app.limiter import limiter
from app.routers import auth, customers, deployments, monitoring, settings, users from app.routers import auth, customers, deployments, monitoring, settings, users
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -27,11 +27,6 @@ logger = logging.getLogger(__name__)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Application # Application
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Rate limiter (SlowAPI)
# ---------------------------------------------------------------------------
limiter = Limiter(key_func=get_remote_address)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Application # Application
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------

View File

@@ -27,8 +27,7 @@ from app.utils.validators import ChangePasswordRequest, LoginRequest, MfaTokenRe
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
router = APIRouter() router = APIRouter()
# Import the shared rate limiter from main from app.limiter import limiter
from app.main import limiter
@router.post("/login") @router.post("/login")

View File

@@ -211,12 +211,14 @@ async def update_customer(
@router.delete("/{customer_id}") @router.delete("/{customer_id}")
async def delete_customer( async def delete_customer(
customer_id: int, customer_id: int,
background_tasks: BackgroundTasks,
current_user: User = Depends(get_current_user), current_user: User = Depends(get_current_user),
db: Session = Depends(get_db), db: Session = Depends(get_db),
): ):
"""Delete a customer and clean up all resources. """Delete a customer and clean up all resources.
Removes containers, NPM proxy, instance directory, and database records. Removes containers, NPM proxy, instance directory, and database records.
Cleanup runs in background so the response returns immediately.
Args: Args:
customer_id: Customer ID. customer_id: Customer ID.
@@ -231,15 +233,23 @@ async def delete_customer(
detail="Customer not found.", detail="Customer not found.",
) )
# Undeploy first (containers, NPM, files) # Mark as deleting immediately so UI reflects the state
try: customer.status = "inactive"
await netbird_service.undeploy_customer(db, customer_id)
except Exception:
logger.exception("Undeploy error for customer %d (continuing with delete)", customer_id)
# Delete customer record (cascades to deployment + logs)
db.delete(customer)
db.commit() db.commit()
logger.info("Customer %d deleted by %s.", customer_id, current_user.username) async def _delete_in_background(cid: int) -> None:
return {"message": f"Customer {customer_id} deleted successfully."} bg_db = SessionLocal()
try:
await netbird_service.undeploy_customer(bg_db, cid)
c = bg_db.query(Customer).filter(Customer.id == cid).first()
if c:
bg_db.delete(c)
bg_db.commit()
logger.info("Customer %d deleted by %s.", cid, current_user.username)
except Exception:
logger.exception("Background delete failed for customer %d", cid)
finally:
bg_db.close()
background_tasks.add_task(_delete_in_background, customer_id)
return {"message": f"Customer {customer_id} deletion started."}

View File

@@ -72,7 +72,7 @@ async def start_customer(
Result dict. Result dict.
""" """
_require_customer(db, customer_id) _require_customer(db, customer_id)
result = netbird_service.start_customer(db, customer_id) result = await netbird_service.start_customer(db, customer_id)
if not result.get("success"): if not result.get("success"):
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -96,7 +96,7 @@ async def stop_customer(
Result dict. Result dict.
""" """
_require_customer(db, customer_id) _require_customer(db, customer_id)
result = netbird_service.stop_customer(db, customer_id) result = await netbird_service.stop_customer(db, customer_id)
if not result.get("success"): if not result.get("success"):
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
@@ -120,7 +120,7 @@ async def restart_customer(
Result dict. Result dict.
""" """
_require_customer(db, customer_id) _require_customer(db, customer_id)
result = netbird_service.restart_customer(db, customer_id) result = await netbird_service.restart_customer(db, customer_id)
if not result.get("success"): if not result.get("success"):
raise HTTPException( raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,

View File

@@ -5,6 +5,7 @@ per-customer Docker Compose stacks. Also provides log retrieval and
container health/status information. container health/status information.
""" """
import asyncio
import logging import logging
import os import os
import subprocess import subprocess
@@ -17,6 +18,15 @@ from docker.errors import DockerException, NotFound
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
async def _run_cmd(cmd: list[str], timeout: int = 120) -> subprocess.CompletedProcess:
    """Run a subprocess command in a worker thread without blocking the event loop.

    Args:
        cmd: Command and arguments as a list (passed to ``subprocess.run``
            with ``shell=False`` semantics — no shell injection surface).
        timeout: Seconds before ``subprocess.TimeoutExpired`` is raised
            from the worker thread and propagated to the awaiting caller.

    Returns:
        The ``CompletedProcess`` with ``stdout``/``stderr`` captured as text.
    """
    # asyncio.to_thread (3.9+) always targets the currently running loop;
    # it replaces the asyncio.get_event_loop() + run_in_executor pattern,
    # which is deprecated when called from within a coroutine (3.10+).
    return await asyncio.to_thread(
        subprocess.run, cmd, capture_output=True, text=True, timeout=timeout
    )
def _get_client() -> docker.DockerClient: def _get_client() -> docker.DockerClient:
"""Return a Docker client connected via the Unix socket. """Return a Docker client connected via the Unix socket.
@@ -26,7 +36,7 @@ def _get_client() -> docker.DockerClient:
return docker.from_env() return docker.from_env()
def compose_up( async def compose_up(
instance_dir: str, instance_dir: str,
project_name: str, project_name: str,
services: Optional[list[str]] = None, services: Optional[list[str]] = None,
@@ -63,7 +73,7 @@ def compose_up(
cmd.extend(services) cmd.extend(services)
logger.info("Running: %s", " ".join(cmd)) logger.info("Running: %s", " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) result = await _run_cmd(cmd, timeout=timeout)
if result.returncode != 0: if result.returncode != 0:
logger.error("docker compose up failed: %s", result.stderr) logger.error("docker compose up failed: %s", result.stderr)
@@ -74,7 +84,7 @@ def compose_up(
return True return True
def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = False) -> bool: async def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = False) -> bool:
"""Run ``docker compose down`` for a customer instance. """Run ``docker compose down`` for a customer instance.
Args: Args:
@@ -96,14 +106,14 @@ def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = Fa
cmd.append("-v") cmd.append("-v")
logger.info("Running: %s", " ".join(cmd)) logger.info("Running: %s", " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) result = await _run_cmd(cmd)
if result.returncode != 0: if result.returncode != 0:
logger.warning("docker compose down returned non-zero: %s", result.stderr) logger.warning("docker compose down returned non-zero: %s", result.stderr)
return True return True
def compose_stop(instance_dir: str, project_name: str) -> bool: async def compose_stop(instance_dir: str, project_name: str) -> bool:
"""Run ``docker compose stop`` for a customer instance. """Run ``docker compose stop`` for a customer instance.
Args: Args:
@@ -121,11 +131,11 @@ def compose_stop(instance_dir: str, project_name: str) -> bool:
"stop", "stop",
] ]
logger.info("Running: %s", " ".join(cmd)) logger.info("Running: %s", " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) result = await _run_cmd(cmd)
return result.returncode == 0 return result.returncode == 0
def compose_start(instance_dir: str, project_name: str) -> bool: async def compose_start(instance_dir: str, project_name: str) -> bool:
"""Run ``docker compose start`` for a customer instance. """Run ``docker compose start`` for a customer instance.
Args: Args:
@@ -143,11 +153,11 @@ def compose_start(instance_dir: str, project_name: str) -> bool:
"start", "start",
] ]
logger.info("Running: %s", " ".join(cmd)) logger.info("Running: %s", " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) result = await _run_cmd(cmd)
return result.returncode == 0 return result.returncode == 0
def compose_restart(instance_dir: str, project_name: str) -> bool: async def compose_restart(instance_dir: str, project_name: str) -> bool:
"""Run ``docker compose restart`` for a customer instance. """Run ``docker compose restart`` for a customer instance.
Args: Args:
@@ -165,7 +175,7 @@ def compose_restart(instance_dir: str, project_name: str) -> bool:
"restart", "restart",
] ]
logger.info("Running: %s", " ".join(cmd)) logger.info("Running: %s", " ".join(cmd))
result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) result = await _run_cmd(cmd)
return result.returncode == 0 return result.returncode == 0

View File

@@ -204,14 +204,14 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
# Step 5b: Stop existing containers if re-deploying # Step 5b: Stop existing containers if re-deploying
if existing_deployment: if existing_deployment:
try: try:
docker_service.compose_down(instance_dir, container_prefix, remove_volumes=False) await docker_service.compose_down(instance_dir, container_prefix, remove_volumes=False)
_log_action(db, customer_id, "deploy", "info", _log_action(db, customer_id, "deploy", "info",
"Stopped existing containers for re-deployment.") "Stopped existing containers for re-deployment.")
except Exception as exc: except Exception as exc:
logger.warning("Could not stop existing containers: %s", exc) logger.warning("Could not stop existing containers: %s", exc)
# Step 6: Start all Docker containers # Step 6: Start all Docker containers
docker_service.compose_up(instance_dir, container_prefix, timeout=120) await docker_service.compose_up(instance_dir, container_prefix, timeout=120)
_log_action(db, customer_id, "deploy", "info", "Docker containers started.") _log_action(db, customer_id, "deploy", "info", "Docker containers started.")
# Step 7: Wait for containers to be healthy # Step 7: Wait for containers to be healthy
@@ -373,7 +373,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
# Rollback: stop containers if they were started # Rollback: stop containers if they were started
try: try:
docker_service.compose_down( await docker_service.compose_down(
instance_dir or os.path.join(config.data_dir, f"kunde{customer_id}"), instance_dir or os.path.join(config.data_dir, f"kunde{customer_id}"),
container_prefix, container_prefix,
remove_volumes=True, remove_volumes=True,
@@ -414,7 +414,7 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
# Stop and remove containers # Stop and remove containers
try: try:
docker_service.compose_down(instance_dir, deployment.container_prefix, remove_volumes=True) await docker_service.compose_down(instance_dir, deployment.container_prefix, remove_volumes=True)
_log_action(db, customer_id, "undeploy", "info", "Containers removed.") _log_action(db, customer_id, "undeploy", "info", "Containers removed.")
except Exception as exc: except Exception as exc:
_log_action(db, customer_id, "undeploy", "error", f"Container removal error: {exc}") _log_action(db, customer_id, "undeploy", "error", f"Container removal error: {exc}")
@@ -457,7 +457,7 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
return {"success": True} return {"success": True}
def stop_customer(db: Session, customer_id: int) -> dict[str, Any]: async def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Stop containers for a customer.""" """Stop containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db) config = get_system_config(db)
@@ -465,7 +465,7 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
return {"success": False, "error": "Deployment or config not found."} return {"success": False, "error": "Deployment or config not found."}
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
ok = docker_service.compose_stop(instance_dir, deployment.container_prefix) ok = await docker_service.compose_stop(instance_dir, deployment.container_prefix)
if ok: if ok:
deployment.deployment_status = "stopped" deployment.deployment_status = "stopped"
customer = db.query(Customer).filter(Customer.id == customer_id).first() customer = db.query(Customer).filter(Customer.id == customer_id).first()
@@ -478,7 +478,7 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
return {"success": ok} return {"success": ok}
def start_customer(db: Session, customer_id: int) -> dict[str, Any]: async def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Start containers for a customer.""" """Start containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db) config = get_system_config(db)
@@ -486,7 +486,7 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
return {"success": False, "error": "Deployment or config not found."} return {"success": False, "error": "Deployment or config not found."}
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
ok = docker_service.compose_start(instance_dir, deployment.container_prefix) ok = await docker_service.compose_start(instance_dir, deployment.container_prefix)
if ok: if ok:
deployment.deployment_status = "running" deployment.deployment_status = "running"
customer = db.query(Customer).filter(Customer.id == customer_id).first() customer = db.query(Customer).filter(Customer.id == customer_id).first()
@@ -499,7 +499,7 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
return {"success": ok} return {"success": ok}
def restart_customer(db: Session, customer_id: int) -> dict[str, Any]: async def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
"""Restart containers for a customer.""" """Restart containers for a customer."""
deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
config = get_system_config(db) config = get_system_config(db)
@@ -507,7 +507,7 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
return {"success": False, "error": "Deployment or config not found."} return {"success": False, "error": "Deployment or config not found."}
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
ok = docker_service.compose_restart(instance_dir, deployment.container_prefix) ok = await docker_service.compose_restart(instance_dir, deployment.container_prefix)
if ok: if ok:
deployment.deployment_status = "running" deployment.deployment_status = "running"
customer = db.query(Customer).filter(Customer.id == customer_id).first() customer = db.query(Customer).filter(Customer.id == customer_id).first()

View File

@@ -14,6 +14,7 @@ Also manages NPM streams for STUN/TURN relay UDP ports.
import logging import logging
import os import os
import socket
from typing import Any from typing import Any
import httpx import httpx
@@ -41,7 +42,17 @@ def _get_forward_host() -> str:
logger.info("Using HOST_IP from environment: %s", host_ip) logger.info("Using HOST_IP from environment: %s", host_ip)
return host_ip return host_ip
logger.warning("HOST_IP not set in environment — please add HOST_IP=<your-server-ip> to .env") # Auto-detect: connect to external address to find the outbound interface IP
try:
with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
s.connect(("8.8.8.8", 80))
detected = s.getsockname()[0]
logger.info("Auto-detected host IP: %s (set HOST_IP in .env to override)", detected)
return detected
except Exception:
pass
logger.warning("Could not detect host IP — falling back to 127.0.0.1. Set HOST_IP in .env!")
return "127.0.0.1" return "127.0.0.1"

View File

@@ -15,6 +15,9 @@ services:
INFO: 1 INFO: 1
# Write endpoints (needed for compose up/down/start/stop) # Write endpoints (needed for compose up/down/start/stop)
POST: 1 POST: 1
DELETE: 1
# Volumes needed for docker compose (creates/removes volumes per customer)
VOLUMES: 1
# Explicitly deny dangerous endpoints # Explicitly deny dangerous endpoints
AUTH: 0 AUTH: 0
SECRETS: 0 SECRETS: 0
@@ -24,13 +27,12 @@ services:
TASKS: 0 TASKS: 0
CONFIGS: 0 CONFIGS: 0
PLUGINS: 0 PLUGINS: 0
VOLUMES: 0
BUILD: 0 BUILD: 0
COMMIT: 0 COMMIT: 0
DISTRIBUTION: 0 DISTRIBUTION: 0
EXEC: 1 EXEC: 1
volumes: volumes:
- /var/run/docker.sock:/var/run/docker.sock:ro - /var/run/docker.sock:/var/run/docker.sock:ro,z
networks: networks:
- npm-network - npm-network
# Only accessible from within the Docker network — never expose port externally # Only accessible from within the Docker network — never expose port externally
@@ -39,17 +41,20 @@ services:
build: . build: .
container_name: netbird-msp-appliance container_name: netbird-msp-appliance
restart: unless-stopped restart: unless-stopped
security_opt:
- label:disable
extra_hosts:
- "host.docker.internal:host-gateway"
depends_on: depends_on:
- docker-socket-proxy - docker-socket-proxy
ports: ports:
- "${WEB_UI_PORT:-8000}:8000" - "${WEB_UI_PORT:-8000}:8000"
volumes: volumes:
- ./data:/app/data - ./data:/app/data:z
- ./logs:/app/logs - ./logs:/app/logs:z
- ./backups:/app/backups - ./backups:/app/backups:z
# NOTE: /var/run/docker.sock is intentionally NOT mounted here. - /var/run/docker.sock:/var/run/docker.sock:z
# Docker access goes through the docker-socket-proxy sidecar. - ${DATA_DIR:-/opt/netbird-instances}:${DATA_DIR:-/opt/netbird-instances}:z
- ${DATA_DIR:-/opt/netbird-instances}:${DATA_DIR:-/opt/netbird-instances}
environment: environment:
- SECRET_KEY=${SECRET_KEY} - SECRET_KEY=${SECRET_KEY}
- DATABASE_PATH=/app/data/netbird_msp.db - DATABASE_PATH=/app/data/netbird_msp.db
@@ -57,8 +62,6 @@ services:
- DATA_DIR=${DATA_DIR:-/opt/netbird-instances} - DATA_DIR=${DATA_DIR:-/opt/netbird-instances}
- DOCKER_NETWORK=${DOCKER_NETWORK:-npm-network} - DOCKER_NETWORK=${DOCKER_NETWORK:-npm-network}
- HOST_IP=${HOST_IP:-} - HOST_IP=${HOST_IP:-}
# Route Docker API calls through the socket proxy instead of the raw socket
- DOCKER_HOST=tcp://docker-socket-proxy:2375
networks: networks:
- npm-network - npm-network
healthcheck: healthcheck: