fix: resolve circular import, async blocking, SELinux and delete timeout issues

- Extract the shared SlowAPI limiter into app/limiter.py to break the circular
  import between app.main and app.routers.auth (see the sketch after this list)
- Seed a default SystemConfig row (id=1) on first DB init so the settings
  page works out of the box (sketched below)
- Make all docker_service.compose_* functions async (run_in_executor)
  so long docker pulls/stops no longer block the async event loop
- Propagate async to netbird_service stop/start/restart and await those
  calls in the deployments router
- Move customer delete to BackgroundTasks so the HTTP response returns
  immediately and avoids the frontend "Network error" on slow machines
  (sketched below)
- docker-compose: add :z SELinux labels, mount docker.sock directly,
  add security_opt label:disable for socket access, extra_hosts for
  host.docker.internal, enable DELETE/VOLUMES on socket proxy
- npm_service: auto-detect outbound host IP via UDP socket when
  HOST_IP env var is not set
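
The limiter extraction is the usual fix for this kind of cycle: both modules import the limiter from a third module that imports neither of them. A minimal sketch of what app/limiter.py could look like (the key function and wiring are assumptions, not copied from this commit):

    # app/limiter.py -- shared rate limiter imported by app.main and app.routers.auth
    # (sketch; the key_func choice is an assumption)
    from slowapi import Limiter
    from slowapi.util import get_remote_address

    limiter = Limiter(key_func=get_remote_address)

app.main can then attach this single instance to the app (app.state.limiter = limiter) and app.routers.auth can decorate endpoints with @limiter.limit(...), so neither module has to import the other.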
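
The SystemConfig seed is a plain insert-if-missing step during DB init; a hedged sketch, assuming SystemConfig is a SQLAlchemy model whose columns carry sensible defaults (the function name and import path are illustrative):

    # Hypothetical sketch of the first-run seed in the DB init path
    from sqlalchemy.orm import Session

    from app.models import SystemConfig  # assumed location of the model

    def seed_default_config(db: Session) -> None:
        # Create the single well-known config row (id=1) only if it does not exist yet.
        if db.query(SystemConfig).filter(SystemConfig.id == 1).first() is None:
            db.add(SystemConfig(id=1))  # column defaults supply the remaining fields
            db.commit()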
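
Handing the delete to FastAPI's BackgroundTasks lets the route answer before the slow teardown (compose down, file cleanup) runs. A hedged sketch of the router change; the path, response shape, and task helper name are assumptions:

    # Hypothetical sketch of the customers router endpoint
    from fastapi import APIRouter, BackgroundTasks

    router = APIRouter()

    @router.delete("/customers/{customer_id}")
    async def delete_customer(customer_id: int, background_tasks: BackgroundTasks):
        # Queue the slow teardown and respond immediately; the task should open its
        # own DB session because it runs only after the response has been sent.
        background_tasks.add_task(run_customer_delete, customer_id)  # assumed helper
        return {"status": "deletion scheduled"}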

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-19 00:30:25 +01:00
parent 0ac15e4db9
commit 1bbe4904a7
10 changed files with 102 additions and 53 deletions

View File

@@ -5,6 +5,7 @@ per-customer Docker Compose stacks. Also provides log retrieval and
 container health/status information.
 """
+import asyncio
 import logging
 import os
 import subprocess
@@ -17,6 +18,15 @@ from docker.errors import DockerException, NotFound
 logger = logging.getLogger(__name__)
+async def _run_cmd(cmd: list[str], timeout: int = 120) -> subprocess.CompletedProcess:
+    """Run a subprocess command in a thread pool to avoid blocking the event loop."""
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(  # type: ignore[arg-type]
+        None,
+        lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout),
+    )
 def _get_client() -> docker.DockerClient:
     """Return a Docker client connected via the Unix socket.
@@ -26,7 +36,7 @@ def _get_client() -> docker.DockerClient:
     return docker.from_env()
-def compose_up(
+async def compose_up(
     instance_dir: str,
     project_name: str,
     services: Optional[list[str]] = None,
@@ -63,7 +73,7 @@ def compose_up(
         cmd.extend(services)
     logger.info("Running: %s", " ".join(cmd))
-    result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
+    result = await _run_cmd(cmd, timeout=timeout)
     if result.returncode != 0:
         logger.error("docker compose up failed: %s", result.stderr)
@@ -74,7 +84,7 @@ def compose_up(
     return True
-def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = False) -> bool:
+async def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = False) -> bool:
     """Run ``docker compose down`` for a customer instance.
     Args:
@@ -96,14 +106,14 @@ def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = Fa
         cmd.append("-v")
     logger.info("Running: %s", " ".join(cmd))
-    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+    result = await _run_cmd(cmd)
     if result.returncode != 0:
         logger.warning("docker compose down returned non-zero: %s", result.stderr)
     return True
-def compose_stop(instance_dir: str, project_name: str) -> bool:
+async def compose_stop(instance_dir: str, project_name: str) -> bool:
     """Run ``docker compose stop`` for a customer instance.
     Args:
@@ -121,11 +131,11 @@ def compose_stop(instance_dir: str, project_name: str) -> bool:
         "stop",
     ]
     logger.info("Running: %s", " ".join(cmd))
-    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+    result = await _run_cmd(cmd)
     return result.returncode == 0
-def compose_start(instance_dir: str, project_name: str) -> bool:
+async def compose_start(instance_dir: str, project_name: str) -> bool:
     """Run ``docker compose start`` for a customer instance.
     Args:
@@ -143,11 +153,11 @@ def compose_start(instance_dir: str, project_name: str) -> bool:
         "start",
     ]
     logger.info("Running: %s", " ".join(cmd))
-    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+    result = await _run_cmd(cmd)
     return result.returncode == 0
-def compose_restart(instance_dir: str, project_name: str) -> bool:
+async def compose_restart(instance_dir: str, project_name: str) -> bool:
     """Run ``docker compose restart`` for a customer instance.
     Args:
@@ -165,7 +175,7 @@ def compose_restart(instance_dir: str, project_name: str) -> bool:
         "restart",
     ]
     logger.info("Running: %s", " ".join(cmd))
-    result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
+    result = await _run_cmd(cmd)
     return result.returncode == 0

View File

@@ -204,14 +204,14 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
     # Step 5b: Stop existing containers if re-deploying
     if existing_deployment:
         try:
-            docker_service.compose_down(instance_dir, container_prefix, remove_volumes=False)
+            await docker_service.compose_down(instance_dir, container_prefix, remove_volumes=False)
             _log_action(db, customer_id, "deploy", "info",
                         "Stopped existing containers for re-deployment.")
         except Exception as exc:
             logger.warning("Could not stop existing containers: %s", exc)
     # Step 6: Start all Docker containers
-    docker_service.compose_up(instance_dir, container_prefix, timeout=120)
+    await docker_service.compose_up(instance_dir, container_prefix, timeout=120)
     _log_action(db, customer_id, "deploy", "info", "Docker containers started.")
     # Step 7: Wait for containers to be healthy
@@ -373,7 +373,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
         # Rollback: stop containers if they were started
         try:
-            docker_service.compose_down(
+            await docker_service.compose_down(
                 instance_dir or os.path.join(config.data_dir, f"kunde{customer_id}"),
                 container_prefix,
                 remove_volumes=True,
@@ -414,7 +414,7 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
     # Stop and remove containers
     try:
-        docker_service.compose_down(instance_dir, deployment.container_prefix, remove_volumes=True)
+        await docker_service.compose_down(instance_dir, deployment.container_prefix, remove_volumes=True)
         _log_action(db, customer_id, "undeploy", "info", "Containers removed.")
     except Exception as exc:
         _log_action(db, customer_id, "undeploy", "error", f"Container removal error: {exc}")
@@ -457,7 +457,7 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
     return {"success": True}
-def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
+async def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
     """Stop containers for a customer."""
     deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
     config = get_system_config(db)
@@ -465,7 +465,7 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
         return {"success": False, "error": "Deployment or config not found."}
     instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
-    ok = docker_service.compose_stop(instance_dir, deployment.container_prefix)
+    ok = await docker_service.compose_stop(instance_dir, deployment.container_prefix)
     if ok:
         deployment.deployment_status = "stopped"
         customer = db.query(Customer).filter(Customer.id == customer_id).first()
@@ -478,7 +478,7 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]:
     return {"success": ok}
-def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
+async def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
     """Start containers for a customer."""
     deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
     config = get_system_config(db)
@@ -486,7 +486,7 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
         return {"success": False, "error": "Deployment or config not found."}
     instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
-    ok = docker_service.compose_start(instance_dir, deployment.container_prefix)
+    ok = await docker_service.compose_start(instance_dir, deployment.container_prefix)
     if ok:
         deployment.deployment_status = "running"
         customer = db.query(Customer).filter(Customer.id == customer_id).first()
@@ -499,7 +499,7 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]:
     return {"success": ok}
-def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
+async def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
     """Restart containers for a customer."""
     deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first()
     config = get_system_config(db)
@@ -507,7 +507,7 @@ def restart_customer(db: Session, customer_id: int) -> dict[str, Any]:
         return {"success": False, "error": "Deployment or config not found."}
     instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
-    ok = docker_service.compose_restart(instance_dir, deployment.container_prefix)
+    ok = await docker_service.compose_restart(instance_dir, deployment.container_prefix)
     if ok:
         deployment.deployment_status = "running"
         customer = db.query(Customer).filter(Customer.id == customer_id).first()

View File

@@ -14,6 +14,7 @@ Also manages NPM streams for STUN/TURN relay UDP ports.
 import logging
 import os
+import socket
 from typing import Any
 import httpx
@@ -41,7 +42,17 @@ def _get_forward_host() -> str:
         logger.info("Using HOST_IP from environment: %s", host_ip)
         return host_ip
     logger.warning("HOST_IP not set in environment — please add HOST_IP=<your-server-ip> to .env")
+    # Auto-detect: connect to external address to find the outbound interface IP
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
+            s.connect(("8.8.8.8", 80))
+            detected = s.getsockname()[0]
+        logger.info("Auto-detected host IP: %s (set HOST_IP in .env to override)", detected)
+        return detected
+    except Exception:
+        pass
+    logger.warning("Could not detect host IP — falling back to 127.0.0.1. Set HOST_IP in .env!")
     return "127.0.0.1"