diff --git a/.gitignore b/.gitignore index 514f6c9..f66dbd2 100644 --- a/.gitignore +++ b/.gitignore @@ -65,10 +65,24 @@ htmlcov/ # Claude Code .claude/ -CLAUDE_CODE_SPEC.md PROJECT_SUMMARY.md QUICKSTART.md VS_CODE_SETUP.md +# Gemini / Antigravity +.gemini/ + # Windows artifacts nul + +# Debug / temp files (generated during development & testing) +out.txt +containers.txt +helper.txt +logs.txt +port.txt +env.txt +network.txt +update_helper.txt +state.txt +hostpath.txt diff --git a/Dockerfile b/Dockerfile index f727358..3ac3f90 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ && chmod a+r /etc/apt/keyrings/docker.gpg \ && echo "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/debian $(. /etc/os-release && echo "$VERSION_CODENAME") stable" > /etc/apt/sources.list.d/docker.list \ && apt-get update \ - && apt-get install -y --no-install-recommends docker-ce-cli docker-compose-plugin \ + && apt-get install -y --no-install-recommends docker-ce-cli docker-compose-plugin git \ && rm -rf /var/lib/apt/lists/* # Set working directory @@ -28,6 +28,16 @@ COPY app/ ./app/ COPY templates/ ./templates/ COPY static/ ./static/ +# Bake version info at build time +ARG GIT_COMMIT=unknown +ARG GIT_BRANCH=unknown +ARG GIT_COMMIT_DATE=unknown +ARG GIT_TAG=unknown +RUN echo "{\"tag\": \"$GIT_TAG\", \"commit\": \"$GIT_COMMIT\", \"branch\": \"$GIT_BRANCH\", \"date\": \"$GIT_COMMIT_DATE\"}" > /app/version.json + +# Allow git to operate in the /app-source volume (owner may differ from container user) +RUN git config --global --add safe.directory /app-source + # Create data directories RUN mkdir -p /app/data /app/logs /app/backups diff --git a/ProjectAISpec.md b/ProjectAISpec.md new file mode 100644 index 0000000..67510d0 --- /dev/null +++ b/ProjectAISpec.md @@ -0,0 +1,459 @@ +# NetBird MSP Appliance - Claude Code Specification + +## Project Overview +Build 
a complete, production-ready multi-tenant NetBird management platform that runs entirely in Docker containers. This is an MSP (Managed Service Provider) tool to manage 100+ isolated NetBird instances from a single web interface. + +## Technology Stack +- **Backend**: Python 3.11+ with FastAPI +- **Frontend**: HTML5 + Bootstrap 5 + Vanilla JavaScript (no frameworks) +- **Database**: SQLite +- **Containerization**: Docker + Docker Compose +- **Templating**: Jinja2 for Docker Compose generation +- **Integration**: Docker Python SDK, Nginx Proxy Manager API + +## Project Structure + +``` +netbird-msp-appliance/ +├── README.md # Main documentation +├── QUICKSTART.md # Quick start guide +├── ARCHITECTURE.md # Architecture documentation +├── LICENSE # MIT License +├── .gitignore # Git ignore file +├── .env.example # Environment variables template +├── install.sh # One-click installation script +├── docker-compose.yml # Main application container +├── Dockerfile # Application container definition +├── requirements.txt # Python dependencies +│ +├── app/ # Python application +│ ├── __init__.py +│ ├── main.py # FastAPI entry point +│ ├── models.py # SQLAlchemy models +│ ├── database.py # Database setup +│ ├── dependencies.py # FastAPI dependencies +│ │ +│ ├── routers/ # API endpoints +│ │ ├── __init__.py +│ │ ├── auth.py # Authentication endpoints +│ │ ├── customers.py # Customer CRUD +│ │ ├── deployments.py # Deployment management +│ │ ├── monitoring.py # Status & health checks +│ │ └── settings.py # System configuration +│ │ +│ ├── services/ # Business logic +│ │ ├── __init__.py +│ │ ├── docker_service.py # Docker container management +│ │ ├── npm_service.py # NPM API integration +│ │ ├── netbird_service.py # NetBird deployment orchestration +│ │ └── port_manager.py # UDP port allocation +│ │ +│ └── utils/ # Utilities +│ ├── __init__.py +│ ├── config.py # Configuration management +│ ├── security.py # Encryption, hashing +│ └── validators.py # Input validation +│ +├── 
templates/ # Jinja2 templates +│ ├── docker-compose.yml.j2 # Per-customer Docker Compose +│ ├── management.json.j2 # NetBird management config +│ └── relay.env.j2 # Relay environment variables +│ +├── static/ # Frontend files +│ ├── index.html # Main dashboard +│ ├── css/ +│ │ └── styles.css # Custom styles +│ └── js/ +│ └── app.js # Frontend JavaScript +│ +├── tests/ # Unit & integration tests +│ ├── __init__.py +│ ├── test_customer_api.py +│ ├── test_deployment.py +│ └── test_docker_service.py +│ +└── docs/ # Additional documentation + ├── API.md # API documentation + ├── DEPLOYMENT.md # Deployment guide + └── TROUBLESHOOTING.md # Common issues +``` + +## Key Features to Implement + +### 1. Customer Management +- **Create Customer**: Web form → API → Deploy NetBird instance +- **List Customers**: Paginated table with search/filter +- **Customer Details**: Status, logs, setup URL, actions +- **Delete Customer**: Remove all containers, NPM entries, data + +### 2. Automated Deployment +**Workflow when creating customer:** +1. Validate inputs (subdomain unique, email valid) +2. Allocate ports (Management internal, Relay UDP public) +3. Generate configs from Jinja2 templates +4. Create instance directory: `/opt/netbird-instances/{subdomain}/` +5. Write `docker-compose.yml`, `management.json`, `relay.env` +6. Start Docker containers via Docker SDK +7. Wait for health checks (max 60s) +8. Create NPM proxy hosts via API (with SSL) +9. Update database with deployment info +10. Return setup URL to user + +### 3. Web-Based Configuration +**All settings in database, editable via UI:** +- Base Domain +- Admin Email +- NPM API URL & Token +- NetBird Docker Images +- Port Ranges +- Data Directories + +No manual config file editing required! + +### 4. 
Nginx Proxy Manager Integration +**Per customer, create proxy host:** +- Domain: `{subdomain}.{base_domain}` +- Forward to: `netbird-{subdomain}-dashboard:80` +- SSL: Automatic Let's Encrypt +- Advanced config: Route `/api/*` to management, `/signalexchange.*` to signal, `/relay` to relay + +### 5. Port Management +**UDP Ports for STUN/Relay (publicly accessible):** +- Customer 1: 3478 +- Customer 2: 3479 +- ... +- Customer 100: 3577 + +**Algorithm:** +- Find next available port starting from 3478 +- Check that the port is not already in use (via `netstat` or the database) +- Assign to customer +- Store in database + +### 6. Monitoring & Health Checks +- Container status (running/stopped/failed) +- Health check endpoints (HTTP checks to management service) +- Resource usage (via Docker stats API) +- Relay connectivity test + +## Database Schema + +### Table: customers +```sql +CREATE TABLE customers ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + company TEXT, + subdomain TEXT UNIQUE NOT NULL, + email TEXT NOT NULL, + max_devices INTEGER DEFAULT 20, + notes TEXT, + status TEXT DEFAULT 'active' CHECK(status IN ('active', 'inactive', 'deploying', 'error')), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +``` + +### Table: deployments +```sql +CREATE TABLE deployments ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + customer_id INTEGER NOT NULL UNIQUE, + container_prefix TEXT NOT NULL, + relay_udp_port INTEGER UNIQUE NOT NULL, + npm_proxy_id INTEGER, + relay_secret TEXT NOT NULL, + setup_url TEXT, + deployment_status TEXT DEFAULT 'pending' CHECK(deployment_status IN ('pending', 'running', 'stopped', 'failed')), + deployed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + last_health_check TIMESTAMP, + FOREIGN KEY (customer_id) REFERENCES customers(id) ON DELETE CASCADE +); +``` + +### Table: system_config +```sql +CREATE TABLE system_config ( + id INTEGER PRIMARY KEY CHECK (id = 1), + base_domain TEXT NOT NULL, + admin_email 
TEXT NOT NULL, + npm_api_url TEXT NOT NULL, + npm_api_token_encrypted TEXT NOT NULL, + netbird_management_image TEXT DEFAULT 'netbirdio/management:latest', + netbird_signal_image TEXT DEFAULT 'netbirdio/signal:latest', + netbird_relay_image TEXT DEFAULT 'netbirdio/relay:latest', + netbird_dashboard_image TEXT DEFAULT 'netbirdio/dashboard:latest', + data_dir TEXT DEFAULT '/opt/netbird-instances', + docker_network TEXT DEFAULT 'npm-network', + relay_base_port INTEGER DEFAULT 3478, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +``` + +### Table: deployment_logs +```sql +CREATE TABLE deployment_logs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + customer_id INTEGER NOT NULL, + action TEXT NOT NULL, + status TEXT NOT NULL CHECK(status IN ('success', 'error', 'info')), + message TEXT, + details TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (customer_id) REFERENCES customers(id) ON DELETE CASCADE +); +``` + +### Table: users (simple auth) +```sql +CREATE TABLE users ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + username TEXT UNIQUE NOT NULL, + password_hash TEXT NOT NULL, + email TEXT, + is_active BOOLEAN DEFAULT TRUE, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP +); +``` + +## API Endpoints to Implement + +### Authentication +``` +POST /api/auth/login # Login and get token +POST /api/auth/logout # Logout +GET /api/auth/me # Get current user +POST /api/auth/change-password +``` + +### Customers +``` +POST /api/customers # Create + auto-deploy +GET /api/customers # List all (pagination, search, filter) +GET /api/customers/{id} # Get details +PUT /api/customers/{id} # Update +DELETE /api/customers/{id} # Delete + cleanup +``` + +### Deployments +``` +POST /api/customers/{id}/deploy # Manual deploy +POST /api/customers/{id}/start # Start containers +POST /api/customers/{id}/stop # Stop containers +POST /api/customers/{id}/restart # Restart containers +GET /api/customers/{id}/logs # Get container 
logs +GET /api/customers/{id}/health # Health check +``` + +### Monitoring +``` +GET /api/monitoring/status # System overview +GET /api/monitoring/customers # All customers status +GET /api/monitoring/resources # Host resource usage +``` + +### Settings +``` +GET /api/settings/system # Get system config +PUT /api/settings/system # Update system config +GET /api/settings/test-npm # Test NPM connectivity +``` + +## Docker Compose Template (Per Customer) + +```yaml +version: '3.8' + +networks: + npm-network: + external: true + +services: + netbird-management: + image: {{ netbird_management_image }} + container_name: netbird-{{ subdomain }}-management + restart: unless-stopped + networks: + - npm-network + volumes: + - {{ instance_dir }}/data/management:/var/lib/netbird + - {{ instance_dir }}/management.json:/etc/netbird/management.json + command: ["--port", "80", "--log-file", "console", "--log-level", "info", + "--single-account-mode-domain={{ subdomain }}.{{ base_domain }}", + "--dns-domain={{ subdomain }}.{{ base_domain }}"] + + netbird-signal: + image: {{ netbird_signal_image }} + container_name: netbird-{{ subdomain }}-signal + restart: unless-stopped + networks: + - npm-network + volumes: + - {{ instance_dir }}/data/signal:/var/lib/netbird + + netbird-relay: + image: {{ netbird_relay_image }} + container_name: netbird-{{ subdomain }}-relay + restart: unless-stopped + networks: + - npm-network + ports: + - "{{ relay_udp_port }}:3478/udp" + env_file: + - {{ instance_dir }}/relay.env + environment: + - NB_ENABLE_STUN=true + - NB_STUN_PORTS=3478 + - NB_LISTEN_ADDRESS=:80 + - NB_EXPOSED_ADDRESS=rels://{{ subdomain }}.{{ base_domain }}:443 + - NB_AUTH_SECRET={{ relay_secret }} + + netbird-dashboard: + image: {{ netbird_dashboard_image }} + container_name: netbird-{{ subdomain }}-dashboard + restart: unless-stopped + networks: + - npm-network + environment: + - NETBIRD_MGMT_API_ENDPOINT=https://{{ subdomain }}.{{ base_domain }} + - 
NETBIRD_MGMT_GRPC_API_ENDPOINT=https://{{ subdomain }}.{{ base_domain }} +``` + +## Frontend Requirements + +### Main Dashboard (index.html) +**Layout:** +- Navbar: Logo, "New Customer" button, User menu (settings, logout) +- Stats Cards: Total customers, Active, Inactive, Errors +- Customer Table: Name, Subdomain, Status, Devices, Actions +- Pagination: 25 customers per page +- Search bar: Filter by name, subdomain, email +- Status filter dropdown: All, Active, Inactive, Error + +**Customer Table Actions:** +- View Details (→ customer detail page) +- Start/Stop/Restart (inline buttons) +- Delete (with confirmation modal) + +### Customer Detail Page +**Tabs:** +1. **Info**: All customer details, edit button +2. **Deployment**: Status, Setup URL (copy button), Container status +3. **Logs**: Real-time logs from all containers (auto-refresh) +4. **Health**: Health check results, relay connectivity test + +### Settings Page +**Tabs:** +1. **System Configuration**: All system settings, save button +2. **NPM Integration**: API URL, Token, Test button +3. **Images**: NetBird Docker image tags +4. **Security**: Change admin password + +### Modal Dialogs +- New/Edit Customer Form +- Delete Confirmation +- Deployment Progress (with spinner) +- Error Display + +## Security Requirements + +1. **Password Hashing**: Use bcrypt for admin password +2. **Secret Encryption**: Encrypt NPM token and relay secrets with Fernet +3. **Input Validation**: Pydantic models for all API inputs +4. **SQL Injection Prevention**: Use SQLAlchemy ORM (no raw queries) +5. **CSRF Protection**: Token-based authentication +6. 
**Rate Limiting**: Prevent brute force on login endpoint + +## Error Handling + +All operations should have comprehensive error handling: + +```python +try: + # Deploy customer + result = deploy_customer(customer_id) +except DockerException as e: + # Rollback: Stop containers + # Log error + # Update status to 'failed' + # Return error to user +except NPMException as e: + # Rollback: Remove containers + # Log error + # Update status to 'failed' +except Exception as e: + # Generic rollback + # Log error + # Alert admin +``` + +## Testing Requirements + +1. **Unit Tests**: All services (docker_service, npm_service, etc.) +2. **Integration Tests**: Full deployment workflow +3. **API Tests**: All endpoints with different scenarios +4. **Mock External Dependencies**: Docker API, NPM API + +## Deployment Process + +1. Clone repository +2. Run `./install.sh` +3. Access `http://server-ip:8000` +4. Complete setup wizard +5. Deploy first customer + +## System Requirements Documentation + +**Include in README.md:** + +### For 100 Customers: +- **CPU**: 16 cores (minimum 8) +- **RAM**: 64 GB (minimum) - 128 GB (recommended) + - Formula: `(100 customers × 600 MB) + 8 GB overhead = 68 GB` +- **Disk**: 500 GB SSD (minimum) - 1 TB recommended +- **Network**: 1 Gbps dedicated connection +- **OS**: Ubuntu 22.04 LTS or 24.04 LTS + +### Port Requirements: +- **TCP 8000**: Web UI +- **UDP 3478-3577**: Relay/STUN (100 ports for 100 customers) + +## Success Criteria + +✅ One-command installation via `install.sh` +✅ Web-based configuration (no manual file editing) +✅ Customer deployment < 2 minutes +✅ All settings in database +✅ Automatic NPM integration +✅ Comprehensive error handling +✅ Clean, professional UI +✅ Full API documentation (auto-generated) +✅ Health monitoring +✅ Easy to deploy on fresh Ubuntu VM + +## Special Notes for Claude Code + +- **Use type hints** throughout Python code +- **Document all functions** with docstrings +- **Follow PEP 8** style guidelines +- **Create 
modular code**: Each service should be independently testable +- **Use async/await** where appropriate (FastAPI endpoints) +- **Provide comprehensive comments** for complex logic +- **Include error messages** that help users troubleshoot + +## File Priorities + +Create in this order: +1. Basic structure (directories, requirements.txt, Dockerfile, docker-compose.yml) +2. Database models and setup (models.py, database.py) +3. Core services (docker_service.py, port_manager.py) +4. API routers (start with customers.py) +5. NPM integration (npm_service.py) +6. Templates (Jinja2 files) +7. Frontend (HTML, CSS, JS) +8. Installation script +9. Documentation +10. Tests + +This specification provides everything needed to build a production-ready NetBird MSP Appliance! diff --git a/README.md b/README.md index a31bf68..084e498 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,7 @@ A management solution for running isolated NetBird instances for your MSP busine - **Complete Isolation** — Each customer gets their own NetBird stack with separate data - **One-Click Deployment** — Deploy new customer instances in under 2 minutes - **Nginx Proxy Manager Integration** — Automatic SSL certificates and reverse proxy setup +- **SSL Certificate Modes** — Choose between per-customer Let's Encrypt certificates or a shared wildcard certificate - **Docker-Based** — Everything runs in containers for easy deployment ### Dashboard @@ -94,8 +95,8 @@ A management solution for running isolated NetBird instances for your MSP busine | | Caddy | | | | Caddy | | | +------------+ | | +------------+ | +------------------+ +------------------+ - kunde1.domain.de kundeN.domain.de - UDP 3478 UDP 3478+N-1 + customer-a.domain.de customer-x.domain.de + UDP 3478 UDP 3478+N-1 ``` ### Components per Customer Instance (5 containers): @@ -139,9 +140,9 @@ Example for 3 customers: | Customer | Dashboard (TCP) | Relay (UDP) | |----------|----------------|-------------| -| Kunde 1 | 9001 | 3478 | -| Kunde 2 | 9002 | 3479 | -| 
Kunde 3 | 9003 | 3480 | +| Customer-A | 9001 | 3478 | +| Customer-C | 9002 | 3479 | +| Customer-X | 9003 | 3480 | **Your firewall must allow both the TCP dashboard ports and the UDP relay ports!** @@ -269,7 +270,8 @@ Available under **Settings** in the web interface: | Tab | Settings | |-----|----------| -| **System** | Base domain, admin email, NPM credentials, Docker images, port ranges, data directory | +| **System** | Base domain, admin email, Docker images, port ranges, data directory | +| **NPM Integration** | NPM API URL, login credentials, SSL certificate mode (Let's Encrypt / Wildcard), wildcard certificate selection | | **Branding** | Platform name, subtitle, logo upload, default language | | **Users** | Create/edit/delete admin users, per-user language preference, MFA reset | | **Azure AD** | Azure AD / Entra ID SSO configuration | @@ -342,6 +344,26 @@ When MFA is enabled and a user logs in for the first time: - **Disable own TOTP** — In Settings > Security, click "Disable my TOTP" to remove your own MFA setup - **Disable MFA globally** — Uncheck the toggle in Settings > Security to allow login without MFA +### SSL Certificate Mode + +The appliance supports two SSL certificate modes for customer proxy hosts, configurable under **Settings > NPM Integration**: + +#### Let's Encrypt (default) +Each customer gets an individual Let's Encrypt certificate via HTTP-01 validation. This is the default behavior and requires no additional setup beyond a valid admin email. + +#### Wildcard Certificate +Use a pre-existing wildcard certificate (e.g. `*.yourdomain.com`) already uploaded in NPM. All customer proxy hosts share this certificate — no per-customer LE validation needed. + +**Setup:** +1. Upload a wildcard certificate in Nginx Proxy Manager (e.g. via DNS challenge) +2. Go to **Settings > NPM Integration** +3. Set **SSL Mode** to "Wildcard Certificate" +4. Click the refresh button to load certificates from NPM +5. 
Select your wildcard certificate from the dropdown +6. Click **Save NPM Settings** + +New customer deployments will automatically use the selected wildcard certificate. + --- ## API Documentation @@ -376,6 +398,7 @@ GET /api/customers/{id}/logs # Get container logs GET /api/customers/{id}/health # Health check GET /api/settings/branding # Get branding (public, no auth) +GET /api/settings/npm-certificates # List NPM SSL certificates PUT /api/settings # Update system settings GET /api/users # List users POST /api/users # Create user diff --git a/app/database.py b/app/database.py index 29d1f97..967ac6b 100644 --- a/app/database.py +++ b/app/database.py @@ -51,6 +51,22 @@ def init_db() -> None: Base.metadata.create_all(bind=engine) _run_migrations() + # Insert default SystemConfig row (id=1) if it doesn't exist yet + db = SessionLocal() + try: + if not db.query(SystemConfig).filter(SystemConfig.id == 1).first(): + db.add(SystemConfig( + id=1, + base_domain="example.com", + admin_email="admin@example.com", + npm_api_url="http://localhost:81", + npm_api_email_encrypted="", + npm_api_password_encrypted="", + )) + db.commit() + finally: + db.close() + def _run_migrations() -> None: """Add columns that may be missing from older database versions.""" @@ -83,6 +99,29 @@ def _run_migrations() -> None: ("system_config", "mfa_enabled", "BOOLEAN DEFAULT 0"), ("users", "totp_secret_encrypted", "TEXT"), ("users", "totp_enabled", "BOOLEAN DEFAULT 0"), + ("system_config", "ssl_mode", "TEXT DEFAULT 'letsencrypt'"), + ("system_config", "wildcard_cert_id", "INTEGER"), + # Windows DNS + ("system_config", "dns_enabled", "BOOLEAN DEFAULT 0"), + ("system_config", "dns_server", "TEXT"), + ("system_config", "dns_username", "TEXT"), + ("system_config", "dns_password_encrypted", "TEXT"), + ("system_config", "dns_zone", "TEXT"), + ("system_config", "dns_record_ip", "TEXT"), + # LDAP + ("system_config", "ldap_enabled", "BOOLEAN DEFAULT 0"), + ("system_config", "ldap_server", "TEXT"), + 
("system_config", "ldap_port", "INTEGER DEFAULT 389"), + ("system_config", "ldap_use_ssl", "BOOLEAN DEFAULT 0"), + ("system_config", "ldap_bind_dn", "TEXT"), + ("system_config", "ldap_bind_password_encrypted", "TEXT"), + ("system_config", "ldap_base_dn", "TEXT"), + ("system_config", "ldap_user_filter", "TEXT DEFAULT '(sAMAccountName={username})'"), + ("system_config", "ldap_group_dn", "TEXT"), + # Update management + ("system_config", "git_repo_url", "TEXT"), + ("system_config", "git_branch", "TEXT DEFAULT 'main'"), + ("system_config", "git_token_encrypted", "TEXT"), ] for table, column, col_type in migrations: if not _has_column(table, column): diff --git a/app/limiter.py b/app/limiter.py new file mode 100644 index 0000000..ae8efa9 --- /dev/null +++ b/app/limiter.py @@ -0,0 +1,5 @@ +"""Shared rate limiter instance.""" +from slowapi import Limiter +from slowapi.util import get_remote_address + +limiter = Limiter(key_func=get_remote_address) diff --git a/app/main.py b/app/main.py index be9cd11..f644fe6 100644 --- a/app/main.py +++ b/app/main.py @@ -3,12 +3,15 @@ import logging import os -from fastapi import FastAPI +from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from fastapi.staticfiles import StaticFiles +from slowapi import _rate_limit_exceeded_handler +from slowapi.errors import RateLimitExceeded from app.database import init_db +from app.limiter import limiter from app.routers import auth, customers, deployments, monitoring, settings, users # --------------------------------------------------------------------------- @@ -21,6 +24,9 @@ logging.basicConfig( ) logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Application +# --------------------------------------------------------------------------- # --------------------------------------------------------------------------- # Application # 
--------------------------------------------------------------------------- @@ -33,15 +39,40 @@ app = FastAPI( openapi_url="/api/openapi.json", ) -# CORS — allow same-origin; adjust if needed +# Attach limiter to app state and register the 429 exception handler +app.state.limiter = limiter +app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + +# CORS — restrict to explicitly configured origins only. +# Set ALLOWED_ORIGINS in .env as a comma-separated list of allowed origins, +# e.g. ALLOWED_ORIGINS=https://myapp.example.com +# If unset, no cross-origin requests are allowed (same-origin only). +_raw_origins = os.environ.get("ALLOWED_ORIGINS", "") +_allowed_origins = [o.strip() for o in _raw_origins.split(",") if o.strip()] + app.add_middleware( CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], + allow_origins=_allowed_origins, + allow_credentials=False, + allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"], + allow_headers=["Authorization", "Content-Type"], ) +# --------------------------------------------------------------------------- +# Security headers middleware +# --------------------------------------------------------------------------- +@app.middleware("http") +async def add_security_headers(request: Request, call_next): + """Attach standard security headers to every response.""" + response = await call_next(request) + response.headers["X-Content-Type-Options"] = "nosniff" + response.headers["X-Frame-Options"] = "DENY" + response.headers["X-XSS-Protection"] = "1; mode=block" + response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin" + response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains" + return response + + # --------------------------------------------------------------------------- # Routers # --------------------------------------------------------------------------- diff --git a/app/models.py b/app/models.py index 
c6682de..20c3303 100644 --- a/app/models.py +++ b/app/models.py @@ -161,11 +161,44 @@ class SystemConfig(Base): ) branding_logo_path: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) default_language: Mapped[Optional[str]] = mapped_column(String(10), default="en") + ssl_mode: Mapped[str] = mapped_column(String(20), default="letsencrypt") + wildcard_cert_id: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) mfa_enabled: Mapped[bool] = mapped_column(Boolean, default=False) azure_enabled: Mapped[bool] = mapped_column(Boolean, default=False) azure_tenant_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) azure_client_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) azure_client_secret_encrypted: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + azure_allowed_group_id: Mapped[Optional[str]] = mapped_column( + String(255), nullable=True, + comment="If set, only Azure AD users in this group (object ID) are allowed to log in." 
+ ) + + # Windows DNS integration + dns_enabled: Mapped[bool] = mapped_column(Boolean, default=False) + dns_server: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + dns_username: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + dns_password_encrypted: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + dns_zone: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + dns_record_ip: Mapped[Optional[str]] = mapped_column(String(45), nullable=True) + + # LDAP / Active Directory authentication + ldap_enabled: Mapped[bool] = mapped_column(Boolean, default=False) + ldap_server: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) + ldap_port: Mapped[int] = mapped_column(Integer, default=389) + ldap_use_ssl: Mapped[bool] = mapped_column(Boolean, default=False) + ldap_bind_dn: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) + ldap_bind_password_encrypted: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + ldap_base_dn: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) + ldap_user_filter: Mapped[Optional[str]] = mapped_column( + String(255), default="(sAMAccountName={username})" + ) + ldap_group_dn: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) + + # Update management + git_repo_url: Mapped[Optional[str]] = mapped_column(String(500), nullable=True) + git_branch: Mapped[Optional[str]] = mapped_column(String(100), default="main") + git_token_encrypted: Mapped[Optional[str]] = mapped_column(Text, nullable=True) + created_at: Mapped[datetime] = mapped_column(DateTime, default=datetime.utcnow) updated_at: Mapped[datetime] = mapped_column( DateTime, default=datetime.utcnow, onupdate=datetime.utcnow @@ -194,11 +227,32 @@ class SystemConfig(Base): "branding_subtitle": self.branding_subtitle or "Multi-Tenant Management Platform", "branding_logo_path": self.branding_logo_path, "default_language": self.default_language or "en", + "ssl_mode": 
self.ssl_mode or "letsencrypt", + "wildcard_cert_id": self.wildcard_cert_id, "mfa_enabled": bool(self.mfa_enabled), "azure_enabled": bool(self.azure_enabled), "azure_tenant_id": self.azure_tenant_id or "", "azure_client_id": self.azure_client_id or "", "azure_client_secret_set": bool(self.azure_client_secret_encrypted), + "azure_allowed_group_id": self.azure_allowed_group_id or "", + "dns_enabled": bool(self.dns_enabled), + "dns_server": self.dns_server or "", + "dns_username": self.dns_username or "", + "dns_password_set": bool(self.dns_password_encrypted), + "dns_zone": self.dns_zone or "", + "dns_record_ip": self.dns_record_ip or "", + "ldap_enabled": bool(self.ldap_enabled), + "ldap_server": self.ldap_server or "", + "ldap_port": self.ldap_port or 389, + "ldap_use_ssl": bool(self.ldap_use_ssl), + "ldap_bind_dn": self.ldap_bind_dn or "", + "ldap_bind_password_set": bool(self.ldap_bind_password_encrypted), + "ldap_base_dn": self.ldap_base_dn or "", + "ldap_user_filter": self.ldap_user_filter or "(sAMAccountName={username})", + "ldap_group_dn": self.ldap_group_dn or "", + "git_repo_url": self.git_repo_url or "", + "git_branch": self.git_branch or "main", + "git_token_set": bool(self.git_token_encrypted), "created_at": self.created_at.isoformat() if self.created_at else None, "updated_at": self.updated_at.isoformat() if self.updated_at else None, } diff --git a/app/routers/auth.py b/app/routers/auth.py index 7e541cc..bac548e 100644 --- a/app/routers/auth.py +++ b/app/routers/auth.py @@ -6,13 +6,15 @@ import logging import secrets from datetime import datetime -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, Depends, HTTPException, Request, status from pydantic import BaseModel from sqlalchemy.orm import Session from app.database import get_db from app.dependencies import create_access_token, create_mfa_token, get_current_user, verify_mfa_token from app.models import SystemConfig, User +from app.services import 
ldap_service +from app.utils.config import get_system_config from app.utils.security import ( decrypt_value, encrypt_value, @@ -27,26 +29,102 @@ from app.utils.validators import ChangePasswordRequest, LoginRequest, MfaTokenRe logger = logging.getLogger(__name__) router = APIRouter() +from app.limiter import limiter + @router.post("/login") -async def login(payload: LoginRequest, db: Session = Depends(get_db)): - """Authenticate with username/password. May require MFA as a second step.""" - user = db.query(User).filter(User.username == payload.username).first() - if not user or not verify_password(payload.password, user.password_hash): - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - detail="Invalid username or password.", - ) +@limiter.limit("10/minute") +async def login(request: Request, payload: LoginRequest, db: Session = Depends(get_db)): + """Authenticate with username/password. May require MFA as a second step. + + Auth flow: + 1. If LDAP is enabled: try LDAP authentication first. + - Success → find or auto-create local User with auth_provider="ldap" + - Wrong password (user found in LDAP) → HTTP 401 + - User not found in LDAP → fall through to local auth + 2. Local auth: verify bcrypt hash for users with auth_provider="local" + 3. On success: check MFA requirement (local users only) then issue JWT + + Rate-limited to 10 attempts per minute per IP address. 
+ """ + config = get_system_config(db) + user: User | None = None + + # ------------------------------------------------------------------ + # Step 1: LDAP authentication (if enabled) + # ------------------------------------------------------------------ + if config and config.ldap_enabled and config.ldap_server: + try: + ldap_info = await ldap_service.authenticate_ldap( + payload.username, payload.password, config + ) + if ldap_info is not None: + # User authenticated via LDAP — find or create local record + user = db.query(User).filter(User.username == ldap_info["username"]).first() + if not user: + user = User( + username=ldap_info["username"], + password_hash=hash_password(secrets.token_urlsafe(32)), + email=ldap_info.get("email", ""), + is_active=True, + role="viewer", + auth_provider="ldap", + ) + db.add(user) + db.commit() + db.refresh(user) + logger.info("LDAP user '%s' auto-created with role 'viewer'.", ldap_info["username"]) + elif not user.is_active: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Account is disabled.", + ) + else: + # Keep auth_provider in sync in case it was changed + if user.auth_provider != "ldap": + user.auth_provider = "ldap" + db.commit() + except ValueError as exc: + # User found in LDAP but wrong password or group denied + logger.warning("LDAP login failed for '%s': %s", payload.username, exc) + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid username or password.", + ) + except RuntimeError as exc: + # LDAP server unreachable — log and fall through to local auth + logger.error("LDAP server error, falling back to local auth: %s", exc) + + # ------------------------------------------------------------------ + # Step 2: Local authentication (if LDAP didn't produce a user) + # ------------------------------------------------------------------ + if user is None: + local_user = db.query(User).filter(User.username == payload.username).first() + if local_user and 
local_user.auth_provider == "local": + if not verify_password(payload.password, local_user.password_hash): + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid username or password.", + ) + user = local_user + else: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid username or password.", + ) + if not user.is_active: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, detail="Account is disabled.", ) - # Check if MFA is required (only for local users) + # ------------------------------------------------------------------ + # Step 3: MFA check (local users only) + # ------------------------------------------------------------------ if user.auth_provider == "local": - config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() - if config and getattr(config, "mfa_enabled", False): + sys_config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if sys_config and getattr(sys_config, "mfa_enabled", False): mfa_token = create_mfa_token(user.username) return { "mfa_required": True, @@ -55,7 +133,7 @@ async def login(payload: LoginRequest, db: Session = Depends(get_db)): } token = create_access_token(user.username) - logger.info("User %s logged in.", user.username) + logger.info("User %s logged in (provider: %s).", user.username, user.auth_provider) return { "access_token": token, "token_type": "bearer", @@ -129,8 +207,12 @@ async def mfa_setup_complete(payload: MfaVerifyRequest, db: Session = Depends(ge @router.post("/mfa/verify") -async def mfa_verify(payload: MfaVerifyRequest, db: Session = Depends(get_db)): - """Verify a TOTP code for users who already have MFA set up.""" +@limiter.limit("10/minute") +async def mfa_verify(request: Request, payload: MfaVerifyRequest, db: Session = Depends(get_db)): + """Verify a TOTP code for users who already have MFA set up. + + Rate-limited to 10 attempts per minute per IP address. 
+ """ username = verify_mfa_token(payload.mfa_token) user = db.query(User).filter(User.username == username).first() if not user: @@ -262,17 +344,18 @@ async def azure_callback( try: import msal + import httpx as _httpx client_secret = decrypt_value(config.azure_client_secret_encrypted) authority = f"https://login.microsoftonline.com/{config.azure_tenant_id}" - app = msal.ConfidentialClientApplication( + msal_app = msal.ConfidentialClientApplication( config.azure_client_id, authority=authority, client_credential=client_secret, ) - result = app.acquire_token_by_authorization_code( + result = msal_app.acquire_token_by_authorization_code( payload.code, scopes=["User.Read"], redirect_uri=payload.redirect_uri, @@ -287,7 +370,8 @@ async def azure_callback( id_token_claims = result.get("id_token_claims", {}) email = id_token_claims.get("preferred_username") or id_token_claims.get("email", "") - display_name = id_token_claims.get("name", email) + display_name = id_token_claims.get("name", email) # noqa: F841 + user_access_token = result.get("access_token", "") if not email: raise HTTPException( @@ -295,6 +379,54 @@ async def azure_callback( detail="Could not determine email from Azure AD token.", ) + # ----------------------------------------------------------------- + # Group membership check (Fix #3 – Azure AD group whitelist) + # ----------------------------------------------------------------- + allowed_group_id = getattr(config, "azure_allowed_group_id", None) + if allowed_group_id: + # Use the user's own access token to check their group membership + # via the Microsoft Graph API (requires GroupMember.Read.All or + # the user's own memberOf delegated permission). 
+ graph_url = "https://graph.microsoft.com/v1.0/me/memberOf" + is_member = False + try: + async with _httpx.AsyncClient(timeout=10) as http: + resp = await http.get( + graph_url, + headers={"Authorization": f"Bearer {user_access_token}"}, + ) + if resp.status_code == 200: + groups = resp.json().get("value", []) + is_member = any( + g.get("id") == allowed_group_id for g in groups + ) + else: + logger.warning( + "Graph API group check returned %s for user '%s'.", + resp.status_code, email, + ) + except Exception as graph_exc: + logger.error("Graph API group check failed: %s", graph_exc) + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Could not verify Azure AD group membership. Please try again.", + ) + + if not is_member: + logger.warning( + "Azure AD login denied for '%s': not a member of required group '%s'.", + email, allowed_group_id, + ) + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Access denied: you are not a member of the required Azure AD group.", + ) + else: + logger.warning( + "azure_allowed_group_id is not configured. All Azure AD tenant users can log in. " + "Set azure_allowed_group_id in Settings to restrict access." 
+ ) + # Find or create user user = db.query(User).filter(User.username == email).first() if not user: @@ -303,13 +435,13 @@ async def azure_callback( password_hash=hash_password(secrets.token_urlsafe(32)), email=email, is_active=True, - role="admin", + role="viewer", # New Azure users start as viewer; promote manually auth_provider="azure", ) db.add(user) db.commit() db.refresh(user) - logger.info("Azure AD user '%s' auto-created.", email) + logger.info("Azure AD user '%s' auto-created with role 'viewer'.", email) elif not user.is_active: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, @@ -326,9 +458,9 @@ async def azure_callback( except HTTPException: raise - except Exception as exc: + except Exception: logger.exception("Azure AD authentication error") raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - detail=f"Azure AD authentication error: {exc}", + detail="Azure AD authentication failed. Please try again or contact support.", ) diff --git a/app/routers/customers.py b/app/routers/customers.py index 27685c2..0f1280b 100644 --- a/app/routers/customers.py +++ b/app/routers/customers.py @@ -211,12 +211,14 @@ async def update_customer( @router.delete("/{customer_id}") async def delete_customer( customer_id: int, + background_tasks: BackgroundTasks, current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): """Delete a customer and clean up all resources. Removes containers, NPM proxy, instance directory, and database records. + Cleanup runs in background so the response returns immediately. Args: customer_id: Customer ID. 
@@ -231,15 +233,23 @@ async def delete_customer( detail="Customer not found.", ) - # Undeploy first (containers, NPM, files) - try: - await netbird_service.undeploy_customer(db, customer_id) - except Exception: - logger.exception("Undeploy error for customer %d (continuing with delete)", customer_id) - - # Delete customer record (cascades to deployment + logs) - db.delete(customer) + # Mark as deleting immediately so UI reflects the state + customer.status = "inactive" db.commit() - logger.info("Customer %d deleted by %s.", customer_id, current_user.username) - return {"message": f"Customer {customer_id} deleted successfully."} + async def _delete_in_background(cid: int) -> None: + bg_db = SessionLocal() + try: + await netbird_service.undeploy_customer(bg_db, cid) + c = bg_db.query(Customer).filter(Customer.id == cid).first() + if c: + bg_db.delete(c) + bg_db.commit() + logger.info("Customer %d deleted by %s.", cid, current_user.username) + except Exception: + logger.exception("Background delete failed for customer %d", cid) + finally: + bg_db.close() + + background_tasks.add_task(_delete_in_background, customer_id) + return {"message": f"Customer {customer_id} deletion started."} diff --git a/app/routers/deployments.py b/app/routers/deployments.py index 1e5f8eb..2f3f072 100644 --- a/app/routers/deployments.py +++ b/app/routers/deployments.py @@ -7,8 +7,8 @@ from sqlalchemy.orm import Session from app.database import SessionLocal, get_db from app.dependencies import get_current_user -from app.models import Customer, Deployment, User -from app.services import docker_service, netbird_service +from app.models import Customer, Deployment, SystemConfig, User +from app.services import docker_service, image_service, netbird_service from app.utils.security import decrypt_value logger = logging.getLogger(__name__) @@ -72,7 +72,7 @@ async def start_customer( Result dict. 
""" _require_customer(db, customer_id) - result = netbird_service.start_customer(db, customer_id) + result = await netbird_service.start_customer(db, customer_id) if not result.get("success"): raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -96,7 +96,7 @@ async def stop_customer( Result dict. """ _require_customer(db, customer_id) - result = netbird_service.stop_customer(db, customer_id) + result = await netbird_service.stop_customer(db, customer_id) if not result.get("success"): raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -120,7 +120,7 @@ async def restart_customer( Result dict. """ _require_customer(db, customer_id) - result = netbird_service.restart_customer(db, customer_id) + result = await netbird_service.restart_customer(db, customer_id) if not result.get("success"): raise HTTPException( status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, @@ -207,6 +207,50 @@ async def get_customer_credentials( } +@router.post("/{customer_id}/update-images") +async def update_customer_images( + customer_id: int, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Recreate a customer's containers to pick up newly pulled images. + + Images must already be pulled via POST /monitoring/images/pull. + Bind-mounted data is preserved — no data loss. + """ + if current_user.role != "admin": + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") + + customer = _require_customer(db, customer_id) + deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() + if not deployment: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No deployment found for this customer.", + ) + + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured." 
+ ) + + instance_dir = f"{config.data_dir}/{customer.subdomain}" + result = await image_service.update_customer_containers(instance_dir, deployment.container_prefix) + + if not result["success"]: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=result.get("error", "Failed to update containers."), + ) + + logger.info( + "Containers updated for customer '%s' (prefix: %s) by '%s'.", + customer.name, deployment.container_prefix, current_user.username, + ) + return {"message": f"Containers updated for '{customer.name}'."} + + def _require_customer(db: Session, customer_id: int) -> Customer: """Helper to fetch a customer or raise 404. diff --git a/app/routers/monitoring.py b/app/routers/monitoring.py index a35e8bd..211190a 100644 --- a/app/routers/monitoring.py +++ b/app/routers/monitoring.py @@ -5,13 +5,13 @@ import platform from typing import Any import psutil -from fastapi import APIRouter, Depends +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status from sqlalchemy.orm import Session -from app.database import get_db +from app.database import SessionLocal, get_db from app.dependencies import get_current_user -from app.models import Customer, Deployment, User -from app.services import docker_service +from app.models import Customer, Deployment, SystemConfig, User +from app.services import docker_service, image_service logger = logging.getLogger(__name__) router = APIRouter() @@ -115,3 +115,160 @@ async def host_resources( "percent": disk.percent, }, } + + +@router.get("/images/check") +async def check_image_updates( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> dict[str, Any]: + """Check all configured NetBird images for available updates on Docker Hub. + + Compares local image digests against Docker Hub — no image is pulled. 
+ + Returns: + images: dict mapping image name to update status + any_update_available: bool + customer_status: list of per-customer container image status + """ + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") + + hub_status = await image_service.check_all_images(config) + + # Per-customer local check (no network) + deployments = db.query(Deployment).all() + customer_status = [] + for dep in deployments: + customer = dep.customer + cs = image_service.get_customer_container_image_status(dep.container_prefix, config) + customer_status.append({ + "customer_id": customer.id, + "customer_name": customer.name, + "subdomain": customer.subdomain, + "container_prefix": dep.container_prefix, + "needs_update": cs["needs_update"], + "services": cs["services"], + }) + + return {**hub_status, "customer_status": customer_status} + + +@router.post("/images/pull") +async def pull_all_netbird_images( + background_tasks: BackgroundTasks, + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> dict[str, Any]: + """Pull all configured NetBird images from Docker Hub. + + Runs in the background — returns immediately. After pulling, re-check + customer status via GET /images/check to see which customers need updating. 
+ """ + if current_user.role != "admin": + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") + + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") + + # Snapshot image list before background task starts + images = [ + config.netbird_management_image, + config.netbird_signal_image, + config.netbird_relay_image, + config.netbird_dashboard_image, + ] + + async def _pull_bg() -> None: + bg_db = SessionLocal() + try: + cfg = bg_db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if cfg: + await image_service.pull_all_images(cfg) + except Exception: + logger.exception("Background image pull failed") + finally: + bg_db.close() + + background_tasks.add_task(_pull_bg) + return {"message": "Image pull started in background.", "images": images} + + +@router.get("/customers/local-update-status") +async def customers_local_update_status( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> list[dict[str, Any]]: + """Fast local-only check for outdated customer containers. + + Compares running container image IDs against locally stored images. + No network call — safe to call on every dashboard load. + """ + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + return [] + deployments = db.query(Deployment).all() + results = [] + for dep in deployments: + cs = image_service.get_customer_container_image_status(dep.container_prefix, config) + results.append({"customer_id": dep.customer_id, "needs_update": cs["needs_update"]}) + return results + + +@router.post("/customers/update-all") +async def update_all_customers( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +) -> dict[str, Any]: + """Recreate containers for all customers with outdated images — sequential, synchronous. 
+ + Updates customers one at a time so a failing customer does not block others. + Images must already be pulled. Data is preserved (bind mounts). + Returns detailed per-customer results. + """ + if current_user.role != "admin": + raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, detail="Admin only.") + + config = db.query(SystemConfig).filter(SystemConfig.id == 1).first() + if not config: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="System not configured.") + + deployments = db.query(Deployment).all() + to_update = [] + for dep in deployments: + cs = image_service.get_customer_container_image_status(dep.container_prefix, config) + if cs["needs_update"]: + customer = dep.customer + to_update.append({ + "instance_dir": f"{config.data_dir}/{customer.subdomain}", + "project_name": dep.container_prefix, + "customer_name": customer.name, + "customer_id": customer.id, + }) + + if not to_update: + return {"message": "All customers are already up to date.", "updated": 0, "results": []} + + # Update customers sequentially — one at a time + update_results = [] + for entry in to_update: + res = await image_service.update_customer_containers( + entry["instance_dir"], entry["project_name"] + ) + ok = res["success"] + logger.info("Updated %s: %s", entry["project_name"], "OK" if ok else res.get("error")) + update_results.append({ + "customer_name": entry["customer_name"], + "customer_id": entry["customer_id"], + "success": ok, + "error": res.get("error"), + }) + + success_count = sum(1 for r in update_results if r["success"]) + return { + "message": f"Updated {success_count} of {len(update_results)} customer(s).", + "updated": success_count, + "results": update_results, + } diff --git a/app/routers/settings.py b/app/routers/settings.py index a6990b7..412c5e4 100644 --- a/app/routers/settings.py +++ b/app/routers/settings.py @@ -15,8 +15,8 @@ from sqlalchemy.orm import Session from app.database import get_db from app.dependencies import 
get_current_user from app.models import SystemConfig, User -from app.services import npm_service -from app.utils.config import get_system_config +from app.services import dns_service, ldap_service, npm_service, update_service +from app.utils.config import DATABASE_PATH, get_system_config from app.utils.security import encrypt_value from app.utils.validators import SystemConfigUpdate @@ -86,6 +86,18 @@ async def update_settings( raw_secret = update_data.pop("azure_client_secret") row.azure_client_secret_encrypted = encrypt_value(raw_secret) + # Handle DNS password encryption + if "dns_password" in update_data: + row.dns_password_encrypted = encrypt_value(update_data.pop("dns_password")) + + # Handle LDAP bind password encryption + if "ldap_bind_password" in update_data: + row.ldap_bind_password_encrypted = encrypt_value(update_data.pop("ldap_bind_password")) + + # Handle git token encryption + if "git_token" in update_data: + row.git_token_encrypted = encrypt_value(update_data.pop("git_token")) + for field, value in update_data.items(): if hasattr(row, field): setattr(row, field, value) @@ -129,9 +141,106 @@ async def test_npm( return result +@router.get("/npm-certificates") +async def list_npm_certificates( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """List all SSL certificates configured in NPM. + + Used by the frontend to populate the wildcard certificate dropdown. + + Returns: + List of certificate dicts with id, domain_names, provider, expires_on, is_wildcard. 
+ """ + config = get_system_config(db) + if not config: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="System configuration not initialized.", + ) + if not config.npm_api_url or not config.npm_api_email or not config.npm_api_password: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="NPM API URL or credentials not configured.", + ) + + result = await npm_service.list_certificates( + config.npm_api_url, config.npm_api_email, config.npm_api_password + ) + if "error" in result: + raise HTTPException( + status_code=status.HTTP_502_BAD_GATEWAY, + detail=result["error"], + ) + return result["certificates"] + + +@router.get("/test-dns") +async def test_dns( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Test connectivity to the Windows DNS server via WinRM. + + Returns: + Dict with ``ok`` and ``message``. + """ + config = get_system_config(db) + if not config: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="System configuration not initialized.", + ) + if not config.dns_enabled: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Windows DNS integration is not enabled.", + ) + if not config.dns_server or not config.dns_username or not config.dns_password: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="DNS server, username, or password not configured.", + ) + return await dns_service.test_dns_connection(config) + + +@router.get("/test-ldap") +async def test_ldap( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Test connectivity to the LDAP / Active Directory server. + + Returns: + Dict with ``ok`` and ``message``. 
+ """ + config = get_system_config(db) + if not config: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="System configuration not initialized.", + ) + if not config.ldap_enabled: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="LDAP authentication is not enabled.", + ) + if not config.ldap_server or not config.ldap_bind_dn or not config.ldap_bind_password: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="LDAP server, bind DN, or bind password not configured.", + ) + return await ldap_service.test_ldap_connection(config) + + @router.get("/branding") async def get_branding(db: Session = Depends(get_db)): """Public endpoint — returns branding info for the login page (no auth required).""" + current_version = update_service.get_current_version().get("tag", "alpha-1.1") + if current_version == "unknown": + current_version = "alpha-1.1" + row = db.query(SystemConfig).filter(SystemConfig.id == 1).first() if not row: return { @@ -139,12 +248,14 @@ async def get_branding(db: Session = Depends(get_db)): "branding_subtitle": "Multi-Tenant Management Platform", "branding_logo_path": None, "default_language": "en", + "version": current_version } return { "branding_name": row.branding_name or "NetBird MSP Appliance", "branding_subtitle": row.branding_subtitle or "Multi-Tenant Management Platform", "branding_logo_path": row.branding_logo_path, "default_language": row.default_language or "en", + "version": current_version } @@ -209,3 +320,74 @@ async def delete_logo( db.commit() return {"branding_logo_path": None} + + +@router.get("/version") +async def get_version( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Return current installed version and latest available from the git remote. + + Returns: + Dict with current version, latest version, and needs_update flag. 
+ """ + config = get_system_config(db) + current = update_service.get_current_version() + if not config or not config.git_repo_url: + return {"current": current, "latest": None, "needs_update": False} + result = await update_service.check_for_updates(config) + return result + + +@router.get("/branches") +async def get_branches( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Return a list of available branches from the configured git remote.""" + config = get_system_config(db) + if not config or not config.git_repo_url: + return [] + branches = await update_service.get_remote_branches(config) + return branches + + +@router.post("/update") +async def trigger_update( + current_user: User = Depends(get_current_user), + db: Session = Depends(get_db), +): + """Backup the database, git pull the latest code, and rebuild the container. + + The rebuild is fire-and-forget — the app will restart in ~60 seconds. + Only admin users may trigger an update. + + Returns: + Dict with ok, message, and backup path. 
+ """ + if getattr(current_user, "role", "admin") != "admin": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Only admin users can trigger an update.", + ) + config = get_system_config(db) + if not config: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="System configuration not initialized.", + ) + if not config.git_repo_url: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="git_repo_url is not configured in settings.", + ) + + result = update_service.trigger_update(config, DATABASE_PATH) + if not result.get("ok"): + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail=result.get("message", "Update failed."), + ) + logger.info("Update triggered by %s.", current_user.username) + return result diff --git a/app/routers/users.py b/app/routers/users.py index 6fafa69..03de7f2 100644 --- a/app/routers/users.py +++ b/app/routers/users.py @@ -33,6 +33,12 @@ async def create_user( db: Session = Depends(get_db), ): """Create a new local user.""" + if current_user.role != "admin": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Only admins can create new users.", + ) + existing = db.query(User).filter(User.username == payload.username).first() if existing: raise HTTPException( @@ -64,12 +70,31 @@ async def update_user( current_user: User = Depends(get_current_user), db: Session = Depends(get_db), ): - """Update an existing user (email, is_active, role).""" + """Update an existing user (email, is_active, role). 
Admin only.""" + if current_user.role != "admin": + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Only admins can update users.", + ) + user = db.query(User).filter(User.id == user_id).first() if not user: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="User not found.") update_data = payload.model_dump(exclude_none=True) + + if "role" in update_data: + if update_data["role"] not in ("admin", "viewer"): + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Role must be 'admin' or 'viewer'.", + ) + if user_id == current_user.id: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="You cannot change your own role.", + ) + for field, value in update_data.items(): if hasattr(user, field): setattr(user, field, value) @@ -120,7 +145,7 @@ async def reset_password( if user.auth_provider != "local": raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail="Cannot reset password for Azure AD users.", + detail="Cannot reset password for external auth users (Azure AD / LDAP).", ) new_password = secrets.token_urlsafe(16) @@ -145,7 +170,7 @@ async def reset_mfa( if user.auth_provider != "local": raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail="Cannot reset MFA for Azure AD users.", + detail="Cannot reset MFA for external auth users (Azure AD / LDAP).", ) user.totp_enabled = False diff --git a/app/services/dns_service.py b/app/services/dns_service.py new file mode 100644 index 0000000..4bacf83 --- /dev/null +++ b/app/services/dns_service.py @@ -0,0 +1,153 @@ +"""Windows DNS Server integration via WinRM + PowerShell. + +Uses pywinrm to execute PowerShell DNS cmdlets on a remote Windows DNS server. +All WinRM operations run in a thread executor since pywinrm is synchronous. 
+ +Typical usage: + config = get_system_config(db) + result = await create_dns_record("kunde1", config) + # result == {"ok": True, "message": "A-record 'kunde1.example.com → 10.0.0.5' created."} +""" + +import asyncio +import logging +from typing import Any + +logger = logging.getLogger(__name__) + + +def _winrm_run(server: str, username: str, password: str, ps_script: str) -> tuple[int, str, str]: + """Execute a PowerShell script via WinRM and return (status_code, stdout, stderr). + + Runs synchronously — must be called via run_in_executor. + """ + import winrm # imported here so the app starts even without pywinrm installed + + session = winrm.Session( + target=server, + auth=(username, password), + transport="ntlm", + ) + result = session.run_ps(ps_script) + stdout = result.std_out.decode("utf-8", errors="replace").strip() + stderr = result.std_err.decode("utf-8", errors="replace").strip() + return result.status_code, stdout, stderr + + +async def _run_ps(server: str, username: str, password: str, ps_script: str) -> tuple[int, str, str]: + """Async wrapper: runs _winrm_run in a thread executor.""" + loop = asyncio.get_event_loop() + return await loop.run_in_executor(None, _winrm_run, server, username, password, ps_script) + + +async def test_dns_connection(config: Any) -> dict: + """Test WinRM connectivity to the Windows DNS server. + + Runs 'Get-DnsServerZone' to verify the configured zone exists. + + Args: + config: AppConfig with dns_server, dns_username, dns_password, dns_zone. + + Returns: + Dict with ``ok`` (bool) and ``message`` (str). + """ + zone = config.dns_zone.strip() + ps = f"Get-DnsServerZone -Name '{zone}' | Select-Object ZoneName, ZoneType" + try: + code, stdout, stderr = await _run_ps( + config.dns_server, config.dns_username, config.dns_password, ps + ) + if code == 0 and zone.lower() in stdout.lower(): + return {"ok": True, "message": f"Connected. 
Zone '{zone}' found on {config.dns_server}."} + err = stderr or stdout or "Unknown error" + return {"ok": False, "message": f"Zone '{zone}' not found or access denied: {err[:300]}"} + except ImportError: + return {"ok": False, "message": "pywinrm is not installed. Add 'pywinrm' to requirements.txt."} + except Exception as exc: + logger.error("DNS connection test failed: %s", exc) + return {"ok": False, "message": f"Connection failed: {exc}"} + + +async def create_dns_record(subdomain: str, config: Any) -> dict: + """Create an A-record in the Windows DNS server. + + Record: {subdomain}.{zone} → {dns_record_ip} + + If a record already exists for the subdomain, it is removed first to avoid + duplicate-record errors (idempotent behaviour for re-deployments). + + Args: + subdomain: The customer subdomain (e.g. ``kunde1``). + config: AppConfig with DNS settings. + + Returns: + Dict with ``ok`` (bool) and ``message`` (str). + """ + zone = config.dns_zone.strip() + ip = config.dns_record_ip.strip() + name = subdomain.strip() + + # Remove existing record first (idempotent — ignore errors) + ps_remove = ( + f"Try {{" + f" Remove-DnsServerResourceRecord -ZoneName '{zone}' -RRType 'A' -Name '{name}' -Force -ErrorAction SilentlyContinue" + f"}} Catch {{}}" + ) + # Create new A-record + ps_add = f"Add-DnsServerResourceRecordA -ZoneName '{zone}' -Name '{name}' -IPv4Address '{ip}' -TimeToLive 00:05:00" + + ps_script = f"{ps_remove}\n{ps_add}" + + try: + code, stdout, stderr = await _run_ps( + config.dns_server, config.dns_username, config.dns_password, ps_script + ) + if code == 0: + logger.info("DNS A-record created: %s.%s → %s", name, zone, ip) + return {"ok": True, "message": f"A-record '{name}.{zone} → {ip}' created successfully."} + err = stderr or stdout or "Unknown error" + logger.warning("DNS A-record creation failed for %s.%s: %s", name, zone, err) + return {"ok": False, "message": f"Failed to create DNS record: {err[:300]}"} + except ImportError: + return {"ok": False, 
"message": "pywinrm is not installed. Add 'pywinrm' to requirements.txt."} + except Exception as exc: + logger.error("DNS create_record error for %s.%s: %s", name, zone, exc) + return {"ok": False, "message": f"DNS error: {exc}"} + + +async def delete_dns_record(subdomain: str, config: Any) -> dict: + """Delete the A-record for a customer subdomain from the Windows DNS server. + + Args: + subdomain: The customer subdomain (e.g. ``kunde1``). + config: AppConfig with DNS settings. + + Returns: + Dict with ``ok`` (bool) and ``message`` (str). + """ + zone = config.dns_zone.strip() + name = subdomain.strip() + + ps_script = ( + f"Remove-DnsServerResourceRecord -ZoneName '{zone}' -RRType 'A' -Name '{name}' -Force" + ) + + try: + code, stdout, stderr = await _run_ps( + config.dns_server, config.dns_username, config.dns_password, ps_script + ) + if code == 0: + logger.info("DNS A-record deleted: %s.%s", name, zone) + return {"ok": True, "message": f"A-record '{name}.{zone}' deleted successfully."} + err = stderr or stdout or "Unknown error" + # Record not found is acceptable during deletion + if "not found" in err.lower() or "does not exist" in err.lower(): + logger.info("DNS A-record %s.%s not found (already deleted).", name, zone) + return {"ok": True, "message": f"A-record '{name}.{zone}' not found (already deleted)."} + logger.warning("DNS A-record deletion failed for %s.%s: %s", name, zone, err) + return {"ok": False, "message": f"Failed to delete DNS record: {err[:300]}"} + except ImportError: + return {"ok": False, "message": "pywinrm is not installed. 
Add 'pywinrm' to requirements.txt."} + except Exception as exc: + logger.error("DNS delete_record error for %s.%s: %s", name, zone, exc) + return {"ok": False, "message": f"DNS error: {exc}"} diff --git a/app/services/docker_service.py b/app/services/docker_service.py index 1690575..5927514 100644 --- a/app/services/docker_service.py +++ b/app/services/docker_service.py @@ -5,6 +5,7 @@ per-customer Docker Compose stacks. Also provides log retrieval and container health/status information. """ +import asyncio import logging import os import subprocess @@ -17,6 +18,15 @@ from docker.errors import DockerException, NotFound logger = logging.getLogger(__name__) +async def _run_cmd(cmd: list[str], timeout: int = 120) -> subprocess.CompletedProcess: + """Run a subprocess command in a thread pool to avoid blocking the event loop.""" + loop = asyncio.get_event_loop() + return await loop.run_in_executor( # type: ignore[arg-type] + None, + lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout), + ) + + def _get_client() -> docker.DockerClient: """Return a Docker client connected via the Unix socket. @@ -26,7 +36,7 @@ def _get_client() -> docker.DockerClient: return docker.from_env() -def compose_up( +async def compose_up( instance_dir: str, project_name: str, services: Optional[list[str]] = None, @@ -63,7 +73,7 @@ def compose_up( cmd.extend(services) logger.info("Running: %s", " ".join(cmd)) - result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout) + result = await _run_cmd(cmd, timeout=timeout) if result.returncode != 0: logger.error("docker compose up failed: %s", result.stderr) @@ -74,7 +84,7 @@ def compose_up( return True -def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = False) -> bool: +async def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = False) -> bool: """Run ``docker compose down`` for a customer instance. 
Args: @@ -96,14 +106,14 @@ def compose_down(instance_dir: str, project_name: str, remove_volumes: bool = Fa cmd.append("-v") logger.info("Running: %s", " ".join(cmd)) - result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) + result = await _run_cmd(cmd) if result.returncode != 0: logger.warning("docker compose down returned non-zero: %s", result.stderr) return True -def compose_stop(instance_dir: str, project_name: str) -> bool: +async def compose_stop(instance_dir: str, project_name: str) -> bool: """Run ``docker compose stop`` for a customer instance. Args: @@ -121,11 +131,11 @@ def compose_stop(instance_dir: str, project_name: str) -> bool: "stop", ] logger.info("Running: %s", " ".join(cmd)) - result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) + result = await _run_cmd(cmd) return result.returncode == 0 -def compose_start(instance_dir: str, project_name: str) -> bool: +async def compose_start(instance_dir: str, project_name: str) -> bool: """Run ``docker compose start`` for a customer instance. Args: @@ -143,11 +153,11 @@ def compose_start(instance_dir: str, project_name: str) -> bool: "start", ] logger.info("Running: %s", " ".join(cmd)) - result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) + result = await _run_cmd(cmd) return result.returncode == 0 -def compose_restart(instance_dir: str, project_name: str) -> bool: +async def compose_restart(instance_dir: str, project_name: str) -> bool: """Run ``docker compose restart`` for a customer instance. 
Args: @@ -165,7 +175,7 @@ def compose_restart(instance_dir: str, project_name: str) -> bool: "restart", ] logger.info("Running: %s", " ".join(cmd)) - result = subprocess.run(cmd, capture_output=True, text=True, timeout=120) + result = await _run_cmd(cmd) return result.returncode == 0 diff --git a/app/services/image_service.py b/app/services/image_service.py new file mode 100644 index 0000000..b45376c --- /dev/null +++ b/app/services/image_service.py @@ -0,0 +1,251 @@ +"""NetBird Docker image update service. + +Compares locally pulled images against Docker Hub to detect available updates. +Provides pull and per-customer container recreation functions without data loss. +""" + +import asyncio +import json +import logging +import os +import subprocess +from typing import Any + +import httpx + +logger = logging.getLogger(__name__) + +# Services that make up a customer's NetBird deployment +NETBIRD_SERVICES = ["management", "signal", "relay", "dashboard"] + + +async def _run_cmd(cmd: list[str], timeout: int = 300) -> subprocess.CompletedProcess: + """Run a subprocess command without blocking the event loop.""" + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + None, + lambda: subprocess.run(cmd, capture_output=True, text=True, timeout=timeout), + ) + + +def _parse_image_name(image: str) -> tuple[str, str]: + """Split 'repo/name:tag' into ('repo/name', 'tag'). Defaults tag to 'latest'.""" + if ":" in image: + name, tag = image.rsplit(":", 1) + else: + name, tag = image, "latest" + return name, tag + + +async def get_hub_digest(image: str) -> str | None: + """Fetch the current digest from Docker Hub for an image:tag. + + Uses the Docker Hub REST API — does NOT pull the image. + Returns the digest string (sha256:...) or None on failure. 
+ """ + name, tag = _parse_image_name(image) + url = f"https://hub.docker.com/v2/repositories/{name}/tags/{tag}/" + try: + async with httpx.AsyncClient(timeout=15) as client: + resp = await client.get(url) + if resp.status_code != 200: + logger.warning("Docker Hub API returned %d for %s", resp.status_code, image) + return None + data = resp.json() + images = data.get("images", []) + # Prefer linux/amd64 digest + for img in images: + if img.get("os") == "linux" and img.get("architecture") in ("amd64", ""): + d = img.get("digest") + if d: + return d + # Fallback: first available digest + if images: + return images[0].get("digest") + return None + except Exception as exc: + logger.warning("Failed to fetch Docker Hub digest for %s: %s", image, exc) + return None + + +def get_local_digest(image: str) -> str | None: + """Get the RepoDigest for a locally pulled image. + + Returns the digest (sha256:...) or None if image not found locally. + """ + try: + result = subprocess.run( + ["docker", "image", "inspect", image, "--format", "{{json .RepoDigests}}"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode != 0: + return None + digests = json.loads(result.stdout.strip()) + if not digests: + return None + # RepoDigests look like "netbirdio/management@sha256:abc..." + for d in digests: + if "@" in d: + return d.split("@", 1)[1] + return None + except Exception as exc: + logger.warning("Failed to inspect local image %s: %s", image, exc) + return None + + +def get_container_image_id(container_name: str) -> str | None: + """Get the full image ID (sha256:...) of a running or stopped container.""" + try: + result = subprocess.run( + ["docker", "inspect", container_name, "--format", "{{.Image}}"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode != 0: + return None + return result.stdout.strip() or None + except Exception: + return None + + +def get_local_image_id(image: str) -> str | None: + """Get the full image ID (sha256:...) 
of a locally stored image.""" + try: + result = subprocess.run( + ["docker", "image", "inspect", image, "--format", "{{.Id}}"], + capture_output=True, text=True, timeout=10, + ) + if result.returncode != 0: + return None + return result.stdout.strip() or None + except Exception: + return None + + +async def check_image_status(image: str) -> dict[str, Any]: + """Check whether a configured image has an update available on Docker Hub. + + Returns a dict with: + image: the image name:tag + local_digest: digest of locally cached image (or None) + hub_digest: latest digest from Docker Hub (or None) + update_available: True if hub_digest differs from local_digest + """ + hub_digest, local_digest = await asyncio.gather( + get_hub_digest(image), + asyncio.get_event_loop().run_in_executor(None, get_local_digest, image), + ) + + if hub_digest and local_digest: + update_available = hub_digest != local_digest + elif hub_digest and not local_digest: + # Image not pulled locally yet — needs pull + update_available = True + else: + update_available = False + + return { + "image": image, + "local_digest": local_digest, + "hub_digest": hub_digest, + "update_available": update_available, + } + + +async def check_all_images(config) -> dict[str, Any]: + """Check all 4 configured NetBird images for available updates. + + Returns a dict with: + images: dict mapping image name -> status dict + any_update_available: bool + """ + images = [ + config.netbird_management_image, + config.netbird_signal_image, + config.netbird_relay_image, + config.netbird_dashboard_image, + ] + results = await asyncio.gather(*[check_image_status(img) for img in images]) + by_image = {r["image"]: r for r in results} + any_update = any(r["update_available"] for r in results) + return {"images": by_image, "any_update_available": any_update} + + +async def pull_image(image: str) -> dict[str, Any]: + """Pull a Docker image. 
Returns success/error dict.""" + logger.info("Pulling image: %s", image) + result = await _run_cmd(["docker", "pull", image], timeout=600) + if result.returncode != 0: + logger.error("Failed to pull %s: %s", image, result.stderr) + return {"image": image, "success": False, "error": result.stderr[:500]} + return {"image": image, "success": True} + + +async def pull_all_images(config) -> dict[str, Any]: + """Pull all 4 configured NetBird images. Returns results per image.""" + images = [ + config.netbird_management_image, + config.netbird_signal_image, + config.netbird_relay_image, + config.netbird_dashboard_image, + ] + results = await asyncio.gather(*[pull_image(img) for img in images]) + return { + "results": {r["image"]: r for r in results}, + "all_success": all(r["success"] for r in results), + } + + +def get_customer_container_image_status(container_prefix: str, config) -> dict[str, Any]: + """Check which service containers are running outdated local images. + + Compares each running container's image ID against the locally stored image ID + for the configured image tag. This is a local check — no network call. 
+ + Returns: + services: dict mapping service name to status info + needs_update: True if any service has a different image ID than locally stored + """ + service_images = { + "management": config.netbird_management_image, + "signal": config.netbird_signal_image, + "relay": config.netbird_relay_image, + "dashboard": config.netbird_dashboard_image, + } + services: dict[str, Any] = {} + for svc, image in service_images.items(): + container_name = f"{container_prefix}-{svc}" + container_id = get_container_image_id(container_name) + local_id = get_local_image_id(image) + if container_id and local_id: + up_to_date = container_id == local_id + else: + up_to_date = None # container not running or image not pulled + services[svc] = { + "container": container_name, + "image": image, + "up_to_date": up_to_date, + } + needs_update = any(s["up_to_date"] is False for s in services.values()) + return {"services": services, "needs_update": needs_update} + + +async def update_customer_containers(instance_dir: str, project_name: str) -> dict[str, Any]: + """Recreate customer containers to pick up newly pulled images. + + Runs `docker compose up -d` in the customer's instance directory. + Images must already be pulled. Bind-mounted data is preserved — no data loss. 
+ """ + compose_file = os.path.join(instance_dir, "docker-compose.yml") + if not os.path.isfile(compose_file): + return {"success": False, "error": f"docker-compose.yml not found at {compose_file}"} + cmd = [ + "docker", "compose", + "-f", compose_file, + "-p", project_name, + "up", "-d", "--remove-orphans", + ] + logger.info("Updating containers for %s", project_name) + result = await _run_cmd(cmd, timeout=300) + if result.returncode != 0: + return {"success": False, "error": result.stderr[:1000]} + return {"success": True} diff --git a/app/services/ldap_service.py b/app/services/ldap_service.py new file mode 100644 index 0000000..d4f5662 --- /dev/null +++ b/app/services/ldap_service.py @@ -0,0 +1,180 @@ +"""Active Directory / LDAP authentication via ldap3. + +Provides LDAP-based user authentication as an alternative to local password +authentication. Supports standard Active Directory via sAMAccountName lookup +and optional group membership restriction. + +All ldap3 operations run in a thread executor since ldap3 is synchronous. + +Authentication flow: + 1. Bind with service account (ldap_bind_dn + ldap_bind_password) + 2. Search for the user entry using ldap_user_filter + 3. If ldap_group_dn is set: verify group membership + 4. Re-bind with the user's own DN + supplied password to verify credentials + 5. Return user info dict on success + +Raises: + ValueError: If the user was found but the password is wrong. + RuntimeError: If LDAP is misconfigured or the server is unreachable. +""" + +import asyncio +import logging +from typing import Any + +logger = logging.getLogger(__name__) + + +def _ldap_test(server: str, port: int, use_ssl: bool, bind_dn: str, bind_password: str) -> dict: + """Synchronous LDAP connectivity test — bind with service account. + + Returns dict with ``ok`` and ``message``. 
+ """ + from ldap3 import ALL, SIMPLE, Connection, Server as LdapServer, SUBTREE # noqa: F401 + + srv = LdapServer(server, port=port, use_ssl=use_ssl, get_info=ALL) + try: + conn = Connection(srv, user=bind_dn, password=bind_password, authentication=SIMPLE, auto_bind=True) + conn.unbind() + return {"ok": True, "message": f"Bind successful to {server}:{port} as '{bind_dn}'."} + except Exception as exc: + return {"ok": False, "message": f"LDAP bind failed: {exc}"} + + +def _ldap_authenticate( + server: str, + port: int, + use_ssl: bool, + bind_dn: str, + bind_password: str, + base_dn: str, + user_filter: str, + group_dn: str, + username: str, + password: str, +) -> dict | None: + """Synchronous LDAP authentication. + + Returns: + User info dict on success: {"username": ..., "email": ..., "display_name": ...} + None if user was not found in LDAP (caller may fall back to local auth). + + Raises: + ValueError: Correct username but wrong password. + RuntimeError: LDAP server error / misconfiguration. 
+ """ + from ldap3 import ALL, SIMPLE, SUBTREE, Connection, Server as LdapServer + + srv = LdapServer(server, port=port, use_ssl=use_ssl, get_info=ALL) + + # Step 1: Bind with service account to search for the user + try: + conn = Connection(srv, user=bind_dn, password=bind_password, authentication=SIMPLE, auto_bind=True) + except Exception as exc: + raise RuntimeError(f"LDAP service account bind failed: {exc}") from exc + + # Step 2: Search for user + safe_filter = user_filter.replace("{username}", username.replace("(", "").replace(")", "").replace("*", "")) + conn.search( + search_base=base_dn, + search_filter=safe_filter, + search_scope=SUBTREE, + attributes=["distinguishedName", "mail", "displayName", "sAMAccountName", "memberOf"], + ) + + if not conn.entries: + conn.unbind() + return None # User not found in LDAP — caller falls back to local auth + + entry = conn.entries[0] + user_dn = entry.entry_dn + email = str(entry.mail.value) if entry.mail else username + display_name = str(entry.displayName.value) if entry.displayName else username + + # Step 3: Optional group membership check + if group_dn: + member_of = [str(g) for g in entry.memberOf] if entry.memberOf else [] + if not any(group_dn.lower() == g.lower() for g in member_of): + conn.unbind() + logger.warning( + "LDAP login denied for '%s': not a member of required group '%s'.", + username, group_dn, + ) + raise ValueError(f"Access denied: not a member of the required AD group.") + + conn.unbind() + + # Step 4: Verify user's password by binding as the user + try: + user_conn = Connection(srv, user=user_dn, password=password, authentication=SIMPLE, auto_bind=True) + user_conn.unbind() + except Exception: + raise ValueError("Invalid password.") + + return { + "username": username.lower(), + "email": email, + "display_name": display_name, + } + + +async def test_ldap_connection(config: Any) -> dict: + """Test connectivity to the LDAP / Active Directory server. 
+ + Attempts a service account bind to verify credentials and reachability. + + Args: + config: AppConfig with LDAP settings. + + Returns: + Dict with ``ok`` (bool) and ``message`` (str). + """ + try: + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + None, + _ldap_test, + config.ldap_server, + config.ldap_port, + config.ldap_use_ssl, + config.ldap_bind_dn, + config.ldap_bind_password, + ) + except ImportError: + return {"ok": False, "message": "ldap3 is not installed. Add 'ldap3' to requirements.txt."} + except Exception as exc: + logger.error("LDAP test_connection error: %s", exc) + return {"ok": False, "message": f"LDAP error: {exc}"} + + +async def authenticate_ldap(username: str, password: str, config: Any) -> dict | None: + """Authenticate a user against LDAP / Active Directory. + + Args: + username: The login username (matched via ldap_user_filter). + password: The user's password. + config: AppConfig with LDAP settings. + + Returns: + User info dict on success: {"username": ..., "email": ..., "display_name": ...} + None if the user was not found in LDAP. + + Raises: + ValueError: User found but password incorrect, or group membership denied. + RuntimeError: LDAP server unreachable or misconfigured. 
+ """ + loop = asyncio.get_event_loop() + return await loop.run_in_executor( + None, + _ldap_authenticate, + config.ldap_server, + config.ldap_port, + config.ldap_use_ssl, + config.ldap_bind_dn, + config.ldap_bind_password, + config.ldap_base_dn, + config.ldap_user_filter, + config.ldap_group_dn, + username, + password, + ) diff --git a/app/services/netbird_service.py b/app/services/netbird_service.py index 1a38e44..a1e8bdf 100644 --- a/app/services/netbird_service.py +++ b/app/services/netbird_service.py @@ -30,7 +30,7 @@ from jinja2 import Environment, FileSystemLoader from sqlalchemy.orm import Session from app.models import Customer, Deployment, DeploymentLog -from app.services import docker_service, npm_service, port_manager +from app.services import dns_service, docker_service, npm_service, port_manager from app.utils.config import get_system_config from app.utils.security import encrypt_value, generate_datastore_encryption_key, generate_relay_secret @@ -118,7 +118,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: allocated_port = None instance_dir = None - container_prefix = f"netbird-kunde{customer_id}" + container_prefix = f"netbird-{customer.subdomain}" local_mode = _is_local_domain(config.base_domain) existing_deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() @@ -135,7 +135,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: # Step 2: Generate secrets (reuse existing key if instance data exists) relay_secret = generate_relay_secret() datastore_key = _get_existing_datastore_key( - os.path.join(config.data_dir, f"kunde{customer_id}", "management.json") + os.path.join(config.data_dir, customer.subdomain, "management.json") ) if datastore_key: _log_action(db, customer_id, "deploy", "info", @@ -159,7 +159,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: relay_ws_protocol = "rels" # Step 4: Create instance directory - instance_dir = 
os.path.join(config.data_dir, f"kunde{customer_id}") + instance_dir = os.path.join(config.data_dir, customer.subdomain) os.makedirs(instance_dir, exist_ok=True) os.makedirs(os.path.join(instance_dir, "data", "management"), exist_ok=True) os.makedirs(os.path.join(instance_dir, "data", "signal"), exist_ok=True) @@ -204,14 +204,14 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: # Step 5b: Stop existing containers if re-deploying if existing_deployment: try: - docker_service.compose_down(instance_dir, container_prefix, remove_volumes=False) + await docker_service.compose_down(instance_dir, container_prefix, remove_volumes=False) _log_action(db, customer_id, "deploy", "info", "Stopped existing containers for re-deployment.") except Exception as exc: logger.warning("Could not stop existing containers: %s", exc) # Step 6: Start all Docker containers - docker_service.compose_up(instance_dir, container_prefix, timeout=120) + await docker_service.compose_up(instance_dir, container_prefix, timeout=120) _log_action(db, customer_id, "deploy", "info", "Docker containers started.") # Step 7: Wait for containers to be healthy @@ -225,7 +225,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: # Step 8: Auto-create admin user via NetBird setup API admin_email = customer.email admin_password = secrets.token_urlsafe(16) - management_container = f"netbird-kunde{customer_id}-management" + management_container = f"netbird-{customer.subdomain}-management" setup_api_url = f"http://{management_container}:80/api/setup" setup_payload = json.dumps({ "name": customer.name, @@ -264,7 +264,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: _log_action(db, customer_id, "deploy", "info", "Auto-setup failed — admin must complete setup manually.") - # Step 9: Create NPM proxy host + stream (production only) + # Step 9: Create NPM proxy host (production only) npm_proxy_id = None npm_stream_id = None if not 
local_mode: @@ -277,6 +277,8 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: forward_host=forward_host, forward_port=dashboard_port, admin_email=config.admin_email, + ssl_mode=config.ssl_mode, + wildcard_cert_id=config.wildcard_cert_id, ) npm_proxy_id = npm_result.get("proxy_id") if npm_result.get("error"): @@ -292,27 +294,6 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: f"(SSL: {'OK' if ssl_ok else 'FAILED — check DNS and port 80 accessibility'})", ) - # Create NPM UDP stream for relay STUN port - stream_result = await npm_service.create_stream( - api_url=config.npm_api_url, - npm_email=config.npm_api_email, - npm_password=config.npm_api_password, - incoming_port=allocated_port, - forwarding_host=forward_host, - forwarding_port=allocated_port, - ) - npm_stream_id = stream_result.get("stream_id") - if stream_result.get("error"): - _log_action( - db, customer_id, "deploy", "error", - f"NPM stream creation failed: {stream_result['error']}", - ) - else: - _log_action( - db, customer_id, "deploy", "info", - f"NPM UDP stream created: port {allocated_port} -> {forward_host}:{allocated_port}", - ) - # Note: Keep HTTPS configs even if SSL cert creation failed. # SSL can be set up manually in NPM later. Switching to HTTP # would break the dashboard when the user accesses via HTTPS. 
@@ -324,7 +305,20 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: "Please create it manually in NPM or ensure DNS resolves and port 80 is reachable, then re-deploy.", ) - # Step 10: Create or update deployment record + # Step 10: Create Windows DNS A-record (non-fatal — failure does not abort deployment) + if config.dns_enabled and config.dns_server and config.dns_zone and config.dns_record_ip: + try: + dns_result = await dns_service.create_dns_record(customer.subdomain, config) + if dns_result["ok"]: + _log_action(db, customer_id, "dns_create", "success", dns_result["message"]) + else: + _log_action(db, customer_id, "dns_create", "error", dns_result["message"]) + logger.warning("DNS record creation failed (non-fatal): %s", dns_result["message"]) + except Exception as exc: + logger.error("DNS service error (non-fatal): %s", exc) + _log_action(db, customer_id, "dns_create", "error", str(exc)) + + # Step 11: Create or update deployment record setup_url = external_url deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() @@ -371,8 +365,8 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]: # Rollback: stop containers if they were started try: - docker_service.compose_down( - instance_dir or os.path.join(config.data_dir, f"kunde{customer_id}"), + await docker_service.compose_down( + instance_dir or os.path.join(config.data_dir, customer.subdomain), container_prefix, remove_volumes=True, ) @@ -408,11 +402,11 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]: config = get_system_config(db) if deployment and config: - instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") + instance_dir = os.path.join(config.data_dir, customer.subdomain) # Stop and remove containers try: - docker_service.compose_down(instance_dir, deployment.container_prefix, remove_volumes=True) + await docker_service.compose_down(instance_dir, deployment.container_prefix, 
remove_volumes=True) _log_action(db, customer_id, "undeploy", "info", "Containers removed.") except Exception as exc: _log_action(db, customer_id, "undeploy", "error", f"Container removal error: {exc}") @@ -428,16 +422,16 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]: except Exception as exc: _log_action(db, customer_id, "undeploy", "error", f"NPM removal error: {exc}") - # Remove NPM stream - if deployment.npm_stream_id and config.npm_api_email: + # Remove Windows DNS A-record (non-fatal) + if config and config.dns_enabled and config.dns_server and config.dns_zone: try: - await npm_service.delete_stream( - config.npm_api_url, config.npm_api_email, config.npm_api_password, - deployment.npm_stream_id, - ) - _log_action(db, customer_id, "undeploy", "info", "NPM stream removed.") + dns_result = await dns_service.delete_dns_record(customer.subdomain, config) + if dns_result["ok"]: + _log_action(db, customer_id, "undeploy", "info", dns_result["message"]) + else: + _log_action(db, customer_id, "undeploy", "error", f"DNS removal: {dns_result['message']}") except Exception as exc: - _log_action(db, customer_id, "undeploy", "error", f"NPM stream removal error: {exc}") + logger.error("DNS record deletion failed (non-fatal): %s", exc) # Remove instance directory if os.path.isdir(instance_dir): @@ -455,20 +449,19 @@ async def undeploy_customer(db: Session, customer_id: int) -> dict[str, Any]: return {"success": True} -def stop_customer(db: Session, customer_id: int) -> dict[str, Any]: +async def stop_customer(db: Session, customer_id: int) -> dict[str, Any]: """Stop containers for a customer.""" deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() + customer = db.query(Customer).filter(Customer.id == customer_id).first() config = get_system_config(db) - if not deployment or not config: - return {"success": False, "error": "Deployment or config not found."} + if not deployment or not config or not customer: + 
return {"success": False, "error": "Deployment, customer or config not found."} - instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") - ok = docker_service.compose_stop(instance_dir, deployment.container_prefix) + instance_dir = os.path.join(config.data_dir, customer.subdomain) + ok = await docker_service.compose_stop(instance_dir, deployment.container_prefix) if ok: deployment.deployment_status = "stopped" - customer = db.query(Customer).filter(Customer.id == customer_id).first() - if customer: - customer.status = "inactive" + customer.status = "inactive" db.commit() _log_action(db, customer_id, "stop", "success", "Containers stopped.") else: @@ -476,20 +469,19 @@ def stop_customer(db: Session, customer_id: int) -> dict[str, Any]: return {"success": ok} -def start_customer(db: Session, customer_id: int) -> dict[str, Any]: +async def start_customer(db: Session, customer_id: int) -> dict[str, Any]: """Start containers for a customer.""" deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() + customer = db.query(Customer).filter(Customer.id == customer_id).first() config = get_system_config(db) - if not deployment or not config: - return {"success": False, "error": "Deployment or config not found."} + if not deployment or not config or not customer: + return {"success": False, "error": "Deployment, customer or config not found."} - instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") - ok = docker_service.compose_start(instance_dir, deployment.container_prefix) + instance_dir = os.path.join(config.data_dir, customer.subdomain) + ok = await docker_service.compose_start(instance_dir, deployment.container_prefix) if ok: deployment.deployment_status = "running" - customer = db.query(Customer).filter(Customer.id == customer_id).first() - if customer: - customer.status = "active" + customer.status = "active" db.commit() _log_action(db, customer_id, "start", "success", "Containers started.") else: @@ -497,20 
+489,19 @@ def start_customer(db: Session, customer_id: int) -> dict[str, Any]: return {"success": ok} -def restart_customer(db: Session, customer_id: int) -> dict[str, Any]: +async def restart_customer(db: Session, customer_id: int) -> dict[str, Any]: """Restart containers for a customer.""" deployment = db.query(Deployment).filter(Deployment.customer_id == customer_id).first() + customer = db.query(Customer).filter(Customer.id == customer_id).first() config = get_system_config(db) - if not deployment or not config: - return {"success": False, "error": "Deployment or config not found."} + if not deployment or not config or not customer: + return {"success": False, "error": "Deployment, customer or config not found."} - instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") - ok = docker_service.compose_restart(instance_dir, deployment.container_prefix) + instance_dir = os.path.join(config.data_dir, customer.subdomain) + ok = await docker_service.compose_restart(instance_dir, deployment.container_prefix) if ok: deployment.deployment_status = "running" - customer = db.query(Customer).filter(Customer.id == customer_id).first() - if customer: - customer.status = "active" + customer.status = "active" db.commit() _log_action(db, customer_id, "restart", "success", "Containers restarted.") else: diff --git a/app/services/npm_service.py b/app/services/npm_service.py index f32ab42..b84ef55 100644 --- a/app/services/npm_service.py +++ b/app/services/npm_service.py @@ -14,6 +14,7 @@ Also manages NPM streams for STUN/TURN relay UDP ports. 
import logging import os +import socket from typing import Any import httpx @@ -41,7 +42,17 @@ def _get_forward_host() -> str: logger.info("Using HOST_IP from environment: %s", host_ip) return host_ip - logger.warning("HOST_IP not set in environment — please add HOST_IP= to .env") + # Auto-detect: connect to external address to find the outbound interface IP + try: + with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: + s.connect(("8.8.8.8", 80)) + detected = s.getsockname()[0] + logger.info("Auto-detected host IP: %s (set HOST_IP in .env to override)", detected) + return detected + except Exception: + pass + + logger.warning("Could not detect host IP — falling back to 127.0.0.1. Set HOST_IP in .env!") return "127.0.0.1" @@ -112,6 +123,45 @@ async def test_npm_connection(api_url: str, email: str, password: str) -> dict[s return {"ok": False, "message": f"Unexpected error: {exc}"} +async def list_certificates(api_url: str, email: str, password: str) -> dict[str, Any]: + """Fetch all SSL certificates from NPM. + + Args: + api_url: NPM API base URL. + email: NPM login email. + password: NPM login password. + + Returns: + Dict with ``certificates`` list on success, or ``error`` on failure. 
+ """ + try: + async with httpx.AsyncClient(timeout=NPM_TIMEOUT) as client: + token = await _npm_login(client, api_url, email, password) + headers = {"Authorization": f"Bearer {token}"} + resp = await client.get(f"{api_url}/nginx/certificates", headers=headers) + if resp.status_code == 200: + result = [] + for cert in resp.json(): + domains = cert.get("domain_names", []) + result.append({ + "id": cert.get("id"), + "domain_names": domains, + "provider": cert.get("provider", "unknown"), + "expires_on": cert.get("expires_on"), + "is_wildcard": any(d.startswith("*.") for d in domains), + }) + return {"certificates": result} + return {"error": f"NPM returned {resp.status_code}: {resp.text[:200]}"} + except RuntimeError as exc: + return {"error": str(exc)} + except httpx.ConnectError: + return {"error": "Connection refused. Is NPM running and reachable?"} + except httpx.TimeoutException: + return {"error": "Connection timed out."} + except Exception as exc: + return {"error": f"Unexpected error: {exc}"} + + async def _find_cert_by_domain( client: httpx.AsyncClient, api_url: str, headers: dict, domain: str ) -> int | None: @@ -169,6 +219,8 @@ async def create_proxy_host( forward_host: str, forward_port: int = 80, admin_email: str = "", + ssl_mode: str = "letsencrypt", + wildcard_cert_id: int | None = None, ) -> dict[str, Any]: """Create a proxy host entry in NPM with SSL for a customer. 
@@ -207,7 +259,16 @@ async def create_proxy_host( "block_exploits": True, "allow_websocket_upgrade": True, "access_list_id": 0, - "advanced_config": "", + "advanced_config": ( + "location ^~ /management.ManagementService/ {\n" + f" grpc_pass grpc://{forward_host}:{forward_port};\n" + " grpc_set_header Host $host;\n" + "}\n" + "location ^~ /signalexchange.SignalExchange/ {\n" + f" grpc_pass grpc://{forward_host}:{forward_port};\n" + " grpc_set_header Host $host;\n" + "}\n" + ), "meta": { "letsencrypt_agree": True, "letsencrypt_email": admin_email, @@ -265,7 +326,10 @@ async def create_proxy_host( return {"error": error_msg} # Step 2: Request SSL certificate and enable HTTPS - ssl_ok = await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email) + ssl_ok = await _request_ssl( + client, api_url, headers, proxy_id, domain, admin_email, + ssl_mode=ssl_mode, wildcard_cert_id=wildcard_cert_id, + ) return {"proxy_id": proxy_id, "ssl": ssl_ok} except RuntimeError as exc: @@ -283,13 +347,14 @@ async def _request_ssl( proxy_id: int, domain: str, admin_email: str, + ssl_mode: str = "letsencrypt", + wildcard_cert_id: int | None = None, ) -> bool: - """Request a Let's Encrypt SSL certificate and enable HTTPS on the proxy host. + """Request an SSL certificate and enable HTTPS on the proxy host. - Flow: - 1. Create LE certificate via NPM API (HTTP-01 validation, up to 120s) - 2. Assign certificate to the proxy host - 3. Enable ssl_forced + hsts on the proxy host + Supports two modes: + - ``letsencrypt``: Create a per-domain LE certificate (HTTP-01 validation). + - ``wildcard``: Assign a pre-existing wildcard certificate from NPM. Args: client: httpx client (already authenticated). @@ -298,10 +363,49 @@ async def _request_ssl( proxy_id: The proxy host ID. domain: The domain to certify. admin_email: Contact email for LE. + ssl_mode: ``"letsencrypt"`` or ``"wildcard"``. + wildcard_cert_id: NPM certificate ID for wildcard mode. 
Returns: True if SSL was successfully enabled, False otherwise. """ + # Wildcard mode: assign the pre-existing wildcard cert directly + if ssl_mode == "wildcard" and wildcard_cert_id: + logger.info( + "Wildcard mode: assigning cert id=%s to proxy host %s for %s", + wildcard_cert_id, proxy_id, domain, + ) + ssl_update = { + "certificate_id": wildcard_cert_id, + "ssl_forced": True, + "hsts_enabled": True, + "http2_support": True, + } + try: + update_resp = await client.put( + f"{api_url}/nginx/proxy-hosts/{proxy_id}", + json=ssl_update, + headers=headers, + ) + if update_resp.status_code in (200, 201): + logger.info( + "SSL enabled on proxy host %s (wildcard cert_id=%s)", + proxy_id, wildcard_cert_id, + ) + return True + logger.error( + "Failed to assign wildcard cert %s to proxy host %s: HTTP %s — %s", + wildcard_cert_id, proxy_id, + update_resp.status_code, update_resp.text[:300], + ) + return False + except Exception as exc: + logger.error( + "Failed to assign wildcard cert to proxy host %s: %s", proxy_id, exc, + ) + return False + + # Let's Encrypt mode (default) if not admin_email: logger.warning("No admin email set — skipping SSL certificate for %s", domain) return False diff --git a/app/services/update_service.py b/app/services/update_service.py new file mode 100644 index 0000000..ed59b06 --- /dev/null +++ b/app/services/update_service.py @@ -0,0 +1,446 @@ +"""Update management — version check and in-place update via git + docker compose.""" + +import json +import logging +import os +import shutil +import subprocess +import httpx +from datetime import datetime +from pathlib import Path +from typing import Any + +import httpx + +SOURCE_DIR = "/app-source" +VERSION_FILE = "/app/version.json" +BACKUP_DIR = "/app/backups" +CONTAINER_NAME = "netbird-msp-appliance" +SERVICE_NAME = "netbird-msp-appliance" + +logger = logging.getLogger(__name__) + + +def _get_compose_project_name() -> str: + """Detect the compose project name from the running container's labels. 
+ + Docker Compose sets the label ``com.docker.compose.project`` on every + managed container. Reading it at runtime avoids hard-coding a project + name that may differ from the directory name used at deploy time. + + Returns: + The compose project name (e.g. ``netbird-msp``). + """ + try: + result = subprocess.run( + [ + "docker", "inspect", CONTAINER_NAME, + "--format", + '{{index .Config.Labels "com.docker.compose.project"}}', + ], + capture_output=True, text=True, timeout=10, + ) + if result.returncode == 0: + project = result.stdout.strip() + if project: + logger.info("Detected compose project name: %s", project) + return project + except Exception as exc: + logger.warning("Could not detect compose project name: %s", exc) + + # Fallback: derive from SOURCE_DIR basename (mirrors Compose default behaviour) + fallback = Path(SOURCE_DIR).name + logger.warning("Using fallback compose project name: %s", fallback) + return fallback + + +def get_current_version() -> dict: + """Read the version baked at build time from /app/version.json.""" + try: + data = json.loads(Path(VERSION_FILE).read_text()) + return { + "tag": data.get("tag", "unknown"), + "commit": data.get("commit", "unknown"), + "branch": data.get("branch", "unknown"), + "date": data.get("date", "unknown"), + } + except Exception: + return {"tag": "unknown", "commit": "unknown", "branch": "unknown", "date": "unknown"} + + +async def check_for_updates(config: Any) -> dict: + """Query the Gitea API for the latest tag and commit on the configured branch. + + Parses the repo URL to build the Gitea API endpoint: + https://git.example.com/owner/repo + → https://git.example.com/api/v1/repos/owner/repo/... + + Uses tags for version comparison when available, falls back to commit SHAs. + Returns dict with current, latest, needs_update, and optional error. 
+ """ + current = get_current_version() + if not config.git_repo_url: + return { + "current": current, + "latest": None, + "needs_update": False, + "error": "git_repo_url not configured", + } + + repo_url = config.git_repo_url.rstrip("/") + parts = repo_url.split("/") + if len(parts) < 5: + return { + "current": current, + "latest": None, + "needs_update": False, + "error": f"Cannot parse repo URL: {repo_url}", + } + + base_url = "/".join(parts[:-2]) + owner = parts[-2] + repo = parts[-1] + branch = config.git_branch or "main" + branch_api = f"{base_url}/api/v1/repos/{owner}/{repo}/branches/{branch}" + tags_api = f"{base_url}/api/v1/repos/{owner}/{repo}/tags?limit=1" + + headers = {} + if config.git_token: + headers["Authorization"] = f"token {config.git_token}" + + try: + async with httpx.AsyncClient(timeout=10) as client: + # Fetch branch info (latest commit) + resp = await client.get(branch_api, headers=headers) + if resp.status_code != 200: + return { + "current": current, + "latest": None, + "needs_update": False, + "error": f"Gitea API returned HTTP {resp.status_code}", + } + data = resp.json() + latest_commit = data.get("commit", {}) + full_sha = latest_commit.get("id", "unknown") + short_sha = full_sha[:8] if full_sha != "unknown" else "unknown" + + # Fetch latest tag + latest_tag = "unknown" + try: + tag_resp = await client.get(tags_api, headers=headers) + if tag_resp.status_code == 200: + tags = tag_resp.json() + if tags and len(tags) > 0: + latest_tag = tags[0].get("name", "unknown") + except Exception: + pass # Tag fetch is best-effort + + latest = { + "tag": latest_tag, + "commit": short_sha, + "commit_full": full_sha, + "message": latest_commit.get("commit", {}).get("message", "").split("\n")[0] if latest_commit.get("commit") else "", + "date": latest_commit.get("timestamp", ""), + "branch": branch, + } + + # Determine if update is needed: prefer tag comparison, fallback to commit + current_tag = current.get("tag", "unknown") + current_sha = 
current.get("commit", "unknown") + + # If we don't know our current version but the remote has one, we should update + if current_tag == "unknown" and current_sha == "unknown": + needs_update = latest_tag != "unknown" or short_sha != "unknown" + elif current_tag != "unknown" and latest_tag != "unknown": + needs_update = current_tag != latest_tag + else: + needs_update = ( + current_sha != "unknown" + and short_sha != "unknown" + and current_sha != short_sha + and not full_sha.startswith(current_sha) + ) + return {"current": current, "latest": latest, "needs_update": needs_update} + except Exception as exc: + return { + "current": current, + "latest": None, + "needs_update": False, + "error": str(exc), + } + + +async def get_remote_branches(config: Any) -> list[str]: + """Query the Gitea API for available branches on the configured repository. + + Returns a list of branch names (e.g., ['main', 'unstable', 'development']). + If the repository URL is not configured or an error occurs, returns an empty list. + """ + if not config.git_repo_url: + return [] + + repo_url = config.git_repo_url.rstrip("/") + parts = repo_url.split("/") + if len(parts) < 5: + return [] + + base_url = "/".join(parts[:-2]) + owner = parts[-2] + repo = parts[-1] + branches_api = f"{base_url}/api/v1/repos/{owner}/{repo}/branches?limit=100" + + headers = {} + if config.git_token: + headers["Authorization"] = f"token {config.git_token}" + + try: + async with httpx.AsyncClient(timeout=10) as client: + resp = await client.get(branches_api, headers=headers) + if resp.status_code == 200: + data = resp.json() + if isinstance(data, list): + return [branch.get("name") for branch in data if "name" in branch] + except Exception as exc: + logger.error("Error fetching branches: %s", exc) + + return [] + + +def backup_database(db_path: str) -> str: + """Create a timestamped backup of the SQLite database. + + Returns the backup file path. 
+ """ + Path(BACKUP_DIR).mkdir(parents=True, exist_ok=True) + timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S") + backup_path = f"{BACKUP_DIR}/netbird_msp_{timestamp}.db" + shutil.copy2(db_path, backup_path) + logger.info("Database backed up to %s", backup_path) + return backup_path + + +def trigger_update(config: Any, db_path: str) -> dict: + """Backup DB, git pull latest code, then fire-and-forget docker compose rebuild. + + Returns immediately after launching the rebuild. The container will restart + in ~30-60 seconds causing a brief HTTP connection drop. + + Args: + config: AppConfig with git_repo_url, git_branch, git_token. + db_path: Absolute path to the SQLite database file. + + Returns: + Dict with ok (bool), message, backup path, and pulled_branch. + """ + # 1. Backup database before any changes + try: + backup_path = backup_database(db_path) + except Exception as exc: + logger.error("Database backup failed: %s", exc) + return {"ok": False, "message": f"Database backup failed: {exc}", "backup": None} + + # 2. Build git pull command (embed token in URL if provided) + branch = config.git_branch or "main" + if config.git_token and config.git_repo_url: + scheme_sep = config.git_repo_url.split("://", 1) + if len(scheme_sep) == 2: + auth_url = f"{scheme_sep[0]}://token:{config.git_token}@{scheme_sep[1]}" + else: + auth_url = config.git_repo_url + pull_cmd = ["git", "-C", SOURCE_DIR, "pull", auth_url, branch] + else: + pull_cmd = ["git", "-C", SOURCE_DIR, "pull", "origin", branch] + + # 3. Git pull (synchronous — must complete before rebuild) + # Ensure .git directory is owned by the process user (root inside container). + # The .git dir may be owned by the host user after manual operations. 
+ try: + subprocess.run( + ["git", "config", "--global", "--add", "safe.directory", SOURCE_DIR], + capture_output=True, timeout=10, + ) + except Exception: + pass + + try: + result = subprocess.run( + pull_cmd, + capture_output=True, + text=True, + timeout=120, + ) + except subprocess.TimeoutExpired: + return {"ok": False, "message": "git pull timed out after 120s.", "backup": backup_path} + except Exception as exc: + return {"ok": False, "message": f"git pull error: {exc}", "backup": backup_path} + + if result.returncode != 0: + stderr = result.stderr.strip()[:500] + logger.error("git pull failed (exit %d): %s", result.returncode, stderr) + return { + "ok": False, + "message": f"git pull failed: {stderr}", + "backup": backup_path, + } + + logger.info("git pull succeeded: %s", result.stdout.strip()[:200]) + + # Fetch tags separately — git pull does not always pull all tags + try: + subprocess.run( + ["git", "-C", SOURCE_DIR, "fetch", "--tags"], + capture_output=True, text=True, timeout=30, + ) + except Exception as exc: + logger.warning("git fetch --tags failed (non-fatal): %s", exc) + + # 4. 
Read version info from the freshly-pulled source + build_env = os.environ.copy() + try: + build_env["GIT_COMMIT"] = subprocess.run( + ["git", "-C", SOURCE_DIR, "rev-parse", "--short", "HEAD"], + capture_output=True, text=True, timeout=10, + ).stdout.strip() or "unknown" + + build_env["GIT_BRANCH"] = subprocess.run( + ["git", "-C", SOURCE_DIR, "rev-parse", "--abbrev-ref", "HEAD"], + capture_output=True, text=True, timeout=10, + ).stdout.strip() or "unknown" + + build_env["GIT_COMMIT_DATE"] = subprocess.run( + ["git", "-C", SOURCE_DIR, "log", "-1", "--format=%cI"], + capture_output=True, text=True, timeout=10, + ).stdout.strip() or "unknown" + + tag_result = subprocess.run( + ["git", "-C", SOURCE_DIR, "describe", "--tags", "--abbrev=0"], + capture_output=True, text=True, timeout=10, + ) + build_env["GIT_TAG"] = tag_result.stdout.strip() if tag_result.returncode == 0 else "unknown" + except Exception as exc: + logger.warning("Could not read version info from source: %s", exc) + + logger.info( + "Rebuilding with GIT_TAG=%s GIT_COMMIT=%s GIT_BRANCH=%s", + build_env.get("GIT_TAG", "?"), + build_env.get("GIT_COMMIT", "?"), + build_env.get("GIT_BRANCH", "?"), + ) + + # 5. Two-phase rebuild: Build image first, then swap container. + # The swap will kill this process (we ARE the container), so we must + # ensure the compose-up runs detached on the Docker host via a wrapper. + log_path = Path(BACKUP_DIR) / "update_rebuild.log" + + # Detect compose project name at runtime — avoids hard-coding a name that + # may differ from the directory used at deploy time. 
+ project_name = _get_compose_project_name() + # Image name follows Docker Compose convention: {project}-{service} + service_image = f"{project_name}-{SERVICE_NAME}:latest" + logger.info("Using project=%s image=%s", project_name, service_image) + + # Phase A — build the new image (does NOT stop anything) + build_cmd = [ + "docker", "compose", + "-p", project_name, + "-f", f"{SOURCE_DIR}/docker-compose.yml", + "build", "--no-cache", + SERVICE_NAME, + ] + logger.info("Phase A: building new image …") + try: + build_result = subprocess.run( + build_cmd, + capture_output=True, text=True, + timeout=600, + env=build_env, + ) + with open(log_path, "w") as f: + f.write(build_result.stdout) + f.write(build_result.stderr) + if build_result.returncode != 0: + logger.error("Image build failed: %s", build_result.stderr[:500]) + return { + "ok": False, + "message": f"Image build failed: {build_result.stderr[:300]}", + "backup": backup_path, + } + except subprocess.TimeoutExpired: + return {"ok": False, "message": "Image build timed out after 600s.", "backup": backup_path} + + logger.info("Phase A complete — image built successfully.") + + # Phase B — swap the container using a helper container. + # When compose recreates our container, ALL processes inside die (PID namespace + # is destroyed). So we launch a *separate* helper container via 'docker run -d' + # that has access to the Docker socket and runs 'docker compose up -d'. + # This helper lives outside our container and survives our restart. 
+ + # Discover the host-side path of /app-source (docker volumes use host paths) + try: + inspect_result = subprocess.run( + ["docker", "inspect", "netbird-msp-appliance", + "--format", '{{range .Mounts}}{{if eq .Destination "/app-source"}}{{.Source}}{{end}}{{end}}'], + capture_output=True, text=True, timeout=10, + ) + host_source_dir = inspect_result.stdout.strip() + if not host_source_dir: + raise ValueError("Could not find /app-source mount") + except Exception as exc: + logger.error("Failed to discover host source path: %s", exc) + return {"ok": False, "message": f"Could not find host source path: {exc}", "backup": backup_path} + + logger.info("Host source directory: %s", host_source_dir) + + env_flags = [] + for key in ("GIT_TAG", "GIT_COMMIT", "GIT_BRANCH", "GIT_COMMIT_DATE"): + val = build_env.get(key, "unknown") + env_flags.extend(["-e", f"{key}={val}"]) + + helper_cmd = [ + "docker", "run", "--rm", "-d", "--privileged", + "--name", "msp-updater", + "-v", "/var/run/docker.sock:/var/run/docker.sock:z", + "-v", f"{host_source_dir}:{host_source_dir}:ro,z", + *env_flags, + service_image, # freshly built image — has docker CLI + compose plugin + "sh", "-c", + ( + "sleep 3 && " + f"docker compose -p {project_name} " + f"-f {host_source_dir}/docker-compose.yml " + f"up --force-recreate --no-deps -d {SERVICE_NAME}" + ), + ] + try: + # Remove stale updater container if any + subprocess.run( + ["docker", "rm", "-f", "msp-updater"], + capture_output=True, timeout=10, + ) + result = subprocess.run( + helper_cmd, + capture_output=True, text=True, + timeout=30, + env=build_env, + ) + if result.returncode != 0: + logger.error("Failed to start updater container: %s", result.stderr.strip()) + return { + "ok": False, + "message": f"Update-Container konnte nicht gestartet werden: {result.stderr.strip()[:200]}", + "backup": backup_path, + } + logger.info("Phase B: updater container started — this container will restart in ~5s.") + except Exception as exc: + 
logger.error("Failed to launch updater: %s", exc) + return {"ok": False, "message": f"Updater launch failed: {exc}", "backup": backup_path} + + return { + "ok": True, + "message": ( + "Update gestartet. Die App wird in ca. 60 Sekunden mit der neuen Version verfügbar sein." + ), + "backup": backup_path, + "pulled_branch": branch, + } diff --git a/app/utils/config.py b/app/utils/config.py index 69716fa..0eb66c5 100644 --- a/app/utils/config.py +++ b/app/utils/config.py @@ -31,10 +31,59 @@ class AppConfig: docker_network: str relay_base_port: int dashboard_base_port: int + ssl_mode: str + wildcard_cert_id: int | None + # Windows DNS + dns_enabled: bool = False + dns_server: str = "" + dns_username: str = "" + dns_password: str = "" # decrypted + dns_zone: str = "" + dns_record_ip: str = "" + # LDAP + ldap_enabled: bool = False + ldap_server: str = "" + ldap_port: int = 389 + ldap_use_ssl: bool = False + ldap_bind_dn: str = "" + ldap_bind_password: str = "" # decrypted + ldap_base_dn: str = "" + ldap_user_filter: str = "(sAMAccountName={username})" + ldap_group_dn: str = "" + # Update management + git_repo_url: str = "" + git_branch: str = "main" + git_token: str = "" # decrypted +# --------------------------------------------------------------------------- # Environment-level settings (not stored in DB) -SECRET_KEY: str = os.environ.get("SECRET_KEY", "change-me-in-production") +# --------------------------------------------------------------------------- + +# Known insecure default values that must never be used in production. +_INSECURE_KEY_VALUES: set[str] = { + "change-me-in-production", + "local-test-secret-key-not-for-production-1234", + "secret", + "changeme", + "", +} + +SECRET_KEY: str = os.environ.get("SECRET_KEY", "") + +# --- Startup security gate --- +# Abort immediately if the key is missing, too short, or a known default. 
+_MIN_KEY_LENGTH = 32 +if SECRET_KEY in _INSECURE_KEY_VALUES or len(SECRET_KEY) < _MIN_KEY_LENGTH: + raise RuntimeError( + "FATAL: SECRET_KEY is insecure, missing, or too short.\n" + f" Current length : {len(SECRET_KEY)} characters (minimum: {_MIN_KEY_LENGTH})\n" + " The key must be at least 32 random characters and must not be a known default value.\n" + " Generate a secure key with:\n" + " python3 -c \"import secrets; print(secrets.token_hex(32))\"\n" + " Then set it in your .env file as: SECRET_KEY=" + ) + DATABASE_PATH: str = os.environ.get("DATABASE_PATH", "/app/data/netbird_msp.db") LOG_LEVEL: str = os.environ.get("LOG_LEVEL", "INFO") JWT_ALGORITHM: str = "HS256" @@ -64,6 +113,18 @@ def get_system_config(db: Session) -> Optional[AppConfig]: npm_password = decrypt_value(row.npm_api_password_encrypted) except Exception: npm_password = "" + try: + dns_password = decrypt_value(row.dns_password_encrypted) if row.dns_password_encrypted else "" + except Exception: + dns_password = "" + try: + ldap_bind_password = decrypt_value(row.ldap_bind_password_encrypted) if row.ldap_bind_password_encrypted else "" + except Exception: + ldap_bind_password = "" + try: + git_token = decrypt_value(row.git_token_encrypted) if row.git_token_encrypted else "" + except Exception: + git_token = "" return AppConfig( base_domain=row.base_domain, @@ -79,4 +140,24 @@ def get_system_config(db: Session) -> Optional[AppConfig]: docker_network=row.docker_network, relay_base_port=row.relay_base_port, dashboard_base_port=getattr(row, "dashboard_base_port", 9000) or 9000, + ssl_mode=getattr(row, "ssl_mode", "letsencrypt") or "letsencrypt", + wildcard_cert_id=getattr(row, "wildcard_cert_id", None), + dns_enabled=bool(getattr(row, "dns_enabled", False)), + dns_server=getattr(row, "dns_server", "") or "", + dns_username=getattr(row, "dns_username", "") or "", + dns_password=dns_password, + dns_zone=getattr(row, "dns_zone", "") or "", + dns_record_ip=getattr(row, "dns_record_ip", "") or "", + 
ldap_enabled=bool(getattr(row, "ldap_enabled", False)), + ldap_server=getattr(row, "ldap_server", "") or "", + ldap_port=getattr(row, "ldap_port", 389) or 389, + ldap_use_ssl=bool(getattr(row, "ldap_use_ssl", False)), + ldap_bind_dn=getattr(row, "ldap_bind_dn", "") or "", + ldap_bind_password=ldap_bind_password, + ldap_base_dn=getattr(row, "ldap_base_dn", "") or "", + ldap_user_filter=getattr(row, "ldap_user_filter", "(sAMAccountName={username})") or "(sAMAccountName={username})", + ldap_group_dn=getattr(row, "ldap_group_dn", "") or "", + git_repo_url=getattr(row, "git_repo_url", "") or "", + git_branch=getattr(row, "git_branch", "main") or "main", + git_token=git_token, ) diff --git a/app/utils/validators.py b/app/utils/validators.py index 881a2c3..27c6181 100644 --- a/app/utils/validators.py +++ b/app/utils/validators.py @@ -126,11 +126,49 @@ class SystemConfigUpdate(BaseModel): branding_name: Optional[str] = Field(None, max_length=255) branding_subtitle: Optional[str] = Field(None, max_length=255) default_language: Optional[str] = Field(None, max_length=10) + ssl_mode: Optional[str] = Field(None, max_length=20) + wildcard_cert_id: Optional[int] = Field(None, ge=0) mfa_enabled: Optional[bool] = None azure_enabled: Optional[bool] = None azure_tenant_id: Optional[str] = Field(None, max_length=255) azure_client_id: Optional[str] = Field(None, max_length=255) azure_client_secret: Optional[str] = None # encrypted before storage + azure_allowed_group_id: Optional[str] = Field( + None, max_length=255, + description="Azure AD group object ID. If set, only members of this group can log in." 
+ ) + # Windows DNS + dns_enabled: Optional[bool] = None + dns_server: Optional[str] = Field(None, max_length=255) + dns_username: Optional[str] = Field(None, max_length=255) + dns_password: Optional[str] = None # plaintext, encrypted before storage + dns_zone: Optional[str] = Field(None, max_length=255) + dns_record_ip: Optional[str] = Field(None, max_length=45) + # LDAP + ldap_enabled: Optional[bool] = None + ldap_server: Optional[str] = Field(None, max_length=255) + ldap_port: Optional[int] = Field(None, ge=1, le=65535) + ldap_use_ssl: Optional[bool] = None + ldap_bind_dn: Optional[str] = Field(None, max_length=500) + ldap_bind_password: Optional[str] = None # plaintext, encrypted before storage + ldap_base_dn: Optional[str] = Field(None, max_length=500) + ldap_user_filter: Optional[str] = Field(None, max_length=255) + ldap_group_dn: Optional[str] = Field(None, max_length=500) + # Update management + git_repo_url: Optional[str] = Field(None, max_length=500) + git_branch: Optional[str] = Field(None, max_length=100) + git_token: Optional[str] = None # plaintext, encrypted before storage + + @field_validator("ssl_mode") + @classmethod + def validate_ssl_mode(cls, v: Optional[str]) -> Optional[str]: + """SSL mode must be 'letsencrypt' or 'wildcard'.""" + if v is None: + return v + allowed = {"letsencrypt", "wildcard"} + if v not in allowed: + raise ValueError(f"ssl_mode must be one of: {', '.join(sorted(allowed))}") + return v @field_validator("base_domain") @classmethod diff --git a/docker-compose.yml b/docker-compose.yml index 748cf9c..bd5b1f6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,16 +1,68 @@ services: + # --------------------------------------------------------------------------- + # Docker Socket Proxy — limits Docker API access to only what is needed. + # The main app container no longer has direct access to /var/run/docker.sock. 
+ # --------------------------------------------------------------------------- + docker-socket-proxy: + image: tecnativa/docker-socket-proxy:latest + container_name: docker-socket-proxy + restart: unless-stopped + environment: + # Read-only endpoints + CONTAINERS: 1 + IMAGES: 1 + NETWORKS: 1 + INFO: 1 + # Write endpoints (needed for compose up/down/start/stop) + POST: 1 + DELETE: 1 + # Volumes needed for docker compose (creates/removes volumes per customer) + VOLUMES: 1 + # Explicitly deny dangerous endpoints + AUTH: 0 + SECRETS: 0 + SWARM: 0 + NODES: 0 + SERVICES: 0 + TASKS: 0 + CONFIGS: 0 + PLUGINS: 0 + BUILD: 0 + COMMIT: 0 + DISTRIBUTION: 0 + EXEC: 1 + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro,z + networks: + - npm-network + # Only accessible from within the Docker network — never expose port externally + netbird-msp-appliance: - build: . + build: + context: . + args: + GIT_COMMIT: ${GIT_COMMIT:-unknown} + GIT_BRANCH: ${GIT_BRANCH:-unknown} + GIT_COMMIT_DATE: ${GIT_COMMIT_DATE:-unknown} + GIT_TAG: ${GIT_TAG:-unknown} container_name: netbird-msp-appliance restart: unless-stopped + security_opt: + - label:disable + extra_hosts: + - "host.docker.internal:host-gateway" + depends_on: + - docker-socket-proxy ports: - "${WEB_UI_PORT:-8000}:8000" volumes: - - ./data:/app/data - - ./logs:/app/logs - - ./backups:/app/backups - - /var/run/docker.sock:/var/run/docker.sock - - ${DATA_DIR:-/opt/netbird-instances}:${DATA_DIR:-/opt/netbird-instances} + - ./data:/app/data:z + - ./data/uploads:/app/static/uploads:z + - ./logs:/app/logs:z + - ./backups:/app/backups:z + - /var/run/docker.sock:/var/run/docker.sock:z + - ${DATA_DIR:-/opt/netbird-instances}:${DATA_DIR:-/opt/netbird-instances}:z + - .:/app-source:z environment: - SECRET_KEY=${SECRET_KEY} - DATABASE_PATH=/app/data/netbird_msp.db @@ -21,7 +73,7 @@ services: networks: - npm-network healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:8000/api/health"] + test: [ "CMD", "curl", "-f", 
"http://localhost:8000/api/health" ] interval: 30s timeout: 10s retries: 3 diff --git a/requirements.txt b/requirements.txt index f778cda..32e6515 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,6 +20,9 @@ pyyaml==6.0.1 msal==1.28.0 pyotp==2.9.0 qrcode[pil]==7.4.2 +slowapi==0.1.9 +pywinrm>=0.4.3 +ldap3>=2.9.1 pytest==7.4.3 pytest-asyncio==0.23.2 pytest-httpx==0.28.0 diff --git a/static/css/styles.css b/static/css/styles.css index a78ddfb..ed991db 100644 --- a/static/css/styles.css +++ b/static/css/styles.css @@ -188,3 +188,36 @@ body.i18n-loading #app-page { font-weight: 600; background: rgba(0, 0, 0, 0.02); } + +/* --------------------------------------------------------------------------- + Dark mode overrides (Bootstrap 5.3 data-bs-theme="dark") + Bootstrap handles most components automatically; only custom elements need + explicit overrides here. +--------------------------------------------------------------------------- */ +[data-bs-theme="dark"] .card { + border-color: rgba(255, 255, 255, 0.08); +} + +[data-bs-theme="dark"] .card-header { + background: rgba(255, 255, 255, 0.04); +} + +[data-bs-theme="dark"] .log-entry { + border-bottom-color: rgba(255, 255, 255, 0.07); +} + +[data-bs-theme="dark"] .log-time { + color: #9ca3af; +} + +[data-bs-theme="dark"] .table th { + color: #9ca3af; +} + +[data-bs-theme="dark"] .login-container { + background: linear-gradient(135deg, #0d0d1a 0%, #0a1020 50%, #071525 100%); +} + +[data-bs-theme="dark"] .stat-card { + background: var(--bs-card-bg); +} diff --git a/static/favicon.svg b/static/favicon.svg new file mode 100644 index 0000000..e6b3cfc --- /dev/null +++ b/static/favicon.svg @@ -0,0 +1,21 @@ + + + + + + + diff --git a/static/index.html b/static/index.html index b2433d9..4c236c7 100644 --- a/static/index.html +++ b/static/index.html @@ -1,13 +1,23 @@ + NetBird MSP Appliance + + +
@@ -17,7 +27,10 @@

NetBird MSP Appliance

-

Multi-Tenant Management Platform

+

Multi-Tenant Management + Platform

+

+

@@ -36,30 +49,37 @@

-
-

Enter your 6-digit authenticator code

+

Enter your 6-digit + authenticator code

+ maxlength="6" pattern="[0-9]{6}" inputmode="numeric" autocomplete="one-time-code" + required autofocus>
- Back to login + Back to login
-

Scan this QR code with your authenticator app

+

Scan this QR code with your + authenticator app

TOTP QR Code
@@ -69,13 +89,16 @@

+ maxlength="6" pattern="[0-9]{6}" inputmode="numeric" autocomplete="one-time-code" + required>
- Back to login + Back to login
@@ -87,17 +110,25 @@