Fix SSL cert creation and HTTP fallback for Unauthenticated error

- Create NPM proxy host WITHOUT SSL initially (ssl_forced=False),
  then request a Let's Encrypt cert; once the cert is issued, assign it
  and enable ssl_forced + HSTS in a single update. Prevents a broken
  proxy when cert issuance fails.
- If SSL cert creation fails, automatically fall back to HTTP mode:
  re-render management.json, dashboard.env, relay.env with http://
  URLs and recreate containers so dashboard login works.
- Better error logging in _request_ssl with specific timeout hints.
- Use template variables for the relay WebSocket protocol (rels/rel)
  and port instead of the hardcoded rels://...:443 in management.json.j2
  and relay.env.j2.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 21:18:37 +01:00
parent 6d42e583d6
commit 8853087161
4 changed files with 105 additions and 30 deletions

View File

@@ -119,10 +119,12 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
external_url = f"http://localhost:{dashboard_port}"
netbird_protocol = "http"
netbird_port = str(dashboard_port)
relay_ws_protocol = "rel"
else:
external_url = f"https://{netbird_domain}"
netbird_protocol = "https"
netbird_port = "443"
relay_ws_protocol = "rels"
# Step 4: Create instance directory
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
@@ -151,6 +153,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
"netbird_dashboard_image": config.netbird_dashboard_image,
"docker_network": config.docker_network,
"datastore_encryption_key": datastore_key,
"relay_ws_protocol": relay_ws_protocol,
}
_render_template(jinja_env, "docker-compose.yml.j2",
@@ -241,9 +244,11 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
f"NPM proxy creation failed: {npm_result['error']}",
)
else:
ssl_ok = npm_result.get("ssl", False)
_log_action(
db, customer_id, "deploy", "info",
f"NPM proxy host created: {netbird_domain} -> {forward_host}:{dashboard_port}",
f"NPM proxy host created: {netbird_domain} -> {forward_host}:{dashboard_port} "
f"(SSL: {'OK' if ssl_ok else 'FAILED — check DNS and port 80 accessibility'})",
)
# Create NPM UDP stream for relay STUN port
@@ -267,6 +272,36 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
f"NPM UDP stream created: port {allocated_port} -> {forward_host}:{allocated_port}",
)
# Step 9b: If SSL failed, fall back to HTTP so the dashboard works
ssl_ok = npm_result.get("ssl", False) if not npm_result.get("error") else False
if not ssl_ok:
logger.warning("SSL cert failed for %s — switching configs to HTTP", netbird_domain)
external_url = f"http://{netbird_domain}"
netbird_protocol = "http"
netbird_port = "80"
relay_ws_protocol = "rel"
template_vars["external_url"] = external_url
template_vars["netbird_protocol"] = netbird_protocol
template_vars["netbird_port"] = netbird_port
template_vars["relay_ws_protocol"] = relay_ws_protocol
# Re-render configs that contain URL/protocol references
_render_template(jinja_env, "management.json.j2",
os.path.join(instance_dir, "management.json"), **template_vars)
_render_template(jinja_env, "dashboard.env.j2",
os.path.join(instance_dir, "dashboard.env"), **template_vars)
_render_template(jinja_env, "relay.env.j2",
os.path.join(instance_dir, "relay.env"), **template_vars)
# Recreate containers to pick up new config
docker_service.compose_up(instance_dir, container_prefix, timeout=120)
_log_action(
db, customer_id, "deploy", "info",
"SSL not available — switched to HTTP mode. "
"To enable HTTPS: ensure DNS resolves and port 80 is reachable, then re-deploy.",
)
# Step 10: Create deployment record
setup_url = external_url

View File

@@ -127,6 +127,10 @@ async def create_proxy_host(
Caddy reverse proxy is listening. Caddy handles internal routing to
management, signal, relay, and dashboard containers.
Creates the proxy host WITHOUT SSL first (so HTTP works immediately),
then requests a Let's Encrypt certificate, and only enables SSL
after the cert is successfully assigned.
Args:
api_url: NPM API base URL.
npm_email: NPM login email.
@@ -137,16 +141,18 @@ async def create_proxy_host(
admin_email: Email for Let's Encrypt.
Returns:
Dict with ``proxy_id`` on success or ``error`` on failure.
Dict with ``proxy_id`` and ``ssl`` (bool) on success, or ``error`` on failure.
"""
# Step 1: Create proxy host WITHOUT SSL — so HTTP works immediately
# SSL is enabled later only after a cert is successfully obtained.
payload = {
"domain_names": [domain],
"forward_scheme": "http",
"forward_host": forward_host,
"forward_port": forward_port,
"certificate_id": 0,
"ssl_forced": True,
"hsts_enabled": True,
"ssl_forced": False,
"hsts_enabled": False,
"hsts_subdomains": False,
"http2_support": True,
"block_exploits": True,
@@ -162,14 +168,12 @@ async def create_proxy_host(
try:
async with httpx.AsyncClient(timeout=180) as client: # Long timeout for LE cert
# Step 1: Login to NPM
token = await _npm_login(client, api_url, npm_email, npm_password)
headers = {
"Authorization": f"Bearer {token}",
"Content-Type": "application/json",
}
# Step 2: Create proxy host
resp = await client.post(
f"{api_url}/nginx/proxy-hosts", json=payload, headers=headers
)
@@ -179,10 +183,10 @@ async def create_proxy_host(
logger.info("Created NPM proxy host %s -> %s:%d (id=%s)",
domain, forward_host, forward_port, proxy_id)
# Step 3: Request SSL certificate
await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email)
# Step 2: Request SSL certificate and enable HTTPS
ssl_ok = await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email)
return {"proxy_id": proxy_id}
return {"proxy_id": proxy_id, "ssl": ssl_ok}
else:
error_msg = f"NPM returned {resp.status_code}: {resp.text[:300]}"
logger.error("Failed to create proxy host: %s", error_msg)
@@ -202,11 +206,13 @@ async def _request_ssl(
proxy_id: int,
domain: str,
admin_email: str,
) -> None:
"""Request a Let's Encrypt SSL certificate for a proxy host.
) -> bool:
"""Request a Let's Encrypt SSL certificate and enable HTTPS on the proxy host.
Let's Encrypt validation can take up to 120 seconds, so we use
a longer timeout for certificate requests.
Flow:
1. Create LE certificate via NPM API (HTTP-01 validation, up to 120s)
2. Assign certificate to the proxy host
3. Enable ssl_forced + hsts on the proxy host
Args:
client: httpx client (already authenticated).
@@ -215,7 +221,14 @@ async def _request_ssl(
proxy_id: The proxy host ID.
domain: The domain to certify.
admin_email: Contact email for LE.
Returns:
True if SSL was successfully enabled, False otherwise.
"""
if not admin_email:
logger.warning("No admin email set — skipping SSL certificate for %s", domain)
return False
ssl_payload = {
"domain_names": [domain],
"provider": "letsencrypt",
@@ -227,30 +240,57 @@ async def _request_ssl(
},
}
try:
logger.info("Requesting Let's Encrypt certificate for %s ...", domain)
logger.info("Requesting Let's Encrypt certificate for %s (email: %s) ...", domain, admin_email)
resp = await client.post(
f"{api_url}/nginx/certificates",
json=ssl_payload,
headers=headers,
timeout=120, # LE validation can be slow
)
if resp.status_code in (200, 201):
if resp.status_code not in (200, 201):
logger.error(
"SSL cert request for %s failed (HTTP %s): %s",
domain, resp.status_code, resp.text[:500],
)
return False
cert_id = resp.json().get("id")
logger.info("Certificate created (id=%s), assigning to proxy host %s", cert_id, proxy_id)
logger.info("Certificate created (id=%s) for %s", cert_id, domain)
# Assign cert AND enable SSL + HSTS in one update
ssl_update = {
"certificate_id": cert_id,
"ssl_forced": True,
"hsts_enabled": True,
"http2_support": True,
}
assign_resp = await client.put(
f"{api_url}/nginx/proxy-hosts/{proxy_id}",
json={"certificate_id": cert_id},
json=ssl_update,
headers=headers,
)
if assign_resp.status_code in (200, 201):
logger.info("SSL certificate %s assigned to proxy host %s", cert_id, proxy_id)
logger.info("SSL enabled on proxy host %s for %s (cert_id=%s)", proxy_id, domain, cert_id)
return True
else:
logger.warning("Failed to assign cert to proxy host: %s %s",
assign_resp.status_code, assign_resp.text[:200])
else:
logger.warning("SSL cert request returned %s: %s", resp.status_code, resp.text[:500])
logger.error(
"Failed to assign cert %s to proxy host %s: HTTP %s%s",
cert_id, proxy_id, assign_resp.status_code, assign_resp.text[:300],
)
return False
except httpx.TimeoutException:
logger.error(
"SSL cert request for %s timed out after 120s. "
"Check: 1) DNS resolves %s to your server, "
"2) Port 80 is accessible from the internet, "
"3) NPM is listening on port 80.",
domain, domain,
)
return False
except Exception as exc:
logger.warning("SSL certificate request failed: %s", exc)
logger.error("SSL certificate request failed for %s: %s", domain, exc)
return False
async def create_stream(

View File

@@ -22,7 +22,7 @@
},
"Relay": {
"Addresses": [
"rels://{{ netbird_domain }}:443"
"{{ relay_ws_protocol }}://{{ netbird_domain }}:{{ netbird_port }}"
],
"CredentialsTTL": "24h",
"Secret": "{{ relay_secret }}"

View File

@@ -2,6 +2,6 @@
# {{ subdomain }}.{{ base_domain }}
NB_AUTH_SECRET={{ relay_secret }}
NB_LISTEN_ADDRESS=:80
NB_EXPOSED_ADDRESS=rels://{{ subdomain }}.{{ base_domain }}:443
NB_EXPOSED_ADDRESS={{ relay_ws_protocol }}://{{ subdomain }}.{{ base_domain }}:{{ netbird_port }}
NB_ENABLE_STUN=true
NB_STUN_PORTS=3478