Fix SSL cert creation and add HTTP fallback to avoid Unauthenticated error

- Create NPM proxy host WITHOUT SSL initially (ssl_forced=False),
  then request Let's Encrypt cert, then enable SSL only after cert
  is assigned. Prevents broken proxy when cert fails.
- If SSL cert creation fails, automatically fall back to HTTP mode:
  re-render management.json, dashboard.env, relay.env with http://
  URLs and recreate containers so dashboard login works.
- Improve error logging in _request_ssl, with specific troubleshooting
  hints (DNS, port 80 reachability) when the cert request times out.
- Use template variables for the relay WebSocket protocol (rels for HTTPS,
  rel for HTTP) and port instead of the hardcoded rels:// scheme and :443
  port in management.json.j2 and relay.env.j2.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-02-08 21:18:37 +01:00
parent 6d42e583d6
commit 8853087161
4 changed files with 105 additions and 30 deletions

View File

@@ -119,10 +119,12 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
external_url = f"http://localhost:{dashboard_port}" external_url = f"http://localhost:{dashboard_port}"
netbird_protocol = "http" netbird_protocol = "http"
netbird_port = str(dashboard_port) netbird_port = str(dashboard_port)
relay_ws_protocol = "rel"
else: else:
external_url = f"https://{netbird_domain}" external_url = f"https://{netbird_domain}"
netbird_protocol = "https" netbird_protocol = "https"
netbird_port = "443" netbird_port = "443"
relay_ws_protocol = "rels"
# Step 4: Create instance directory # Step 4: Create instance directory
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}") instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
@@ -151,6 +153,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
"netbird_dashboard_image": config.netbird_dashboard_image, "netbird_dashboard_image": config.netbird_dashboard_image,
"docker_network": config.docker_network, "docker_network": config.docker_network,
"datastore_encryption_key": datastore_key, "datastore_encryption_key": datastore_key,
"relay_ws_protocol": relay_ws_protocol,
} }
_render_template(jinja_env, "docker-compose.yml.j2", _render_template(jinja_env, "docker-compose.yml.j2",
@@ -241,9 +244,11 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
f"NPM proxy creation failed: {npm_result['error']}", f"NPM proxy creation failed: {npm_result['error']}",
) )
else: else:
ssl_ok = npm_result.get("ssl", False)
_log_action( _log_action(
db, customer_id, "deploy", "info", db, customer_id, "deploy", "info",
f"NPM proxy host created: {netbird_domain} -> {forward_host}:{dashboard_port}", f"NPM proxy host created: {netbird_domain} -> {forward_host}:{dashboard_port} "
f"(SSL: {'OK' if ssl_ok else 'FAILED — check DNS and port 80 accessibility'})",
) )
# Create NPM UDP stream for relay STUN port # Create NPM UDP stream for relay STUN port
@@ -267,6 +272,36 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
f"NPM UDP stream created: port {allocated_port} -> {forward_host}:{allocated_port}", f"NPM UDP stream created: port {allocated_port} -> {forward_host}:{allocated_port}",
) )
# Step 9b: If SSL failed, fall back to HTTP so the dashboard works
ssl_ok = npm_result.get("ssl", False) if not npm_result.get("error") else False
if not ssl_ok:
logger.warning("SSL cert failed for %s — switching configs to HTTP", netbird_domain)
external_url = f"http://{netbird_domain}"
netbird_protocol = "http"
netbird_port = "80"
relay_ws_protocol = "rel"
template_vars["external_url"] = external_url
template_vars["netbird_protocol"] = netbird_protocol
template_vars["netbird_port"] = netbird_port
template_vars["relay_ws_protocol"] = relay_ws_protocol
# Re-render configs that contain URL/protocol references
_render_template(jinja_env, "management.json.j2",
os.path.join(instance_dir, "management.json"), **template_vars)
_render_template(jinja_env, "dashboard.env.j2",
os.path.join(instance_dir, "dashboard.env"), **template_vars)
_render_template(jinja_env, "relay.env.j2",
os.path.join(instance_dir, "relay.env"), **template_vars)
# Recreate containers to pick up new config
docker_service.compose_up(instance_dir, container_prefix, timeout=120)
_log_action(
db, customer_id, "deploy", "info",
"SSL not available — switched to HTTP mode. "
"To enable HTTPS: ensure DNS resolves and port 80 is reachable, then re-deploy.",
)
# Step 10: Create deployment record # Step 10: Create deployment record
setup_url = external_url setup_url = external_url

View File

@@ -127,6 +127,10 @@ async def create_proxy_host(
Caddy reverse proxy is listening. Caddy handles internal routing to Caddy reverse proxy is listening. Caddy handles internal routing to
management, signal, relay, and dashboard containers. management, signal, relay, and dashboard containers.
Creates the proxy host WITHOUT SSL first (so HTTP works immediately),
then requests a Let's Encrypt certificate, and only enables SSL
after the cert is successfully assigned.
Args: Args:
api_url: NPM API base URL. api_url: NPM API base URL.
npm_email: NPM login email. npm_email: NPM login email.
@@ -137,16 +141,18 @@ async def create_proxy_host(
admin_email: Email for Let's Encrypt. admin_email: Email for Let's Encrypt.
Returns: Returns:
Dict with ``proxy_id`` on success or ``error`` on failure. Dict with ``proxy_id`` and ``ssl`` (bool) on success, or ``error`` on failure.
""" """
# Step 1: Create proxy host WITHOUT SSL — so HTTP works immediately
# SSL is enabled later only after a cert is successfully obtained.
payload = { payload = {
"domain_names": [domain], "domain_names": [domain],
"forward_scheme": "http", "forward_scheme": "http",
"forward_host": forward_host, "forward_host": forward_host,
"forward_port": forward_port, "forward_port": forward_port,
"certificate_id": 0, "certificate_id": 0,
"ssl_forced": True, "ssl_forced": False,
"hsts_enabled": True, "hsts_enabled": False,
"hsts_subdomains": False, "hsts_subdomains": False,
"http2_support": True, "http2_support": True,
"block_exploits": True, "block_exploits": True,
@@ -162,14 +168,12 @@ async def create_proxy_host(
try: try:
async with httpx.AsyncClient(timeout=180) as client: # Long timeout for LE cert async with httpx.AsyncClient(timeout=180) as client: # Long timeout for LE cert
# Step 1: Login to NPM
token = await _npm_login(client, api_url, npm_email, npm_password) token = await _npm_login(client, api_url, npm_email, npm_password)
headers = { headers = {
"Authorization": f"Bearer {token}", "Authorization": f"Bearer {token}",
"Content-Type": "application/json", "Content-Type": "application/json",
} }
# Step 2: Create proxy host
resp = await client.post( resp = await client.post(
f"{api_url}/nginx/proxy-hosts", json=payload, headers=headers f"{api_url}/nginx/proxy-hosts", json=payload, headers=headers
) )
@@ -179,10 +183,10 @@ async def create_proxy_host(
logger.info("Created NPM proxy host %s -> %s:%d (id=%s)", logger.info("Created NPM proxy host %s -> %s:%d (id=%s)",
domain, forward_host, forward_port, proxy_id) domain, forward_host, forward_port, proxy_id)
# Step 3: Request SSL certificate # Step 2: Request SSL certificate and enable HTTPS
await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email) ssl_ok = await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email)
return {"proxy_id": proxy_id} return {"proxy_id": proxy_id, "ssl": ssl_ok}
else: else:
error_msg = f"NPM returned {resp.status_code}: {resp.text[:300]}" error_msg = f"NPM returned {resp.status_code}: {resp.text[:300]}"
logger.error("Failed to create proxy host: %s", error_msg) logger.error("Failed to create proxy host: %s", error_msg)
@@ -202,11 +206,13 @@ async def _request_ssl(
proxy_id: int, proxy_id: int,
domain: str, domain: str,
admin_email: str, admin_email: str,
) -> None: ) -> bool:
"""Request a Let's Encrypt SSL certificate for a proxy host. """Request a Let's Encrypt SSL certificate and enable HTTPS on the proxy host.
Let's Encrypt validation can take up to 120 seconds, so we use Flow:
a longer timeout for certificate requests. 1. Create LE certificate via NPM API (HTTP-01 validation, up to 120s)
2. Assign certificate to the proxy host
3. Enable ssl_forced + hsts on the proxy host
Args: Args:
client: httpx client (already authenticated). client: httpx client (already authenticated).
@@ -215,7 +221,14 @@ async def _request_ssl(
proxy_id: The proxy host ID. proxy_id: The proxy host ID.
domain: The domain to certify. domain: The domain to certify.
admin_email: Contact email for LE. admin_email: Contact email for LE.
Returns:
True if SSL was successfully enabled, False otherwise.
""" """
if not admin_email:
logger.warning("No admin email set — skipping SSL certificate for %s", domain)
return False
ssl_payload = { ssl_payload = {
"domain_names": [domain], "domain_names": [domain],
"provider": "letsencrypt", "provider": "letsencrypt",
@@ -227,30 +240,57 @@ async def _request_ssl(
}, },
} }
try: try:
logger.info("Requesting Let's Encrypt certificate for %s ...", domain) logger.info("Requesting Let's Encrypt certificate for %s (email: %s) ...", domain, admin_email)
resp = await client.post( resp = await client.post(
f"{api_url}/nginx/certificates", f"{api_url}/nginx/certificates",
json=ssl_payload, json=ssl_payload,
headers=headers, headers=headers,
timeout=120, # LE validation can be slow timeout=120, # LE validation can be slow
) )
if resp.status_code in (200, 201): if resp.status_code not in (200, 201):
logger.error(
"SSL cert request for %s failed (HTTP %s): %s",
domain, resp.status_code, resp.text[:500],
)
return False
cert_id = resp.json().get("id") cert_id = resp.json().get("id")
logger.info("Certificate created (id=%s), assigning to proxy host %s", cert_id, proxy_id) logger.info("Certificate created (id=%s) for %s", cert_id, domain)
# Assign cert AND enable SSL + HSTS in one update
ssl_update = {
"certificate_id": cert_id,
"ssl_forced": True,
"hsts_enabled": True,
"http2_support": True,
}
assign_resp = await client.put( assign_resp = await client.put(
f"{api_url}/nginx/proxy-hosts/{proxy_id}", f"{api_url}/nginx/proxy-hosts/{proxy_id}",
json={"certificate_id": cert_id}, json=ssl_update,
headers=headers, headers=headers,
) )
if assign_resp.status_code in (200, 201): if assign_resp.status_code in (200, 201):
logger.info("SSL certificate %s assigned to proxy host %s", cert_id, proxy_id) logger.info("SSL enabled on proxy host %s for %s (cert_id=%s)", proxy_id, domain, cert_id)
return True
else: else:
logger.warning("Failed to assign cert to proxy host: %s %s", logger.error(
assign_resp.status_code, assign_resp.text[:200]) "Failed to assign cert %s to proxy host %s: HTTP %s%s",
else: cert_id, proxy_id, assign_resp.status_code, assign_resp.text[:300],
logger.warning("SSL cert request returned %s: %s", resp.status_code, resp.text[:500]) )
return False
except httpx.TimeoutException:
logger.error(
"SSL cert request for %s timed out after 120s. "
"Check: 1) DNS resolves %s to your server, "
"2) Port 80 is accessible from the internet, "
"3) NPM is listening on port 80.",
domain, domain,
)
return False
except Exception as exc: except Exception as exc:
logger.warning("SSL certificate request failed: %s", exc) logger.error("SSL certificate request failed for %s: %s", domain, exc)
return False
async def create_stream( async def create_stream(

View File

@@ -22,7 +22,7 @@
}, },
"Relay": { "Relay": {
"Addresses": [ "Addresses": [
"rels://{{ netbird_domain }}:443" "{{ relay_ws_protocol }}://{{ netbird_domain }}:{{ netbird_port }}"
], ],
"CredentialsTTL": "24h", "CredentialsTTL": "24h",
"Secret": "{{ relay_secret }}" "Secret": "{{ relay_secret }}"

View File

@@ -2,6 +2,6 @@
# {{ subdomain }}.{{ base_domain }} # {{ subdomain }}.{{ base_domain }}
NB_AUTH_SECRET={{ relay_secret }} NB_AUTH_SECRET={{ relay_secret }}
NB_LISTEN_ADDRESS=:80 NB_LISTEN_ADDRESS=:80
NB_EXPOSED_ADDRESS=rels://{{ subdomain }}.{{ base_domain }}:443 NB_EXPOSED_ADDRESS={{ relay_ws_protocol }}://{{ subdomain }}.{{ base_domain }}:{{ netbird_port }}
NB_ENABLE_STUN=true NB_ENABLE_STUN=true
NB_STUN_PORTS=3478 NB_STUN_PORTS=3478