Fix SSL cert creation and HTTP fallback for Unauthenticated error
- Create the NPM proxy host WITHOUT SSL initially (ssl_forced=False), then request a Let's Encrypt cert, and enable SSL only after the cert is assigned. This prevents a broken proxy when the cert request fails.
- If SSL cert creation fails, automatically fall back to HTTP mode: re-render management.json, dashboard.env, and relay.env with http:// URLs and recreate the containers so dashboard login works.
- Better error logging in _request_ssl, with specific timeout hints.
- Use template variables for the relay WebSocket protocol (rels/rel) instead of hardcoded rels:// in management.json.j2 and relay.env.j2.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -119,10 +119,12 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
|||||||
external_url = f"http://localhost:{dashboard_port}"
|
external_url = f"http://localhost:{dashboard_port}"
|
||||||
netbird_protocol = "http"
|
netbird_protocol = "http"
|
||||||
netbird_port = str(dashboard_port)
|
netbird_port = str(dashboard_port)
|
||||||
|
relay_ws_protocol = "rel"
|
||||||
else:
|
else:
|
||||||
external_url = f"https://{netbird_domain}"
|
external_url = f"https://{netbird_domain}"
|
||||||
netbird_protocol = "https"
|
netbird_protocol = "https"
|
||||||
netbird_port = "443"
|
netbird_port = "443"
|
||||||
|
relay_ws_protocol = "rels"
|
||||||
|
|
||||||
# Step 4: Create instance directory
|
# Step 4: Create instance directory
|
||||||
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
|
instance_dir = os.path.join(config.data_dir, f"kunde{customer_id}")
|
||||||
@@ -151,6 +153,7 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
|||||||
"netbird_dashboard_image": config.netbird_dashboard_image,
|
"netbird_dashboard_image": config.netbird_dashboard_image,
|
||||||
"docker_network": config.docker_network,
|
"docker_network": config.docker_network,
|
||||||
"datastore_encryption_key": datastore_key,
|
"datastore_encryption_key": datastore_key,
|
||||||
|
"relay_ws_protocol": relay_ws_protocol,
|
||||||
}
|
}
|
||||||
|
|
||||||
_render_template(jinja_env, "docker-compose.yml.j2",
|
_render_template(jinja_env, "docker-compose.yml.j2",
|
||||||
@@ -241,9 +244,11 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
|||||||
f"NPM proxy creation failed: {npm_result['error']}",
|
f"NPM proxy creation failed: {npm_result['error']}",
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
ssl_ok = npm_result.get("ssl", False)
|
||||||
_log_action(
|
_log_action(
|
||||||
db, customer_id, "deploy", "info",
|
db, customer_id, "deploy", "info",
|
||||||
f"NPM proxy host created: {netbird_domain} -> {forward_host}:{dashboard_port}",
|
f"NPM proxy host created: {netbird_domain} -> {forward_host}:{dashboard_port} "
|
||||||
|
f"(SSL: {'OK' if ssl_ok else 'FAILED — check DNS and port 80 accessibility'})",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Create NPM UDP stream for relay STUN port
|
# Create NPM UDP stream for relay STUN port
|
||||||
@@ -267,6 +272,36 @@ async def deploy_customer(db: Session, customer_id: int) -> dict[str, Any]:
|
|||||||
f"NPM UDP stream created: port {allocated_port} -> {forward_host}:{allocated_port}",
|
f"NPM UDP stream created: port {allocated_port} -> {forward_host}:{allocated_port}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Step 9b: If SSL failed, fall back to HTTP so the dashboard works
|
||||||
|
ssl_ok = npm_result.get("ssl", False) if not npm_result.get("error") else False
|
||||||
|
if not ssl_ok:
|
||||||
|
logger.warning("SSL cert failed for %s — switching configs to HTTP", netbird_domain)
|
||||||
|
external_url = f"http://{netbird_domain}"
|
||||||
|
netbird_protocol = "http"
|
||||||
|
netbird_port = "80"
|
||||||
|
relay_ws_protocol = "rel"
|
||||||
|
template_vars["external_url"] = external_url
|
||||||
|
template_vars["netbird_protocol"] = netbird_protocol
|
||||||
|
template_vars["netbird_port"] = netbird_port
|
||||||
|
template_vars["relay_ws_protocol"] = relay_ws_protocol
|
||||||
|
|
||||||
|
# Re-render configs that contain URL/protocol references
|
||||||
|
_render_template(jinja_env, "management.json.j2",
|
||||||
|
os.path.join(instance_dir, "management.json"), **template_vars)
|
||||||
|
_render_template(jinja_env, "dashboard.env.j2",
|
||||||
|
os.path.join(instance_dir, "dashboard.env"), **template_vars)
|
||||||
|
_render_template(jinja_env, "relay.env.j2",
|
||||||
|
os.path.join(instance_dir, "relay.env"), **template_vars)
|
||||||
|
|
||||||
|
# Recreate containers to pick up new config
|
||||||
|
docker_service.compose_up(instance_dir, container_prefix, timeout=120)
|
||||||
|
|
||||||
|
_log_action(
|
||||||
|
db, customer_id, "deploy", "info",
|
||||||
|
"SSL not available — switched to HTTP mode. "
|
||||||
|
"To enable HTTPS: ensure DNS resolves and port 80 is reachable, then re-deploy.",
|
||||||
|
)
|
||||||
|
|
||||||
# Step 10: Create deployment record
|
# Step 10: Create deployment record
|
||||||
setup_url = external_url
|
setup_url = external_url
|
||||||
|
|
||||||
|
|||||||
@@ -127,6 +127,10 @@ async def create_proxy_host(
|
|||||||
Caddy reverse proxy is listening. Caddy handles internal routing to
|
Caddy reverse proxy is listening. Caddy handles internal routing to
|
||||||
management, signal, relay, and dashboard containers.
|
management, signal, relay, and dashboard containers.
|
||||||
|
|
||||||
|
Creates the proxy host WITHOUT SSL first (so HTTP works immediately),
|
||||||
|
then requests a Let's Encrypt certificate, and only enables SSL
|
||||||
|
after the cert is successfully assigned.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
api_url: NPM API base URL.
|
api_url: NPM API base URL.
|
||||||
npm_email: NPM login email.
|
npm_email: NPM login email.
|
||||||
@@ -137,16 +141,18 @@ async def create_proxy_host(
|
|||||||
admin_email: Email for Let's Encrypt.
|
admin_email: Email for Let's Encrypt.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict with ``proxy_id`` on success or ``error`` on failure.
|
Dict with ``proxy_id`` and ``ssl`` (bool) on success, or ``error`` on failure.
|
||||||
"""
|
"""
|
||||||
|
# Step 1: Create proxy host WITHOUT SSL — so HTTP works immediately
|
||||||
|
# SSL is enabled later only after a cert is successfully obtained.
|
||||||
payload = {
|
payload = {
|
||||||
"domain_names": [domain],
|
"domain_names": [domain],
|
||||||
"forward_scheme": "http",
|
"forward_scheme": "http",
|
||||||
"forward_host": forward_host,
|
"forward_host": forward_host,
|
||||||
"forward_port": forward_port,
|
"forward_port": forward_port,
|
||||||
"certificate_id": 0,
|
"certificate_id": 0,
|
||||||
"ssl_forced": True,
|
"ssl_forced": False,
|
||||||
"hsts_enabled": True,
|
"hsts_enabled": False,
|
||||||
"hsts_subdomains": False,
|
"hsts_subdomains": False,
|
||||||
"http2_support": True,
|
"http2_support": True,
|
||||||
"block_exploits": True,
|
"block_exploits": True,
|
||||||
@@ -162,14 +168,12 @@ async def create_proxy_host(
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=180) as client: # Long timeout for LE cert
|
async with httpx.AsyncClient(timeout=180) as client: # Long timeout for LE cert
|
||||||
# Step 1: Login to NPM
|
|
||||||
token = await _npm_login(client, api_url, npm_email, npm_password)
|
token = await _npm_login(client, api_url, npm_email, npm_password)
|
||||||
headers = {
|
headers = {
|
||||||
"Authorization": f"Bearer {token}",
|
"Authorization": f"Bearer {token}",
|
||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
}
|
}
|
||||||
|
|
||||||
# Step 2: Create proxy host
|
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
f"{api_url}/nginx/proxy-hosts", json=payload, headers=headers
|
f"{api_url}/nginx/proxy-hosts", json=payload, headers=headers
|
||||||
)
|
)
|
||||||
@@ -179,10 +183,10 @@ async def create_proxy_host(
|
|||||||
logger.info("Created NPM proxy host %s -> %s:%d (id=%s)",
|
logger.info("Created NPM proxy host %s -> %s:%d (id=%s)",
|
||||||
domain, forward_host, forward_port, proxy_id)
|
domain, forward_host, forward_port, proxy_id)
|
||||||
|
|
||||||
# Step 3: Request SSL certificate
|
# Step 2: Request SSL certificate and enable HTTPS
|
||||||
await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email)
|
ssl_ok = await _request_ssl(client, api_url, headers, proxy_id, domain, admin_email)
|
||||||
|
|
||||||
return {"proxy_id": proxy_id}
|
return {"proxy_id": proxy_id, "ssl": ssl_ok}
|
||||||
else:
|
else:
|
||||||
error_msg = f"NPM returned {resp.status_code}: {resp.text[:300]}"
|
error_msg = f"NPM returned {resp.status_code}: {resp.text[:300]}"
|
||||||
logger.error("Failed to create proxy host: %s", error_msg)
|
logger.error("Failed to create proxy host: %s", error_msg)
|
||||||
@@ -202,11 +206,13 @@ async def _request_ssl(
|
|||||||
proxy_id: int,
|
proxy_id: int,
|
||||||
domain: str,
|
domain: str,
|
||||||
admin_email: str,
|
admin_email: str,
|
||||||
) -> None:
|
) -> bool:
|
||||||
"""Request a Let's Encrypt SSL certificate for a proxy host.
|
"""Request a Let's Encrypt SSL certificate and enable HTTPS on the proxy host.
|
||||||
|
|
||||||
Let's Encrypt validation can take up to 120 seconds, so we use
|
Flow:
|
||||||
a longer timeout for certificate requests.
|
1. Create LE certificate via NPM API (HTTP-01 validation, up to 120s)
|
||||||
|
2. Assign certificate to the proxy host
|
||||||
|
3. Enable ssl_forced + hsts on the proxy host
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
client: httpx client (already authenticated).
|
client: httpx client (already authenticated).
|
||||||
@@ -215,7 +221,14 @@ async def _request_ssl(
|
|||||||
proxy_id: The proxy host ID.
|
proxy_id: The proxy host ID.
|
||||||
domain: The domain to certify.
|
domain: The domain to certify.
|
||||||
admin_email: Contact email for LE.
|
admin_email: Contact email for LE.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if SSL was successfully enabled, False otherwise.
|
||||||
"""
|
"""
|
||||||
|
if not admin_email:
|
||||||
|
logger.warning("No admin email set — skipping SSL certificate for %s", domain)
|
||||||
|
return False
|
||||||
|
|
||||||
ssl_payload = {
|
ssl_payload = {
|
||||||
"domain_names": [domain],
|
"domain_names": [domain],
|
||||||
"provider": "letsencrypt",
|
"provider": "letsencrypt",
|
||||||
@@ -227,30 +240,57 @@ async def _request_ssl(
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
logger.info("Requesting Let's Encrypt certificate for %s ...", domain)
|
logger.info("Requesting Let's Encrypt certificate for %s (email: %s) ...", domain, admin_email)
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
f"{api_url}/nginx/certificates",
|
f"{api_url}/nginx/certificates",
|
||||||
json=ssl_payload,
|
json=ssl_payload,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
timeout=120, # LE validation can be slow
|
timeout=120, # LE validation can be slow
|
||||||
)
|
)
|
||||||
if resp.status_code in (200, 201):
|
if resp.status_code not in (200, 201):
|
||||||
|
logger.error(
|
||||||
|
"SSL cert request for %s failed (HTTP %s): %s",
|
||||||
|
domain, resp.status_code, resp.text[:500],
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
cert_id = resp.json().get("id")
|
cert_id = resp.json().get("id")
|
||||||
logger.info("Certificate created (id=%s), assigning to proxy host %s", cert_id, proxy_id)
|
logger.info("Certificate created (id=%s) for %s", cert_id, domain)
|
||||||
|
|
||||||
|
# Assign cert AND enable SSL + HSTS in one update
|
||||||
|
ssl_update = {
|
||||||
|
"certificate_id": cert_id,
|
||||||
|
"ssl_forced": True,
|
||||||
|
"hsts_enabled": True,
|
||||||
|
"http2_support": True,
|
||||||
|
}
|
||||||
assign_resp = await client.put(
|
assign_resp = await client.put(
|
||||||
f"{api_url}/nginx/proxy-hosts/{proxy_id}",
|
f"{api_url}/nginx/proxy-hosts/{proxy_id}",
|
||||||
json={"certificate_id": cert_id},
|
json=ssl_update,
|
||||||
headers=headers,
|
headers=headers,
|
||||||
)
|
)
|
||||||
if assign_resp.status_code in (200, 201):
|
if assign_resp.status_code in (200, 201):
|
||||||
logger.info("SSL certificate %s assigned to proxy host %s", cert_id, proxy_id)
|
logger.info("SSL enabled on proxy host %s for %s (cert_id=%s)", proxy_id, domain, cert_id)
|
||||||
|
return True
|
||||||
else:
|
else:
|
||||||
logger.warning("Failed to assign cert to proxy host: %s %s",
|
logger.error(
|
||||||
assign_resp.status_code, assign_resp.text[:200])
|
"Failed to assign cert %s to proxy host %s: HTTP %s — %s",
|
||||||
else:
|
cert_id, proxy_id, assign_resp.status_code, assign_resp.text[:300],
|
||||||
logger.warning("SSL cert request returned %s: %s", resp.status_code, resp.text[:500])
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
except httpx.TimeoutException:
|
||||||
|
logger.error(
|
||||||
|
"SSL cert request for %s timed out after 120s. "
|
||||||
|
"Check: 1) DNS resolves %s to your server, "
|
||||||
|
"2) Port 80 is accessible from the internet, "
|
||||||
|
"3) NPM is listening on port 80.",
|
||||||
|
domain, domain,
|
||||||
|
)
|
||||||
|
return False
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.warning("SSL certificate request failed: %s", exc)
|
logger.error("SSL certificate request failed for %s: %s", domain, exc)
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
async def create_stream(
|
async def create_stream(
|
||||||
|
|||||||
@@ -22,7 +22,7 @@
|
|||||||
},
|
},
|
||||||
"Relay": {
|
"Relay": {
|
||||||
"Addresses": [
|
"Addresses": [
|
||||||
"rels://{{ netbird_domain }}:443"
|
"{{ relay_ws_protocol }}://{{ netbird_domain }}:{{ netbird_port }}"
|
||||||
],
|
],
|
||||||
"CredentialsTTL": "24h",
|
"CredentialsTTL": "24h",
|
||||||
"Secret": "{{ relay_secret }}"
|
"Secret": "{{ relay_secret }}"
|
||||||
|
|||||||
@@ -2,6 +2,6 @@
|
|||||||
# {{ subdomain }}.{{ base_domain }}
|
# {{ subdomain }}.{{ base_domain }}
|
||||||
NB_AUTH_SECRET={{ relay_secret }}
|
NB_AUTH_SECRET={{ relay_secret }}
|
||||||
NB_LISTEN_ADDRESS=:80
|
NB_LISTEN_ADDRESS=:80
|
||||||
NB_EXPOSED_ADDRESS=rels://{{ subdomain }}.{{ base_domain }}:443
|
NB_EXPOSED_ADDRESS={{ relay_ws_protocol }}://{{ subdomain }}.{{ base_domain }}:{{ netbird_port }}
|
||||||
NB_ENABLE_STUN=true
|
NB_ENABLE_STUN=true
|
||||||
NB_STUN_PORTS=3478
|
NB_STUN_PORTS=3478
|
||||||
|
|||||||
Reference in New Issue
Block a user