diff --git a/clusters/_template/sovereign-tls/cilium-gateway-cert.yaml b/clusters/_template/sovereign-tls/cilium-gateway-cert.yaml index b0f5e207..f4abb018 100644 --- a/clusters/_template/sovereign-tls/cilium-gateway-cert.yaml +++ b/clusters/_template/sovereign-tls/cilium-gateway-cert.yaml @@ -60,7 +60,14 @@ metadata: spec: secretName: sovereign-wildcard-tls issuerRef: - name: letsencrypt-dns01-prod-powerdns + # Resolved by Flux postBuild to either + # `letsencrypt-dns01-prod-powerdns` (default; also the := fallback) or + # `letsencrypt-dns01-staging-powerdns` (qaTestEnabled=true) per + # tofu local.wildcard_cert_issuer. PROD has a 5/168h rate limit + # per exact set of identifiers — high-cadence QA reprovs hit it + # within hours and pin the Cilium Gateway listener to a Ready=False + # Certificate; STAGING has far higher limits for QA iteration. + name: "${WILDCARD_CERT_ISSUER:=letsencrypt-dns01-prod-powerdns}" kind: ClusterIssuer commonName: "*.${SOVEREIGN_FQDN}" dnsNames: diff --git a/infra/hetzner/cloudinit-control-plane.tftpl b/infra/hetzner/cloudinit-control-plane.tftpl index 8b3d63b6..e3582bb2 100644 --- a/infra/hetzner/cloudinit-control-plane.tftpl +++ b/infra/hetzner/cloudinit-control-plane.tftpl @@ -1017,6 +1017,13 @@ write_files: metadata: name: sovereign-tls namespace: flux-system + annotations: + # WILDCARD_CERT_ISSUER selector (Fix #176 — qa-loop iter-1 LE + # rate-limit unblock for the cilium-gateway-cert.yaml path). + # When wildcard_cert_use_staging=true the issuer string below + # routes the Certificate to LE STAGING (no 5/168h rate limit); + # default false → real-trusted production certs. + openova.io/wildcard-cert-issuer-tag: "${wildcard_cert_use_staging}" spec: # Carries the cert-manager Certificate that backs Cilium Gateway's # wildcard-TLS listener. Split out of bootstrap-kit so its @@ -1060,6 +1067,13 @@ write_files: # bp-catalyst-platform into clusters/_template/sovereign-tls/ # has access to the parent-zone list without a config copy. 
PARENT_DOMAINS_YAML: '${parent_domains_yaml}' + # WILDCARD_CERT_ISSUER (Fix #176 — qa-loop iter-1 LE + # rate-limit unblock). cilium-gateway-cert.yaml references + # this via $${WILDCARD_CERT_ISSUER} (escaped for templatefile). When + # wildcard_cert_use_staging=true → STAGING ClusterIssuer + # (no 5/168h limit); default → PROD. Locals in main.tf + # render the final string so this template stays declarative. + WILDCARD_CERT_ISSUER: "${wildcard_cert_issuer}" --- apiVersion: kustomize.toolkit.fluxcd.io/v1 kind: Kustomization diff --git a/infra/hetzner/main.tf b/infra/hetzner/main.tf index 7eeab843..097717a2 100644 --- a/infra/hetzner/main.tf +++ b/infra/hetzner/main.tf @@ -115,6 +115,19 @@ resource "hcloud_ssh_key" "main" { locals { control_plane_count = var.ha_enabled ? 3 : 1 + # Wildcard cert ClusterIssuer selector (Fix #176 — qa-loop iter-1 LE + # PROD rate-limit unblock for clusters/_template/sovereign-tls/cilium- + # gateway-cert.yaml). The sovereign-tls Kustomization's + # postBuild.substitute WILDCARD_CERT_ISSUER below resolves to: + # - letsencrypt-dns01-staging-powerdns when + # var.wildcard_cert_use_staging is "true" → fast iteration, no 5/168h limit + # - letsencrypt-dns01-prod-powerdns otherwise → real-trusted cert + # Both ClusterIssuers are shipped by bp-cert-manager-powerdns-webhook + # (bootstrap-kit slot 49). Without this, cilium-gateway-cert.yaml + # always hits PROD even on qaTestEnabled Sovereigns, and the 5/168h + # rate limit pins the Gateway to a `Ready=False` Certificate. + wildcard_cert_issuer = var.wildcard_cert_use_staging == "true" ? 
"letsencrypt-dns01-staging-powerdns" : "letsencrypt-dns01-prod-powerdns" + # ── Effective singular-path SKU selection (Fix #157) ───────────────────── # When qa_fixtures_enabled='true', the Sovereign is a QA-loop matrix # consumer carrying the full bp-* stack PLUS qaFixtures (Continuum + @@ -364,6 +377,7 @@ locals { qa_fixtures_namespace = var.qa_fixtures_namespace qa_organization = var.qa_organization wildcard_cert_use_staging = var.wildcard_cert_use_staging + wildcard_cert_issuer = local.wildcard_cert_issuer cluster_mesh_name = var.cluster_mesh_name cluster_mesh_id = var.cluster_mesh_id @@ -879,6 +893,7 @@ locals { qa_fixtures_namespace = var.qa_fixtures_namespace qa_organization = var.qa_organization wildcard_cert_use_staging = var.wildcard_cert_use_staging + wildcard_cert_issuer = local.wildcard_cert_issuer # Per-secondary-region ClusterMesh anchors. id is incremented per # peer index so each secondary region gets a unique slot in the # mesh registry; primary region keeps var.cluster_mesh_id.