fix(flow): derive HR dependsOn from live watcher + fix canvas drill-down 404 (#1431)

Two bugs the operator hit on /sovereign/provision/<id>/jobs:

1) Phase-1 install-* Jobs rendered DISCONNECTED on the canvas —
   helmwatch.Bridge doesn't persist Job.DependsOn (only the Phase-0
   tofu chain + cluster-bootstrap is wired today). Pull HR.spec.dependsOn
   from the live Watcher's informer cache via SnapshotComponents()
   (ComponentSnapshot.DependsOn already populated by extractDependsOn)
   at snapshot-time and emit finish-to-start edges from upstream
   install-<dep> to install-<self>. Also add a provisioner→bootstrap-kit
   group-to-group finish-to-start edge so the Phase-0/Phase-1 ordering
   is visible on the canvas.

2) Double-clicking a canvas node → "404 page not found" because
   FlowPage.handleNodeDoubleClick passed the full
   "<deploymentId>:install-X" id verbatim. The backend Store.GetJob
   keys by bare jobName ("install-X"), so the colon-prefixed id missed
   exact-match and JobDetail returned 404. Mirror useJobLinkBuilder
   (JobsTable.tsx line 364): strip the "<deploymentId>:" prefix and
   encodeURIComponent the remainder before pushing to the router.

Co-authored-by: e3mrah <1234567+e3mrah@users.noreply.github.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
e3mrah 2026-05-12 10:36:22 +04:00 committed by GitHub
parent 59b6940c18
commit e3771f6813
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 88 additions and 2 deletions

View File

@ -164,6 +164,31 @@ func (h *Handler) flowSnapshotFromJobs(deploymentID string) (*flowSnapshotLocalM
return nil, false
}
// Pull spec.dependsOn from the live helmwatch.Watcher's informer
// cache. jobs.Store does NOT persist Job.DependsOn for Phase-1
// install-* Jobs today (only the Phase-0 tofu chain + cluster-
// bootstrap gets dep wiring — see jobs/types.go PhaseTofu*).
// Without this every install-* bubble renders disconnected on
// the canvas. SnapshotComponents() returns ComponentSnapshot
// {AppID, DependsOn} populated by extractDependsOn from each
// HelmRelease's spec.dependsOn[].name (bp- prefix stripped). We
// index by AppID so the per-Job lookup is O(1).
hrDeps := map[string][]string{}
if val, ok := h.deployments.Load(deploymentID); ok {
if dep, ok := val.(*Deployment); ok && dep != nil {
dep.mu.Lock()
w := dep.liveWatcher
dep.mu.Unlock()
if w != nil {
for _, cs := range w.SnapshotComponents() {
if len(cs.DependsOn) > 0 {
hrDeps[cs.AppID] = cs.DependsOn
}
}
}
}
}
// FlowInstance.StartedAt — earliest non-zero Job.StartedAt across
// the deployment. If every Job is still pending (no StartedAt
// set), default to 0 — the canvas just shows the flow as "newly
@ -226,16 +251,67 @@ func (h *Handler) flowSnapshotFromJobs(deploymentID string) (*flowSnapshotLocalM
// installs to this job. jobs.Bridge already normalises the
// dep ids into the JobID(deploymentID, "install-<chart>")
// form, so we copy them verbatim.
seenDep := map[string]bool{}
for _, dep := range j.DependsOn {
if dep == "" || dep == j.ID {
continue
}
seenDep[dep] = true
rels = append(rels, flowSnapshotLocalRelationship{
FromID: dep,
ToID: j.ID,
Type: "finish-to-start",
})
}
// Layer-2 dependency derivation — helmwatch.Bridge does NOT
// persist Job.DependsOn for Phase-1 install-* Jobs today, but
// the live HR informer cache HAS the data (HR.spec.dependsOn).
// For each install-<chart> Job, look up the chart's AppID and
// emit a finish-to-start edge from each upstream install-<dep>
// Job (one per HR dependsOn entry) to this Job.
// Skipped for group jobs (j.AppID empty) and when the live
// watcher hasn't attached yet.
if j.AppID != "" {
for _, depAppID := range hrDeps[j.AppID] {
if depAppID == "" {
continue
}
depJobID := jobs.JobID(deploymentID, jobs.JobNamePrefix+depAppID)
if depJobID == j.ID || seenDep[depJobID] {
continue
}
seenDep[depJobID] = true
rels = append(rels, flowSnapshotLocalRelationship{
FromID: depJobID,
ToID: j.ID,
Type: "finish-to-start",
})
}
}
}
// Group-level sequential edge — `provisioner` (Phase-0 tofu chain)
// must complete before `bootstrap-kit` (Phase-1 Flux reconcile)
// starts. This is the real temporal relationship between the two
// top-level groups; without it the canvas renders them as siblings
// with no ordering hint.
provisionerID := jobs.JobID(deploymentID, jobs.GroupProvisioner)
bootstrapID := jobs.JobID(deploymentID, jobs.GroupBootstrapKit)
hasProvisioner := false
hasBootstrap := false
for _, j := range js {
if j.ID == provisionerID {
hasProvisioner = true
}
if j.ID == bootstrapID {
hasBootstrap = true
}
}
if hasProvisioner && hasBootstrap {
rels = append(rels, flowSnapshotLocalRelationship{
FromID: provisionerID,
ToID: bootstrapID,
Type: "finish-to-start",
})
}
return &flowSnapshotLocalMessage{

View File

@ -284,14 +284,24 @@ export function FlowPage({
const handleNodeDoubleClick = useCallback(
(nodeId: string) => {
// Drill-down id form: jobs.Store.GetJob keys by bare jobName
// (e.g. "install-reflector"), NOT the full
// "<deploymentId>:install-reflector" id form the canvas emits.
// Mirror useJobLinkBuilder (JobsTable.tsx line 364): strip the
// "<deploymentId>:" prefix and URL-encode the remainder. Without
// this the backend returns 404 because the exact-match path
// misses on the colon-prefixed id (Traefik also drops the URL
// encoding of `:` in path segments — see PR #1414 history).
const bare = nodeId.includes(':') ? nodeId.slice(nodeId.indexOf(':') + 1) : nodeId
const encoded = encodeURIComponent(bare)
// Double-click navigates to the job-detail surface. Chroot-aware:
// on the mother's monitoring view the deploymentId is in the URL;
// on the Sovereign's adult hostname the deploymentId is implicit
// so the clean root form is correct.
const target =
deploymentId && DETECTED_MODE.mode !== 'sovereign'
? `/provision/${deploymentId}/jobs/${nodeId}`
: `/jobs/${nodeId}`
? `/provision/${deploymentId}/jobs/${encoded}`
: `/jobs/${encoded}`
navigate({ to: target as never })
},
[navigate, deploymentId],