Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion api/v1alpha1/seinode_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ type PlannedTask struct {
// Type identifies the task (e.g. "snapshot-restore", "config-patch").
Type string `json:"type"`

// ID is a deterministic UUID v5 derived from nodeName/taskType/attempt.
// ID is a deterministic UUID v5 derived from planID/taskType/planIndex.
// Used as the key for sidecar task submission and status polling.
ID string `json:"id"`

Expand All @@ -140,14 +140,40 @@ type PlannedTask struct {
RetryCount int `json:"retryCount,omitempty"`
}

// FailedTaskInfo records details about a task failure for observability.
//
// It is referenced from TaskPlan.FailedTaskDetail. None of its fields use
// omitempty, so every field is always serialized, and the generated CRD
// schemas list all five (error, id, maxRetries, retryCount, type) as required.
//
// NOTE(review): Type, ID, RetryCount and MaxRetries presumably mirror the
// same-named PlannedTask values at the moment of failure — confirm against
// the plan executor, which is not visible here.
type FailedTaskInfo struct {
// Type is the task type that failed.
Type string `json:"type"`
// ID is the task ID that failed.
ID string `json:"id"`
// Error is the error message from the failed execution.
Error string `json:"error"`
// RetryCount is the number of retries that were attempted.
RetryCount int `json:"retryCount"`
// MaxRetries is the configured retry limit.
MaxRetries int `json:"maxRetries"`
}

// TaskPlan tracks an ordered sequence of sidecar tasks that the controller
// executes to initialize a node.
//
// The same type is also used for group-level plans: the SeiNodeGroup
// deployment planners return a *TaskPlan, and the seinodegroups CRD embeds
// this schema for its plan status.
//
// The planners in internal/planner set ID to uuid.New().String(), so a rebuilt
// plan gets a new ID each time; task IDs are documented as being derived from
// planID/taskType/planIndex.
//
// ID, FailedTaskIndex and FailedTaskDetail are optional and are omitted from
// the serialized form when empty/nil. Because FailedTaskIndex is a pointer,
// an index of 0 is still serialized; only nil is dropped.
//
// The field comments below appear verbatim as property descriptions in the
// CRD manifests under config/crd, so regenerate those manifests after editing
// any of them.
type TaskPlan struct {
// ID is a unique identifier for this plan instance.
// +optional
ID string `json:"id,omitempty"`

// Phase is the overall state of the plan.
Phase TaskPlanPhase `json:"phase"`

// Tasks is the ordered list of tasks to execute.
Tasks []PlannedTask `json:"tasks"`

// FailedTaskIndex is the index of the task that caused the plan to fail.
// +optional
FailedTaskIndex *int `json:"failedTaskIndex,omitempty"`

// FailedTaskDetail records diagnostics about the task that caused the plan to fail.
// +optional
FailedTaskDetail *FailedTaskInfo `json:"failedTaskDetail,omitempty"`
}

// SeiNodePhase represents the high-level lifecycle state of a SeiNode.
Expand Down
25 changes: 25 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

36 changes: 35 additions & 1 deletion config/crd/sei.io_seinodegroups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,40 @@ spec:
Plan tracks the active group-level task plan (genesis assembly,
deployment, etc.). Nil when no plan is in progress.
properties:
failedTaskDetail:
description: FailedTaskDetail records diagnostics about the task
that caused the plan to fail.
properties:
error:
description: Error is the error message from the failed execution.
type: string
id:
description: ID is the task ID that failed.
type: string
maxRetries:
description: MaxRetries is the configured retry limit.
type: integer
retryCount:
description: RetryCount is the number of retries that were
attempted.
type: integer
type:
description: Type is the task type that failed.
type: string
required:
- error
- id
- maxRetries
- retryCount
- type
type: object
failedTaskIndex:
description: FailedTaskIndex is the index of the task that caused
the plan to fail.
type: integer
id:
description: ID is a unique identifier for this plan instance.
type: string
phase:
description: Phase is the overall state of the plan.
enum:
Expand All @@ -1087,7 +1121,7 @@ spec:
type: string
id:
description: |-
ID is a deterministic UUID v5 derived from nodeName/taskType/attempt.
ID is a deterministic UUID v5 derived from planID/taskType/planIndex.
Used as the key for sidecar task submission and status polling.
type: string
maxRetries:
Expand Down
36 changes: 35 additions & 1 deletion config/crd/sei.io_seinodes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -662,6 +662,40 @@ spec:
Plan tracks the active task sequence for this node. A planner generates
the plan based on the node's current state and conditions.
properties:
failedTaskDetail:
description: FailedTaskDetail records diagnostics about the task
that caused the plan to fail.
properties:
error:
description: Error is the error message from the failed execution.
type: string
id:
description: ID is the task ID that failed.
type: string
maxRetries:
description: MaxRetries is the configured retry limit.
type: integer
retryCount:
description: RetryCount is the number of retries that were
attempted.
type: integer
type:
description: Type is the task type that failed.
type: string
required:
- error
- id
- maxRetries
- retryCount
- type
type: object
failedTaskIndex:
description: FailedTaskIndex is the index of the task that caused
the plan to fail.
type: integer
id:
description: ID is a unique identifier for this plan instance.
type: string
phase:
description: Phase is the overall state of the plan.
enum:
Expand All @@ -682,7 +716,7 @@ spec:
type: string
id:
description: |-
ID is a deterministic UUID v5 derived from nodeName/taskType/attempt.
ID is a deterministic UUID v5 derived from planID/taskType/planIndex.
Used as the key for sidecar task submission and status polling.
type: string
maxRetries:
Expand Down
17 changes: 14 additions & 3 deletions internal/controller/node/plan_execution_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,15 +349,26 @@ func TestBuildPlanPhaseAndTasks(t *testing.T) {
}
}

func TestBuildPlan_DeterministicIDs(t *testing.T) {
func TestBuildPlan_UniqueIDsAcrossRebuilds(t *testing.T) {
node := snapshotNode()
p, _ := planner.ForNode(node)
plan1 := mustBuildPlan(t, p, node)
plan2 := mustBuildPlan(t, p, node)
if plan1.ID == plan2.ID {
t.Errorf("plan IDs should differ across rebuilds: both %q", plan1.ID)
}
for i := range plan1.Tasks {
if plan1.Tasks[i].ID != plan2.Tasks[i].ID {
t.Errorf("task %d ID not deterministic: %q vs %q", i, plan1.Tasks[i].ID, plan2.Tasks[i].ID)
if plan1.Tasks[i].ID == plan2.Tasks[i].ID {
t.Errorf("task %d ID should differ across rebuilds: both %q", i, plan1.Tasks[i].ID)
}
}
// Verify task IDs are unique within a single plan.
seen := map[string]bool{}
for _, tsk := range plan1.Tasks {
if seen[tsk.ID] {
t.Errorf("duplicate task ID within plan: %q", tsk.ID)
}
seen[tsk.ID] = true
}
}

Expand Down
20 changes: 9 additions & 11 deletions internal/planner/bootstrap.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package planner
import (
"slices"

"github.com/google/uuid"
seiconfig "github.com/sei-protocol/sei-config"
sidecar "github.com/sei-protocol/seictl/sidecar/client"

Expand All @@ -21,12 +22,8 @@ func buildBootstrapPlan(
snap *seiv1alpha1.SnapshotSource,
configApplyParams *task.ConfigApplyParams,
) (*seiv1alpha1.TaskPlan, error) {
attempts := map[string]int{}
nextAttempt := func(taskType string) int {
a := attempts[taskType]
attempts[taskType] = a + 1
return a
}
planID := uuid.New().String()
planIndex := 0

jobName := task.BootstrapJobName(node)
serviceName := node.Name
Expand All @@ -36,11 +33,12 @@ func buildBootstrapPlan(
tasks := make([]seiv1alpha1.PlannedTask, 0, 2+len(bootstrapProg)+2+len(postProg))

appendTask := func(taskType string, params any) error {
t, err := buildPlannedTask(node, taskType, nextAttempt(taskType), params)
t, err := buildPlannedTask(planID, taskType, planIndex, params)
if err != nil {
return err
}
tasks = append(tasks, t)
planIndex++
return nil
}

Expand Down Expand Up @@ -78,7 +76,7 @@ func buildBootstrapPlan(
}
}

return &seiv1alpha1.TaskPlan{Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
return &seiv1alpha1.TaskPlan{ID: planID, Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
}

// buildBootstrapProgression returns the sidecar task sequence for the
Expand Down Expand Up @@ -135,7 +133,7 @@ const genesisConfigureMaxRetries = 180
// uploaded genesis.json to S3.
func buildGenesisPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error) {
gc := node.Spec.Validator.GenesisCeremony
attempt := 0
planID := uuid.New().String()

prog := []string{
TaskGenerateIdentity,
Expand All @@ -150,7 +148,7 @@ func buildGenesisPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error)

tasks := make([]seiv1alpha1.PlannedTask, len(prog))
for i, taskType := range prog {
t, err := buildPlannedTask(node, taskType, attempt, genesisParamsForTaskType(node, gc, taskType))
t, err := buildPlannedTask(planID, taskType, i, genesisParamsForTaskType(node, gc, taskType))
if err != nil {
return nil, err
}
Expand All @@ -159,7 +157,7 @@ func buildGenesisPlan(node *seiv1alpha1.SeiNode) (*seiv1alpha1.TaskPlan, error)
}
tasks[i] = t
}
return &seiv1alpha1.TaskPlan{Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
return &seiv1alpha1.TaskPlan{ID: planID, Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
}

func genesisParamsForTaskType(node *seiv1alpha1.SeiNode, gc *seiv1alpha1.GenesisCeremonyNodeConfig, taskType string) any {
Expand Down
12 changes: 8 additions & 4 deletions internal/planner/deployment.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"fmt"
"strconv"

"github.com/google/uuid"

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/task"
)
Expand Down Expand Up @@ -54,6 +56,7 @@ type hardForkDeploymentPlanner struct{}
func (p *hardForkDeploymentPlanner) BuildPlan(
group *seiv1alpha1.SeiNodeGroup,
) (*seiv1alpha1.TaskPlan, error) {
planID := uuid.New().String()
haltHeight := group.Spec.UpdateStrategy.HardFork.HaltHeight
incumbentNodes := group.Status.IncumbentNodes
entrantNodes := EntrantNodeNames(group)
Expand Down Expand Up @@ -99,13 +102,13 @@ func (p *hardForkDeploymentPlanner) BuildPlan(

tasks := make([]seiv1alpha1.PlannedTask, len(prog))
for i, p := range prog {
t, err := buildGroupPlannedTask(group.Name, p.taskType, p.params)
t, err := buildGroupPlannedTask(planID, p.taskType, i, p.params)
if err != nil {
return nil, err
}
tasks[i] = t
}
return &seiv1alpha1.TaskPlan{Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
return &seiv1alpha1.TaskPlan{ID: planID, Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
}

// blueGreenDeploymentPlanner builds a deployment plan for the BlueGreen strategy.
Expand All @@ -114,6 +117,7 @@ type blueGreenDeploymentPlanner struct{}
func (p *blueGreenDeploymentPlanner) BuildPlan(
group *seiv1alpha1.SeiNodeGroup,
) (*seiv1alpha1.TaskPlan, error) {
planID := uuid.New().String()
incumbentNodes := group.Status.IncumbentNodes
entrantNodes := EntrantNodeNames(group)
entrantRevision := EntrantRevision(group)
Expand Down Expand Up @@ -152,11 +156,11 @@ func (p *blueGreenDeploymentPlanner) BuildPlan(

tasks := make([]seiv1alpha1.PlannedTask, len(prog))
for i, p := range prog {
t, err := buildGroupPlannedTask(group.Name, p.taskType, p.params)
t, err := buildGroupPlannedTask(planID, p.taskType, i, p.params)
if err != nil {
return nil, err
}
tasks[i] = t
}
return &seiv1alpha1.TaskPlan{Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
return &seiv1alpha1.TaskPlan{ID: planID, Phase: seiv1alpha1.TaskPlanActive, Tasks: tasks}, nil
}
Loading
Loading