diff --git a/api/builder.go b/api/builder.go index d6ef0e5..2a8d647 100644 --- a/api/builder.go +++ b/api/builder.go @@ -3,16 +3,28 @@ package api import "github.com/sarchlab/akita/v4/sim" type defaultPortFactory struct { + incomingBufCap int + outgoingBufCap int } func (f defaultPortFactory) make(c sim.Component, name string) sim.Port { - return sim.NewPort(c, 1, 1, name) + incoming := f.incomingBufCap + if incoming <= 0 { + incoming = 1 + } + outgoing := f.outgoingBufCap + if outgoing <= 0 { + outgoing = 1 + } + return sim.NewPort(c, incoming, outgoing, name) } // DriverBuilder creates a new instance of Driver. type DriverBuilder struct { - engine sim.Engine - freq sim.Freq + engine sim.Engine + freq sim.Freq + portIncomingBufferCap int + portOutgoingBufferCap int } // WithEngine sets the engine. @@ -27,10 +39,20 @@ func (b DriverBuilder) WithFreq(freq sim.Freq) DriverBuilder { return b } +// WithPortBufferDepth configures driver boundary-port incoming/outgoing capacity. +func (b DriverBuilder) WithPortBufferDepth(incoming, outgoing int) DriverBuilder { + b.portIncomingBufferCap = incoming + b.portOutgoingBufferCap = outgoing + return b +} + // Build create a driver. func (b DriverBuilder) Build(name string) Driver { d := &driverImpl{ - portFactory: defaultPortFactory{}, + portFactory: defaultPortFactory{ + incomingBufCap: b.portIncomingBufferCap, + outgoingBufCap: b.portOutgoingBufferCap, + }, } d.TickingComponent = sim.NewTickingComponent(name, b.engine, b.freq, d) diff --git a/api/builder_microarch_test.go b/api/builder_microarch_test.go new file mode 100644 index 0000000..77fbcf8 --- /dev/null +++ b/api/builder_microarch_test.go @@ -0,0 +1,29 @@ +package api + +import ( + "testing" + + "github.com/sarchlab/akita/v4/sim" +) + +func TestDriverBuilderWithPortBufferDepth(t *testing.T) { + engine := sim.NewSerialEngine() + driver := DriverBuilder{}. + WithEngine(engine). + WithFreq(1*sim.GHz). + WithPortBufferDepth(3, 5). 
+ Build("Driver") + + impl, ok := driver.(*driverImpl) + if !ok { + t.Fatalf("expected *driverImpl, got %T", driver) + } + + factory, ok := impl.portFactory.(defaultPortFactory) + if !ok { + t.Fatalf("expected defaultPortFactory, got %T", impl.portFactory) + } + if factory.incomingBufCap != 3 || factory.outgoingBufCap != 5 { + t.Fatalf("unexpected driver port caps: in=%d out=%d", factory.incomingBufCap, factory.outgoingBufCap) + } +} diff --git a/api/driver.go b/api/driver.go index 41fa379..4a0e3c6 100644 --- a/api/driver.go +++ b/api/driver.go @@ -142,17 +142,23 @@ func (d *driverImpl) doOneFeedInTask(task *feedInTask) bool { err := port.Send(msg) //fmt.Println(msg) if err != nil { - panic("CGRA cannot handle the data rate") + // Keep task pending when downstream is temporarily back-pressured. + continue } - core.Trace("DataFlow", - "Behavior", "FeedIn", - slog.Float64("Time", float64(d.Engine.CurrentTime()*1e9)), - "Data", task.data[dataIndex], - "Color", task.color, - "From", port.Name(), - "To", task.remotePorts[i], - ) + timeValue := float64(d.Engine.CurrentTime() * 1e9) + if core.TraceEnabled() { + core.Trace("DataFlow", + "Behavior", "FeedIn", + slog.Float64("Time", timeValue), + "Data", task.data[dataIndex], + "Color", task.color, + "From", port.Name(), + "To", task.remotePorts[i], + ) + } else { + core.ObserveDataFlow("FeedIn", timeValue, port.Name(), string(task.remotePorts[i]), "", "") + } task.portRounds[i]++ madeProgress = true } @@ -202,15 +208,20 @@ func (d *driverImpl) doOneCollectTask(task *collectTask) bool { } task.data[dataIndex] = msg.Data.First() - core.Trace("DataFlow", - "Behavior", "Collect", - slog.Float64("Time", float64(d.Engine.CurrentTime()*1e9)), - "Data", msg.Data.First(), - "Pred", msg.Data.Pred, - "Color", task.color, - "From", task.ports[i].Name(), - "To", "None", - ) + timeValue := float64(d.Engine.CurrentTime() * 1e9) + if core.TraceEnabled() { + core.Trace("DataFlow", + "Behavior", "Collect", + slog.Float64("Time", timeValue), 
+ "Data", msg.Data.First(), + "Pred", msg.Data.Pred, + "Color", task.color, + "From", task.ports[i].Name(), + "To", "None", + ) + } else { + core.ObserveDataFlow("Collect", timeValue, task.ports[i].Name(), "None", "", "") + } task.portRounds[i]++ madeProgress = true diff --git a/api/feedin_backpressure_test.go b/api/feedin_backpressure_test.go new file mode 100644 index 0000000..3767429 --- /dev/null +++ b/api/feedin_backpressure_test.go @@ -0,0 +1,48 @@ +package api + +import ( + "testing" + + gomock "github.com/golang/mock/gomock" + "github.com/sarchlab/akita/v4/sim" +) + +func TestDoOneFeedInTaskBackpressureDoesNotPanic(t *testing.T) { + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + engine := sim.NewSerialEngine() + d := &driverImpl{} + d.TickingComponent = sim.NewTickingComponent("Driver", engine, 1*sim.GHz, d) + + port := NewMockPort(ctrl) + port.EXPECT().CanSend().Return(true).Times(2) + port.EXPECT().Name().Return("mock-port").AnyTimes() + port.EXPECT().AsRemote().Return(sim.RemotePort("driver-local")).AnyTimes() + port.EXPECT().Send(gomock.Any()).Return(sim.NewSendError()).Times(1) + port.EXPECT().Send(gomock.Any()).Return(nil).Times(1) + + task := &feedInTask{ + data: []uint32{7}, + localPorts: []sim.Port{port}, + remotePorts: []sim.RemotePort{sim.RemotePort("device-remote")}, + stride: 1, + color: 0, + rounds: 1, + portRounds: []int{0}, + } + + if progressed := d.doOneFeedInTask(task); progressed { + t.Fatal("expected no progress when Send returns backpressure error") + } + if task.portRounds[0] != 0 { + t.Fatalf("expected round to stay 0 after backpressure, got %d", task.portRounds[0]) + } + + if progressed := d.doOneFeedInTask(task); !progressed { + t.Fatal("expected progress once backpressure clears") + } + if task.portRounds[0] != 1 { + t.Fatalf("expected round to advance to 1, got %d", task.portRounds[0]) + } +} diff --git a/config/config.go b/config/config.go index 875f499..9825c56 100644 --- a/config/config.go +++ b/config/config.go @@ 
-20,9 +20,19 @@ type DeviceBuilder struct { freq sim.Freq monitor *monitoring.Monitor //portFactory portFactory - width, height int - memoryMode string // simple or shared or local - memoryShare map[[2]int]int //map[[x, y]]GroupID + width, height int + memoryMode string // simple or shared or local + memoryShare map[[2]int]int //map[[x, y]]GroupID + executionPolicy string + strictMaxSlip int64 + strictFailOnViolation bool + corePortIncomingCap int + corePortOutgoingCap int + enableFIFOModel bool + enableQueueWatches bool + queueWatches []core.QueueWatchSpec + numRegisters int + localMemoryWords int } // type portFactory interface { @@ -74,6 +84,60 @@ func (d DeviceBuilder) WithMemoryShare(share map[[2]int]int) DeviceBuilder { return d } +// WithExecutionPolicy sets core execution policy. +func (d DeviceBuilder) WithExecutionPolicy(policy string) DeviceBuilder { + d.executionPolicy = policy + return d +} + +// WithStrictTimingConfig sets strict timing replay controls. +func (d DeviceBuilder) WithStrictTimingConfig(maxSlip int64, failOnViolation bool) DeviceBuilder { + d.strictMaxSlip = maxSlip + d.strictFailOnViolation = failOnViolation + return d +} + +// WithCorePortBufferDepth sets core port incoming/outgoing capacities. +func (d DeviceBuilder) WithCorePortBufferDepth(incoming, outgoing int) DeviceBuilder { + d.corePortIncomingCap = incoming + d.corePortOutgoingCap = outgoing + return d +} + +// WithEnableFIFOModel toggles FIFO-based core execution model. +func (d DeviceBuilder) WithEnableFIFOModel(enabled bool) DeviceBuilder { + d.enableFIFOModel = enabled + return d +} + +// WithEnableQueueWatches toggles optional queue-occupancy instrumentation. +func (d DeviceBuilder) WithEnableQueueWatches(enabled bool) DeviceBuilder { + d.enableQueueWatches = enabled + return d +} + +// WithQueueWatches sets optional queue watch definitions for all cores. 
+func (d DeviceBuilder) WithQueueWatches(queueWatches []core.QueueWatchSpec) DeviceBuilder { + if len(queueWatches) == 0 { + d.queueWatches = nil + return d + } + d.queueWatches = append([]core.QueueWatchSpec(nil), queueWatches...) + return d +} + +// WithRegisterCount sets register-file size per core. +func (d DeviceBuilder) WithRegisterCount(num int) DeviceBuilder { + d.numRegisters = num + return d +} + +// WithLocalMemoryWords sets local memory size (in words) per core. +func (d DeviceBuilder) WithLocalMemoryWords(words int) DeviceBuilder { + d.localMemoryWords = words + return d +} + // Build creates a CGRA device. func (d DeviceBuilder) Build(name string) cgra.Device { dev := &device{ @@ -188,6 +252,14 @@ func (d DeviceBuilder) createTiles( WithExitAddr(&exit). WithRetValAddr(&retVal). WithExitReqAddr(&exitReqTimestamp). + WithExecutionPolicy(d.executionPolicy). + WithStrictTimingConfig(d.strictMaxSlip, d.strictFailOnViolation). + WithPortBufferDepth(d.corePortIncomingCap, d.corePortOutgoingCap). + WithEnableFIFOModel(d.enableFIFOModel). + WithEnableQueueWatches(d.enableQueueWatches). + WithQueueWatches(d.queueWatches). + WithRegisterCount(d.numRegisters). + WithLocalMemoryWords(d.localMemoryWords). Build(coreName) if d.monitor != nil { diff --git a/config/config_microarch_test.go b/config/config_microarch_test.go new file mode 100644 index 0000000..eee7606 --- /dev/null +++ b/config/config_microarch_test.go @@ -0,0 +1,35 @@ +package config + +import ( + "testing" + + "github.com/sarchlab/akita/v4/sim" +) + +func TestDeviceBuilderLocalMemoryWordsPropagatesToTile(t *testing.T) { + engine := sim.NewSerialEngine() + dev := DeviceBuilder{}. + WithEngine(engine). + WithFreq(1 * sim.GHz). + WithWidth(1). + WithHeight(1). + WithMemoryMode("simple"). + WithLocalMemoryWords(32). 
+ Build("Device") + + tile := dev.GetTile(0, 0) + _ = tile.GetMemory(0, 0, 31) + + didPanic := false + func() { + defer func() { + if recover() != nil { + didPanic = true + } + }() + _ = tile.GetMemory(0, 0, 32) + }() + if !didPanic { + t.Fatal("expected out-of-range panic at address 32 with local_memory_words=32") + } +} diff --git a/core/builder.go b/core/builder.go index e6c3a9d..a1b5e38 100644 --- a/core/builder.go +++ b/core/builder.go @@ -1,17 +1,30 @@ package core import ( + "os" + "strings" + "github.com/sarchlab/akita/v4/sim" "github.com/sarchlab/zeonica/cgra" ) // Builder can create new cores. type Builder struct { - engine sim.Engine - freq sim.Freq - exitAddr *bool - retValAddr *uint32 - exitReqAddr *float64 + engine sim.Engine + freq sim.Freq + exitAddr *bool + retValAddr *uint32 + exitReqAddr *float64 + executionPolicy string + strictMaxSlip int64 + strictFailOnViolation bool + portIncomingBufferCap int + portOutgoingBufferCap int + enableFIFOModel bool + enableQueueWatches bool + queueWatches []QueueWatchSpec + numRegisters int + localMemoryWords int } // WithEngine sets the engine. @@ -43,15 +56,98 @@ func (b Builder) WithExitReqAddr(exitReqAddr *float64) Builder { return b } +// WithExecutionPolicy sets the execution policy for issue-time gating. +func (b Builder) WithExecutionPolicy(policy string) Builder { + b.executionPolicy = policy + return b +} + +// WithStrictTimingConfig sets strict timing replay controls. +func (b Builder) WithStrictTimingConfig(maxSlip int64, failOnViolation bool) Builder { + b.strictMaxSlip = maxSlip + b.strictFailOnViolation = failOnViolation + return b +} + +// WithPortBufferDepth configures each core port incoming/outgoing capacity. +func (b Builder) WithPortBufferDepth(incoming, outgoing int) Builder { + b.portIncomingBufferCap = incoming + b.portOutgoingBufferCap = outgoing + return b +} + +// WithEnableFIFOModel toggles FIFO-based execution behavior. 
+func (b Builder) WithEnableFIFOModel(enabled bool) Builder { + b.enableFIFOModel = enabled + return b +} + +// WithEnableQueueWatches toggles optional queue-occupancy instrumentation. +func (b Builder) WithEnableQueueWatches(enabled bool) Builder { + b.enableQueueWatches = enabled + return b +} + +// WithQueueWatches sets optional queue watch definitions for occupancy instrumentation. +func (b Builder) WithQueueWatches(queueWatches []QueueWatchSpec) Builder { + if len(queueWatches) == 0 { + b.queueWatches = nil + return b + } + b.queueWatches = append([]QueueWatchSpec(nil), queueWatches...) + return b +} + +// WithRegisterCount configures register-file size per core. +func (b Builder) WithRegisterCount(num int) Builder { + b.numRegisters = num + return b +} + +// WithLocalMemoryWords configures local memory size (in words) per core. +func (b Builder) WithLocalMemoryWords(words int) Builder { + b.localMemoryWords = words + return b +} + +func readyHeldTraceEnabledFromEnv() bool { + value := strings.ToLower(strings.TrimSpace(os.Getenv("ZEONICA_TRACE_READY_HELD"))) + return value == "1" || value == "true" || value == "yes" || value == "on" +} + // Build creates a core. 
// //nolint:funlen func (b Builder) Build(name string) *Core { c := &Core{} + incomingBufCap := b.portIncomingBufferCap + if incomingBufCap <= 0 { + incomingBufCap = 1 + } + outgoingBufCap := b.portOutgoingBufferCap + if outgoingBufCap <= 0 { + outgoingBufCap = 1 + } + registerCount := b.numRegisters + if registerCount <= 0 { + registerCount = 64 + } + localMemoryWords := b.localMemoryWords + if localMemoryWords <= 0 { + localMemoryWords = 1024 + } + resolvedQueueWatches, err := resolveQueueWatchSpecs(b.queueWatches) + if err != nil { + panic(err) + } + c.TickingComponent = sim.NewTickingComponent(name, b.engine, b.freq, c) c.emu = instEmulator{ - CareFlags: true, + CareFlags: true, + ExecutionPolicy: normalizeExecutionPolicyString(b.executionPolicy), + StrictMaxSlip: b.strictMaxSlip, + StrictFailOnViolation: b.strictFailOnViolation, } c.state = coreState{ exit: b.exitAddr, @@ -70,16 +166,35 @@ func (b Builder) Build(name string) *Core { "NorthWest": true, "Router": true, }, - Registers: make([]cgra.Data, 64), - Memory: make([]uint32, 1024), - RecvBufHead: make([][]cgra.Data, 4), - RecvBufHeadReady: make([][]bool, 4), - SendBufHead: make([][]cgra.Data, 4), - SendBufHeadBusy: make([][]bool, 4), - AddrBuf: 0, - IsToWriteMemory: false, - States: make(map[string]interface{}), - Mode: SyncOp, + Registers: make([]cgra.Data, registerCount), + Memory: make([]uint32, localMemoryWords), + RecvBufHead: make([][]cgra.Data, 4), + RecvBufHeadReady: make([][]bool, 4), + SendBufHead: make([][]cgra.Data, 4), + SendBufHeadBusy: make([][]bool, 4), + RecvBufQueue: make([][][]cgra.Data, 4), + SendBufQueue: make([][][]cgra.Data, 4), + RecvQueueCapacity: incomingBufCap, + SendQueueCapacity: outgoingBufCap, + EnableFIFOModel: b.enableFIFOModel, + EnableQueueWatches: b.enableQueueWatches, + ConfiguredQueueWatches: cloneQueueWatches(resolvedQueueWatches), + OpInputReadCache: make(map[string]cgra.Data), + AddrBuf: 0, + IsToWriteMemory: false, + States: make(map[string]interface{}), + Mode: 
SyncOp, + CurrentCycle: 0, + OpTimingCursor: make(map[int]int), + OpTimingLate: make(map[int]bool), + OpTimingRollCycle: make(map[int]int64), + OpIssueCount: make(map[int]int), + ReadyHeldTraceEnabled: readyHeldTraceEnabledFromEnv(), + ReadyHeldRunMode: strings.TrimSpace(os.Getenv("ZEONICA_READY_HELD_RUN_MODE")), + TimingWaitBlocked: false, + StallReason: "", + StallOpID: 0, + StallOpCode: "", CurrReservationState: ReservationState{ ReservationMap: make(map[int]bool), OpToExec: 0, @@ -92,28 +207,34 @@ func (b Builder) Build(name string) *Core { c.state.RecvBufHeadReady[i] = make([]bool, 12) c.state.SendBufHead[i] = make([]cgra.Data, 12) c.state.SendBufHeadBusy[i] = make([]bool, 12) + c.state.RecvBufQueue[i] = make([][]cgra.Data, 12) + c.state.SendBufQueue[i] = make([][]cgra.Data, 12) + for direction := 0; direction < 12; direction++ { + c.state.RecvBufQueue[i][direction] = make([]cgra.Data, 0, incomingBufCap) + c.state.SendBufQueue[i][direction] = make([]cgra.Data, 0, outgoingBufCap) + } } c.ports = make(map[cgra.Side]*portPair) - b.makePort(c, cgra.North) - b.makePort(c, cgra.West) - b.makePort(c, cgra.South) - b.makePort(c, cgra.East) - b.makePort(c, cgra.NorthEast) - b.makePort(c, cgra.SouthEast) - b.makePort(c, cgra.SouthWest) - b.makePort(c, cgra.NorthWest) - b.makePort(c, cgra.Router) - b.makePort(c, cgra.Dummy1) - b.makePort(c, cgra.Dummy2) - b.makePort(c, cgra.Dummy3) + b.makePort(c, cgra.North, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.West, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.South, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.East, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.NorthEast, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.SouthEast, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.SouthWest, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.NorthWest, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.Router, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.Dummy1, incomingBufCap, 
outgoingBufCap) + b.makePort(c, cgra.Dummy2, incomingBufCap, outgoingBufCap) + b.makePort(c, cgra.Dummy3, incomingBufCap, outgoingBufCap) return c } -func (b *Builder) makePort(c *Core, side cgra.Side) { - localPort := sim.NewPort(c, 1, 1, c.Name()+"."+side.Name()) +func (b *Builder) makePort(c *Core, side cgra.Side, incomingBufCap, outgoingBufCap int) { + localPort := sim.NewPort(c, incomingBufCap, outgoingBufCap, c.Name()+"."+side.Name()) c.ports[side] = &portPair{ local: localPort, } diff --git a/core/builder_microarch_test.go b/core/builder_microarch_test.go new file mode 100644 index 0000000..79a2362 --- /dev/null +++ b/core/builder_microarch_test.go @@ -0,0 +1,32 @@ +package core + +import ( + "testing" + + "github.com/sarchlab/akita/v4/sim" +) + +func TestCoreBuilderResourceSizing(t *testing.T) { + engine := sim.NewSerialEngine() + c := Builder{}. + WithEngine(engine). + WithFreq(1*sim.GHz). + WithEnableFIFOModel(true). + WithRegisterCount(96). + WithLocalMemoryWords(2048). + WithPortBufferDepth(4, 6). 
+ Build("Core") + + if got := len(c.state.Registers); got != 96 { + t.Fatalf("unexpected register count: got %d want 96", got) + } + if got := len(c.state.Memory); got != 2048 { + t.Fatalf("unexpected local memory words: got %d want 2048", got) + } + if c.GetPortByName("North") == nil { + t.Fatal("expected North port to be initialized") + } + if !c.state.EnableFIFOModel { + t.Fatal("expected EnableFIFOModel to propagate to core state") + } +} diff --git a/core/core.go b/core/core.go index 8b32db5..64a81f3 100644 --- a/core/core.go +++ b/core/core.go @@ -59,14 +59,19 @@ func (c *Core) WriteMemory(x int, y int, data uint32, baseAddr uint32) { if x == int(c.state.TileX) && y == int(c.state.TileY) { c.state.Memory[baseAddr] = data //fmt.Printf("Core [%d][%d] write memory[%d] = %d\n", c.state.TileX, c.state.TileY, baseAddr, c.state.Memory[baseAddr]) - Trace("Memory", - "Behavior", "WriteMemory", - "Time", float64(c.Engine.CurrentTime()*1e9), - "Data", data, - "X", x, - "Y", y, - "Addr", baseAddr, - ) + timeValue := float64(c.Engine.CurrentTime() * 1e9) + if TraceEnabled() { + Trace("Memory", + "Behavior", "WriteMemory", + "Time", timeValue, + "Data", data, + "X", x, + "Y", y, + "Addr", baseAddr, + ) + } else { + ObserveMemory("WriteMemory", timeValue, x, y, "", "") + } } else { panic(fmt.Sprintf("Invalid Tile: Expect (%d, %d),but get (%d, %d)", c.state.TileX, c.state.TileY, x, y)) } @@ -85,8 +90,20 @@ func (c *Core) MapProgram(program interface{}, x int, y int) { panic("MapProgram expects core.Program type") } c.state.PCInBlock = -1 + c.state.CurrentCycle = 0 + c.state.OpTimingCursor = make(map[int]int) + c.state.OpTimingLate = make(map[int]bool) + c.state.OpTimingRollCycle = make(map[int]int64) + c.state.PendingSyncGroup = nil + c.state.TimingWaitBlocked = false + c.state.StallReason = "" + c.state.StallOpID = 0 + c.state.StallOpCode = "" + c.state.OpInputReadCache = make(map[string]cgra.Data) + c.state.resetPortQueues() c.state.TileX = uint32(x) c.state.TileY = 
uint32(y) + c.state.WatchedQueues = matchingQueueWatchesForTile(c.state.EnableQueueWatches, c.state.ConfiguredQueueWatches, x, y) } // Tick runs the program for one cycle. @@ -96,6 +113,8 @@ func (c *Core) Tick() (madeProgress bool) { // madeProgress = c.emu.runRoutingRules(&c.state) || madeProgress madeProgress = c.runProgram() || madeProgress madeProgress = c.doSend() || madeProgress + c.state.observeWatchedQueues(float64(c.Engine.CurrentTime() * 1e9)) + c.state.CurrentCycle++ return madeProgress } @@ -103,12 +122,16 @@ func makeBytesFromUint32(data uint32) []byte { return []byte{byte(data >> 24), byte(data >> 16), byte(data >> 8), byte(data)} } +//nolint:gocyclo func (c *Core) doSend() bool { madeProgress := false for i := 0; i < 8; i++ { // only 8 directions for color := 0; color < 4; color++ { - - if !c.state.SendBufHeadBusy[color][i] { + if !c.state.sendQueueHasData(color, i) { + continue + } + head, ok := c.state.sendQueuePeek(color, i) + if !ok { continue } @@ -117,7 +140,7 @@ func (c *Core) doSend() bool { msg := cgra.MoveMsgBuilder{}. WithDst(c.ports[cgra.Side(i)].remote). WithSrc(c.ports[cgra.Side(i)].local.AsRemote()). - WithData(c.state.SendBufHead[color][i]). + WithData(head). WithSendTime(c.Engine.CurrentTime()). WithColor(color). 
Build() @@ -127,26 +150,37 @@ func (c *Core) doSend() bool { continue } - Trace("DataFlow", - "Behavior", "Send", - slog.Float64("Time", float64(c.Engine.CurrentTime()*1e9)), - "Data", msg.Data.First(), - "Pred", c.state.SendBufHead[color][i].Pred, - "Color", color, - "Src", msg.Src, - "Dst", msg.Dst, - ) - c.state.SendBufHeadBusy[color][i] = false + timeValue := float64(c.Engine.CurrentTime() * 1e9) + if TraceEnabled() { + Trace("DataFlow", + "Behavior", "Send", + slog.Float64("Time", timeValue), + "Data", msg.Data.First(), + "Pred", head.Pred, + "Color", color, + "Src", msg.Src, + "Dst", msg.Dst, + ) + } else { + ObserveDataFlow("Send", timeValue, "", "", string(msg.Src), string(msg.Dst)) + } + c.state.sendQueueConsume(color, i) + madeProgress = true } } // handle the memory request - if c.state.SendBufHeadBusy[c.emu.getColorIndex("R")][cgra.Router] { // only one port, must be Router-red + routerColor := c.emu.getColorIndex("R") + if c.state.sendQueueHasData(routerColor, int(cgra.Router)) { // only one port, must be Router-red + head, ok := c.state.sendQueuePeek(routerColor, int(cgra.Router)) + if !ok { + return madeProgress + } if c.state.IsToWriteMemory { msg := mem.WriteReqBuilder{}. WithAddress(uint64(c.state.AddrBuf)). - WithData(makeBytesFromUint32(c.state.SendBufHead[c.emu.getColorIndex("R")][cgra.Router].First())). + WithData(makeBytesFromUint32(head.First())). WithSrc(c.ports[cgra.Router].local.AsRemote()). WithDst(c.ports[cgra.Router].remote). 
Build() @@ -156,16 +190,22 @@ func (c *Core) doSend() bool { return madeProgress } - Trace("Memory", - "Behavior", "Send", - slog.Float64("Time", float64(c.Engine.CurrentTime()*1e9)), - "Data", c.state.SendBufHead[c.emu.getColorIndex("R")][cgra.Router].First(), - "Pred", c.state.SendBufHead[c.emu.getColorIndex("R")][cgra.Router].Pred, - "Color", "R", - "Src", msg.Src, - "Dst", msg.Dst, - ) - c.state.SendBufHeadBusy[c.emu.getColorIndex("R")][cgra.Router] = false + timeValue := float64(c.Engine.CurrentTime() * 1e9) + if TraceEnabled() { + Trace("Memory", + "Behavior", "Send", + slog.Float64("Time", timeValue), + "Data", head.First(), + "Pred", head.Pred, + "Color", "R", + "Src", msg.Src, + "Dst", msg.Dst, + ) + } else { + ObserveMemory("Send", timeValue, int(c.state.TileX), int(c.state.TileY), string(msg.Src), string(msg.Dst)) + } + c.state.sendQueueConsume(routerColor, int(cgra.Router)) + madeProgress = true } else { msg := mem.ReadReqBuilder{}. WithAddress(uint64(c.state.AddrBuf)). @@ -179,15 +219,21 @@ func (c *Core) doSend() bool { return madeProgress } - Trace("Memory", - "Behavior", "Send", - slog.Float64("Time", float64(c.Engine.CurrentTime()*1e9)), - "Data", c.state.AddrBuf, - "Color", "R", - "Src", msg.Src, - "Dst", msg.Dst, - ) - c.state.SendBufHeadBusy[c.emu.getColorIndex("R")][cgra.Router] = false + timeValue := float64(c.Engine.CurrentTime() * 1e9) + if TraceEnabled() { + Trace("Memory", + "Behavior", "Send", + slog.Float64("Time", timeValue), + "Data", c.state.AddrBuf, + "Color", "R", + "Src", msg.Src, + "Dst", msg.Dst, + ) + } else { + ObserveMemory("Send", timeValue, int(c.state.TileX), int(c.state.TileY), string(msg.Src), string(msg.Dst)) + } + c.state.sendQueueConsume(routerColor, int(cgra.Router)) + madeProgress = true } } @@ -198,6 +244,7 @@ func convert4BytesToUint32(data []byte) uint32 { return uint32(data[0])<<24 | uint32(data[1])<<16 | uint32(data[2])<<8 | uint32(data[3]) } +//nolint:gocyclo func (c *Core) doRecv() bool { madeProgress := false 
for i := 0; i < 8; i++ { //direction @@ -214,7 +261,7 @@ func (c *Core) doRecv() bool { for color := 0; color < 4; color++ { //fmt.Printf("%s Receiving Data with color %d. Recv buffer head: %+v\n", // c.Name(), color, c.state.RecvBufHeadReady[color][i]) - if c.state.RecvBufHeadReady[color][i] { + if c.state.recvQueueIsFull(color, i) { continue } @@ -223,18 +270,24 @@ func (c *Core) doRecv() bool { continue } - c.state.RecvBufHeadReady[color][i] = true - c.state.RecvBufHead[color][i] = msg.Data + if !c.state.recvQueuePush(color, i, msg.Data) { + continue + } - Trace("DataFlow", - "Behavior", "Recv", - "Time", float64(c.Engine.CurrentTime()*1e9), - "Data", msg.Data.First(), - "Pred", c.state.RecvBufHead[color][i].Pred, - "Src", msg.Src, - "Dst", msg.Dst, - "Color", color, - ) + timeValue := float64(c.Engine.CurrentTime() * 1e9) + if TraceEnabled() { + Trace("DataFlow", + "Behavior", "Recv", + "Time", timeValue, + "Data", msg.Data.First(), + "Pred", msg.Data.Pred, + "Src", msg.Src, + "Dst", msg.Dst, + "Color", color, + ) + } else { + ObserveDataFlow("Recv", timeValue, "", "", string(msg.Src), string(msg.Dst)) + } c.ports[cgra.Side(i)].local.RetrieveIncoming() madeProgress = true @@ -245,39 +298,55 @@ func (c *Core) doRecv() bool { if item == nil { return madeProgress } - if c.state.RecvBufHeadReady[c.emu.getColorIndex("R")][cgra.Router] { + routerColor := c.emu.getColorIndex("R") + routerDir := int(cgra.Router) + if c.state.recvQueueIsFull(routerColor, routerDir) { return madeProgress } // if msg is DataReadyRsp, then the data is ready if msg, ok := item.(*mem.DataReadyRsp); ok { - c.state.RecvBufHeadReady[c.emu.getColorIndex("R")][cgra.Router] = true - c.state.RecvBufHead[c.emu.getColorIndex("R")][cgra.Router] = cgra.NewScalar(convert4BytesToUint32(msg.Data)) - - Trace("Memory", - "Behavior", "Recv", - "Time", float64(c.Engine.CurrentTime()*1e9), - "Data", msg.Data, - "Src", msg.Src, - "Dst", msg.Dst, - "Pred", 
c.state.RecvBufHead[c.emu.getColorIndex("R")][cgra.Router].Pred, - "Color", "R", - ) + value := cgra.NewScalar(convert4BytesToUint32(msg.Data)) + if !c.state.recvQueuePush(routerColor, routerDir, value) { + return madeProgress + } + + timeValue := float64(c.Engine.CurrentTime() * 1e9) + if TraceEnabled() { + Trace("Memory", + "Behavior", "Recv", + "Time", timeValue, + "Data", msg.Data, + "Src", msg.Src, + "Dst", msg.Dst, + "Pred", value.Pred, + "Color", "R", + ) + } else { + ObserveMemory("Recv", timeValue, int(c.state.TileX), int(c.state.TileY), string(msg.Src), string(msg.Dst)) + } c.ports[cgra.Router].local.RetrieveIncoming() madeProgress = true } else if msg, ok := item.(*mem.WriteDoneRsp); ok { - c.state.RecvBufHeadReady[c.emu.getColorIndex("R")][cgra.Router] = true - c.state.RecvBufHead[c.emu.getColorIndex("R")][cgra.Router] = cgra.NewScalar(0) - - Trace("Memory", - "Behavior", "Recv", - "Time", float64(c.Engine.CurrentTime()*1e9), - "Src", msg.Src, - "Dst", msg.Dst, - "Pred", c.state.RecvBufHead[c.emu.getColorIndex("R")][cgra.Router].Pred, - "Color", "R", - ) + value := cgra.NewScalar(0) + if !c.state.recvQueuePush(routerColor, routerDir, value) { + return madeProgress + } + + timeValue := float64(c.Engine.CurrentTime() * 1e9) + if TraceEnabled() { + Trace("Memory", + "Behavior", "Recv", + "Time", timeValue, + "Src", msg.Src, + "Dst", msg.Dst, + "Pred", value.Pred, + "Color", "R", + ) + } else { + ObserveMemory("Recv", timeValue, int(c.state.TileX), int(c.state.TileY), string(msg.Src), string(msg.Dst)) + } c.ports[cgra.Router].local.RetrieveIncoming() madeProgress = true diff --git a/core/derived_timing.go b/core/derived_timing.go new file mode 100644 index 0000000..21c13b2 --- /dev/null +++ b/core/derived_timing.go @@ -0,0 +1,68 @@ +package core + +import ( + "encoding/json" + "fmt" + "os" + "strings" +) + +const timingSidecarEnv = "ZEONICA_TIMING_SIDECAR" + +type timingSidecar struct { + SourceLog string `json:"source_log"` + DerivedAt string 
`json:"derived_at"` + Ops []timingOpSchedule `json:"ops"` +} + +type timingOpSchedule struct { + X int `json:"x"` + Y int `json:"y"` + OpID int `json:"op_id"` + Cycles []int64 `json:"cycles"` +} + +func loadDerivedTimingFromEnv() (map[string]map[int][]int64, error) { + path := strings.TrimSpace(os.Getenv(timingSidecarEnv)) + if path == "" { + return nil, nil + } + + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read %s (%s): %w", timingSidecarEnv, path, err) + } + + var sidecar timingSidecar + if err := json.Unmarshal(data, &sidecar); err != nil { + return nil, fmt.Errorf("parse timing sidecar %s: %w", path, err) + } + + result := make(map[string]map[int][]int64) + for _, op := range sidecar.Ops { + if len(op.Cycles) == 0 { + continue + } + coordKey := fmt.Sprintf("(%d,%d)", op.X, op.Y) + if _, exists := result[coordKey]; !exists { + result[coordKey] = make(map[int][]int64) + } + result[coordKey][op.OpID] = append(result[coordKey][op.OpID], op.Cycles...) + } + + return result, nil +} + +func cloneDerivedTimingMap(src map[int][]int64) map[int][]int64 { + if len(src) == 0 { + return nil + } + + cloned := make(map[int][]int64, len(src)) + for opID, cycles := range src { + copied := make([]int64, len(cycles)) + copy(copied, cycles) + cloned[opID] = copied + } + return cloned +} diff --git a/core/emu.go b/core/emu.go index dfda1e9..354ac1b 100644 --- a/core/emu.go +++ b/core/emu.go @@ -24,6 +24,18 @@ const ( AsyncOp ) +const ( + ExecutionPolicyStrictTimed = "strict_timed" + ExecutionPolicyElasticScheduled = "elastic_scheduled" + ExecutionPolicyInOrderDataflow = "in_order_dataflow" +) + +const ( + StallReasonScheduleBubble = "schedule_bubble" + StallReasonOperandWait = "operand_wait" + StallReasonOutputBlocked = "output_blocked" +) + type routingRule struct { src cgra.Side dst cgra.Side @@ -80,7 +92,7 @@ func (r *ReservationState) SetReservationMap(ig InstructionGroup, state *coreSta r.ReservationMap[i] = true } r.OpToExec = 
len(ig.Operations) - print("SetReservationMap: ", r.OpToExec, "\n") + // print("SetReservationMap: ", r.OpToExec, "\n") } type coreState struct { @@ -102,20 +114,942 @@ type coreState struct { Mode OpMode - RecvBufHead [][]cgra.Data //[Color][Direction] - RecvBufHeadReady [][]bool - SendBufHead [][]cgra.Data - SendBufHeadBusy [][]bool - AddrBuf uint32 // buffer for the address of the memory - IsToWriteMemory bool - - routingRules []*routingRule - triggers []*Trigger - CurrentTime float64 // current simulation time for logging + RecvBufHead [][]cgra.Data //[Color][Direction] + RecvBufHeadReady [][]bool + SendBufHead [][]cgra.Data + SendBufHeadBusy [][]bool + RecvBufQueue [][][]cgra.Data // [Color][Direction]FIFO + SendBufQueue [][][]cgra.Data // [Color][Direction]FIFO + RecvQueueCapacity int + SendQueueCapacity int + EnableFIFOModel bool + EnableQueueWatches bool + ConfiguredQueueWatches []resolvedQueueWatch + WatchedQueues []resolvedQueueWatch + OpInputReadCache map[string]cgra.Data + AddrBuf uint32 // buffer for the address of the memory + IsToWriteMemory bool + + routingRules []*routingRule + triggers []*Trigger + CurrentTime float64 // current simulation time for logging + CurrentCycle int64 + OpTimingCursor map[int]int + OpTimingLate map[int]bool + OpTimingRollCycle map[int]int64 + OpIssueCount map[int]int + PendingSyncGroup *pendingSyncGroup + ReadyHeldTraceEnabled bool + ReadyHeldRunMode string + TimingWaitBlocked bool + StallReason string + StallOpID int + StallOpCode string } type instEmulator struct { - CareFlags bool + CareFlags bool + ExecutionPolicy string + StrictMaxSlip int64 + StrictFailOnViolation bool +} + +type issueReadiness struct { + OperandsReady bool + PredicateReadyOrTrue bool + ResourcesAvailable bool + Ready bool + WaitReason string +} + +type issueDecision struct { + AnnotatedTimeT *int64 + OperandsReady bool + PredicateReadyOrTrue bool + ResourcesAvailable bool + TimingGateSatisfied bool + FireableExceptTime bool + BlockedByLowerBound 
bool + CanIssue bool + WaitReason string + TimingWaitBlocked bool +} + +type readyHeldObservation struct { + RunMode string + Cycle int64 + X int + Y int + OpID int + OccurrenceIndex int + OpCode string + AnnotatedTimeT *int64 + OperandsReady bool + PredicateReadyOrTrue bool + ResourcesAvailable bool + TimingGateSatisfied bool + FireableExceptTime bool + BlockedByLowerBound bool + IssuedThisCycle bool +} + +type pendingSyncGroup struct { + RemainingCycles int + BufferedResults map[Operand]cgra.Data + InvalidDecrements []int + RepresentativeID int + RepresentativeOp string +} + +func (s *coreState) recvFIFOEnabled() bool { + return s.EnableFIFOModel && + len(s.RecvBufQueue) == 4 && + len(s.RecvBufQueue[0]) > int(cgra.Router) +} + +func (s *coreState) sendFIFOEnabled() bool { + return s.EnableFIFOModel && + len(s.SendBufQueue) == 4 && + len(s.SendBufQueue[0]) > int(cgra.Router) +} + +func (s *coreState) recvQueueCap() int { + if s.RecvQueueCapacity > 0 { + return s.RecvQueueCapacity + } + return 1 +} + +func (s *coreState) sendQueueCap(color, direction int) int { + // Keep router-red as single outstanding request to preserve existing + // address/req-state coupling semantics. 
+ if color == 0 && direction == int(cgra.Router) { + return 1 + } + if s.SendQueueCapacity > 0 { + return s.SendQueueCapacity + } + return 1 +} + +func (s *coreState) syncRecvHead(color, direction int) { + if len(s.RecvBufHead) <= color || len(s.RecvBufHeadReady) <= color { + return + } + if len(s.RecvBufHead[color]) <= direction || len(s.RecvBufHeadReady[color]) <= direction { + return + } + if s.recvFIFOEnabled() && len(s.RecvBufQueue[color]) > direction && len(s.RecvBufQueue[color][direction]) > 0 { + s.RecvBufHead[color][direction] = s.RecvBufQueue[color][direction][0] + s.RecvBufHeadReady[color][direction] = true + return + } + s.RecvBufHeadReady[color][direction] = false +} + +func (s *coreState) syncSendHead(color, direction int) { + if len(s.SendBufHead) <= color || len(s.SendBufHeadBusy) <= color { + return + } + if len(s.SendBufHead[color]) <= direction || len(s.SendBufHeadBusy[color]) <= direction { + return + } + if s.sendFIFOEnabled() && len(s.SendBufQueue[color]) > direction && len(s.SendBufQueue[color][direction]) > 0 { + s.SendBufHead[color][direction] = s.SendBufQueue[color][direction][0] + s.SendBufHeadBusy[color][direction] = true + return + } + s.SendBufHeadBusy[color][direction] = false +} + +func (s *coreState) recvQueueLen(color, direction int) int { + if s.recvFIFOEnabled() && len(s.RecvBufQueue[color]) > direction { + return len(s.RecvBufQueue[color][direction]) + } + if s.RecvBufHeadReady[color][direction] { + return 1 + } + return 0 +} + +func (s *coreState) sendQueueLen(color, direction int) int { + if s.sendFIFOEnabled() && len(s.SendBufQueue[color]) > direction { + return len(s.SendBufQueue[color][direction]) + } + if s.SendBufHeadBusy[color][direction] { + return 1 + } + return 0 +} + +func (s *coreState) recvQueueIsFull(color, direction int) bool { + if s.recvFIFOEnabled() && len(s.RecvBufQueue[color]) > direction { + return len(s.RecvBufQueue[color][direction]) >= s.recvQueueCap() + } + return s.RecvBufHeadReady[color][direction] +} 
+ +func (s *coreState) recvQueuePush(color, direction int, data cgra.Data) bool { + if s.recvFIFOEnabled() && len(s.RecvBufQueue[color]) > direction { + if len(s.RecvBufQueue[color][direction]) >= s.recvQueueCap() { + return false + } + s.RecvBufQueue[color][direction] = append(s.RecvBufQueue[color][direction], data) + s.syncRecvHead(color, direction) + return true + } + if s.RecvBufHeadReady[color][direction] { + return false + } + s.RecvBufHead[color][direction] = data + s.RecvBufHeadReady[color][direction] = true + return true +} + +func (s *coreState) recvQueuePeek(color, direction int) (cgra.Data, bool) { + if s.recvFIFOEnabled() && len(s.RecvBufQueue[color]) > direction { + if len(s.RecvBufQueue[color][direction]) == 0 { + return cgra.Data{}, false + } + return s.RecvBufQueue[color][direction][0], true + } + if !s.RecvBufHeadReady[color][direction] { + return cgra.Data{}, false + } + return s.RecvBufHead[color][direction], true +} + +func (s *coreState) recvQueueConsume(color, direction int) (cgra.Data, bool) { + if s.recvFIFOEnabled() && len(s.RecvBufQueue[color]) > direction { + if len(s.RecvBufQueue[color][direction]) == 0 { + return cgra.Data{}, false + } + value := s.RecvBufQueue[color][direction][0] + s.RecvBufQueue[color][direction] = s.RecvBufQueue[color][direction][1:] + s.syncRecvHead(color, direction) + return value, true + } + if !s.RecvBufHeadReady[color][direction] { + return cgra.Data{}, false + } + value := s.RecvBufHead[color][direction] + s.RecvBufHeadReady[color][direction] = false + return value, true +} + +func (s *coreState) sendQueueHasData(color, direction int) bool { + if s.sendFIFOEnabled() && len(s.SendBufQueue[color]) > direction { + return len(s.SendBufQueue[color][direction]) > 0 + } + return s.SendBufHeadBusy[color][direction] +} + +func (s *coreState) sendQueueIsFull(color, direction int) bool { + if s.sendFIFOEnabled() && len(s.SendBufQueue[color]) > direction { + return len(s.SendBufQueue[color][direction]) >= 
s.sendQueueCap(color, direction) + } + return s.SendBufHeadBusy[color][direction] +} + +func (s *coreState) sendQueuePush(color, direction int, data cgra.Data) bool { + if s.sendFIFOEnabled() && len(s.SendBufQueue[color]) > direction { + if len(s.SendBufQueue[color][direction]) >= s.sendQueueCap(color, direction) { + return false + } + s.SendBufQueue[color][direction] = append(s.SendBufQueue[color][direction], data) + s.syncSendHead(color, direction) + return true + } + if s.SendBufHeadBusy[color][direction] { + return false + } + s.SendBufHeadBusy[color][direction] = true + s.SendBufHead[color][direction] = data + return true +} + +func (s *coreState) sendQueuePeek(color, direction int) (cgra.Data, bool) { + if s.sendFIFOEnabled() && len(s.SendBufQueue[color]) > direction { + if len(s.SendBufQueue[color][direction]) == 0 { + return cgra.Data{}, false + } + return s.SendBufQueue[color][direction][0], true + } + if !s.SendBufHeadBusy[color][direction] { + return cgra.Data{}, false + } + return s.SendBufHead[color][direction], true +} + +func (s *coreState) sendQueueConsume(color, direction int) (cgra.Data, bool) { + if s.sendFIFOEnabled() && len(s.SendBufQueue[color]) > direction { + if len(s.SendBufQueue[color][direction]) == 0 { + return cgra.Data{}, false + } + value := s.SendBufQueue[color][direction][0] + s.SendBufQueue[color][direction] = s.SendBufQueue[color][direction][1:] + s.syncSendHead(color, direction) + return value, true + } + if !s.SendBufHeadBusy[color][direction] { + return cgra.Data{}, false + } + value := s.SendBufHead[color][direction] + s.SendBufHeadBusy[color][direction] = false + return value, true +} + +func (s *coreState) resetPortQueues() { + for color := range s.RecvBufHeadReady { + for direction := range s.RecvBufHeadReady[color] { + s.RecvBufHeadReady[color][direction] = false + } + } + for color := range s.SendBufHeadBusy { + for direction := range s.SendBufHeadBusy[color] { + s.SendBufHeadBusy[color][direction] = false + } + } + if 
s.recvFIFOEnabled() { + for color := range s.RecvBufQueue { + for direction := range s.RecvBufQueue[color] { + s.RecvBufQueue[color][direction] = s.RecvBufQueue[color][direction][:0] + s.syncRecvHead(color, direction) + } + } + } + if s.sendFIFOEnabled() { + for color := range s.SendBufQueue { + for direction := range s.SendBufQueue[color] { + s.SendBufQueue[color][direction] = s.SendBufQueue[color][direction][:0] + s.syncSendHead(color, direction) + } + } + } +} + +func clone2DData(input [][]cgra.Data) [][]cgra.Data { + if input == nil { + return nil + } + out := make([][]cgra.Data, len(input)) + for i := range input { + if input[i] == nil { + continue + } + out[i] = append([]cgra.Data(nil), input[i]...) + } + return out +} + +func clone2DBool(input [][]bool) [][]bool { + if input == nil { + return nil + } + out := make([][]bool, len(input)) + for i := range input { + if input[i] == nil { + continue + } + out[i] = append([]bool(nil), input[i]...) + } + return out +} + +func clone3DData(input [][][]cgra.Data) [][][]cgra.Data { + if input == nil { + return nil + } + out := make([][][]cgra.Data, len(input)) + for i := range input { + if input[i] == nil { + continue + } + out[i] = make([][]cgra.Data, len(input[i])) + for j := range input[i] { + if input[i][j] == nil { + continue + } + out[i][j] = append([]cgra.Data(nil), input[i][j]...) 
+ } + } + return out +} + +func cloneStringBoolMap(input map[string]bool) map[string]bool { + if input == nil { + return nil + } + out := make(map[string]bool, len(input)) + for k, v := range input { + out[k] = v + } + return out +} + +func cloneStringIntMap(input map[string]int) map[string]int { + if input == nil { + return nil + } + out := make(map[string]int, len(input)) + for k, v := range input { + out[k] = v + } + return out +} + +func cloneIntBoolMap(input map[int]bool) map[int]bool { + if input == nil { + return nil + } + out := make(map[int]bool, len(input)) + for k, v := range input { + out[k] = v + } + return out +} + +func cloneIntIntMap(input map[int]int) map[int]int { + if input == nil { + return nil + } + out := make(map[int]int, len(input)) + for k, v := range input { + out[k] = v + } + return out +} + +func cloneIntInt64Map(input map[int]int64) map[int]int64 { + if input == nil { + return nil + } + out := make(map[int]int64, len(input)) + for k, v := range input { + out[k] = v + } + return out +} + +func cloneOperandDataMap(input map[Operand]cgra.Data) map[Operand]cgra.Data { + if input == nil { + return nil + } + out := make(map[Operand]cgra.Data, len(input)) + for operand, value := range input { + out[operand] = value + } + return out +} + +func cloneIntSlice(input []int) []int { + if input == nil { + return nil + } + return append([]int(nil), input...) 
+} + +func clonePendingSyncGroup(input *pendingSyncGroup) *pendingSyncGroup { + if input == nil { + return nil + } + return &pendingSyncGroup{ + RemainingCycles: input.RemainingCycles, + BufferedResults: cloneOperandDataMap(input.BufferedResults), + InvalidDecrements: cloneIntSlice(input.InvalidDecrements), + RepresentativeID: input.RepresentativeID, + RepresentativeOp: input.RepresentativeOp, + } +} + +func cloneIntAnyMap(input map[string]interface{}) map[string]interface{} { + if input == nil { + return nil + } + out := make(map[string]interface{}, len(input)) + for k, v := range input { + out[k] = v + } + return out +} + +func (s *coreState) cloneForEval() *coreState { + clone := *s + clone.Registers = append([]cgra.Data(nil), s.Registers...) + clone.Memory = append([]uint32(nil), s.Memory...) + clone.States = cloneIntAnyMap(s.States) + clone.Directions = cloneStringBoolMap(s.Directions) + clone.RecvBufHead = clone2DData(s.RecvBufHead) + clone.RecvBufHeadReady = clone2DBool(s.RecvBufHeadReady) + clone.SendBufHead = clone2DData(s.SendBufHead) + clone.SendBufHeadBusy = clone2DBool(s.SendBufHeadBusy) + clone.RecvBufQueue = clone3DData(s.RecvBufQueue) + clone.SendBufQueue = clone3DData(s.SendBufQueue) + clone.ConfiguredQueueWatches = cloneQueueWatches(s.ConfiguredQueueWatches) + clone.WatchedQueues = cloneQueueWatches(s.WatchedQueues) + clone.OpInputReadCache = make(map[string]cgra.Data) + clone.OpTimingCursor = cloneIntIntMap(s.OpTimingCursor) + clone.OpTimingLate = cloneIntBoolMap(s.OpTimingLate) + clone.OpTimingRollCycle = cloneIntInt64Map(s.OpTimingRollCycle) + clone.OpIssueCount = cloneIntIntMap(s.OpIssueCount) + clone.PendingSyncGroup = clonePendingSyncGroup(s.PendingSyncGroup) + clone.CurrReservationState = ReservationState{ + ReservationMap: cloneIntBoolMap(s.CurrReservationState.ReservationMap), + OpToExec: s.CurrReservationState.OpToExec, + RefCountRuntime: cloneStringIntMap(s.CurrReservationState.RefCountRuntime), + } + return &clone +} + +func (s 
*coreState) observeWatchedQueues(timeValue float64) { + if s == nil || len(s.WatchedQueues) == 0 { + return + } + + for _, watch := range s.WatchedQueues { + occupancy := 0 + var capacity int + switch watch.Kind { + case "recv": + occupancy = s.recvQueueLen(watch.ColorIdx, watch.DirectionIdx) + capacity = s.recvQueueCap() + case "send": + occupancy = s.sendQueueLen(watch.ColorIdx, watch.DirectionIdx) + capacity = s.sendQueueCap(watch.ColorIdx, watch.DirectionIdx) + default: + continue + } + + ObserveQueue( + watch.Label, + watch.Kind, + timeValue, + int(s.TileX), + int(s.TileY), + watch.Direction, + watch.Color, + occupancy, + capacity, + ) + } +} + +func normalizeExecutionPolicyString(policy string) string { + text := strings.ToLower(strings.TrimSpace(policy)) + switch text { + case ExecutionPolicyStrictTimed, "strict-timed", "static": + return ExecutionPolicyStrictTimed + case ExecutionPolicyElasticScheduled, "elastic-scheduled", "hybrid": + return ExecutionPolicyElasticScheduled + case "", ExecutionPolicyInOrderDataflow, "in-order-dataflow", "dynamic": + return ExecutionPolicyInOrderDataflow + default: + // Fall back to in-order dataflow for backward compatibility. 
+ return ExecutionPolicyInOrderDataflow + } +} + +func isStrictControlSensitiveOp(opCode string) bool { + normalized := strings.ToUpper(strings.TrimSpace(opCode)) + switch { + case normalized == "SEL", + normalized == "JMP", + normalized == "RET", + normalized == "CTRL_MOV", + normalized == "CMP_EXPORT", + normalized == "LT_EX": + return true + case strings.HasPrefix(normalized, "PHI"), + strings.HasPrefix(normalized, "GRANT"), + strings.HasPrefix(normalized, "ICMP"), + strings.HasPrefix(normalized, "RETURN"), + strings.HasPrefix(normalized, "B"): + return true + default: + return false + } +} + +func (i instEmulator) panicSynchronizationViolation(operation Operation, state *coreState, reason string) { + currentStep, targetStep, ii := i.resolveScheduleStep(operation, state) + panic(fmt.Sprintf( + "synchronization violation under %s: op=%s id=%d cycle=%d schedule_step=%d target_step=%d ii=%d raw_timestep=%d tile=(%d,%d): %s", + normalizeExecutionPolicyString(i.ExecutionPolicy), + operation.OpCode, + operation.ID, + state.CurrentCycle, + currentStep, + targetStep, + ii, + operation.TimeStep, + state.TileX, + state.TileY, + reason, + )) +} + +func (i instEmulator) resolveScheduleStep(operation Operation, state *coreState) (currentStep int64, targetStep int64, ii int64) { + ii = int64(state.Code.CompiledII) + if ii <= 0 { + return state.CurrentCycle, int64(operation.TimeStep), 0 + } + + currentStep = state.CurrentCycle % ii + if currentStep < 0 { + currentStep += ii + } + + targetStep = int64(operation.TimeStep) + if targetStep < 0 { + panic(fmt.Sprintf( + "invalid time_step=%d for compiled_ii=%d at op=%s id=%d tile=(%d,%d)", + operation.TimeStep, + state.Code.CompiledII, + operation.OpCode, + operation.ID, + state.TileX, + state.TileY, + )) + } + // Normalize to phase within II: compiler may emit time_step >= ii (e.g. 4 when ii=4 → step 0). 
+ if targetStep >= ii { + targetStep = targetStep % ii + } + + return currentStep, targetStep, ii +} + +func (i instEmulator) resolveDerivedSchedule(operation Operation, state *coreState) ([]int64, int, bool) { + if state == nil || state.Code.DerivedTiming == nil { + return nil, 0, false + } + + schedule, exists := state.Code.DerivedTiming[operation.ID] + if !exists || len(schedule) == 0 { + return nil, 0, false + } + + cursor := state.OpTimingCursor[operation.ID] + return schedule, cursor, true +} + +func (i instEmulator) advanceDerivedTimingCursor(operation Operation, state *coreState) { + if state == nil || state.Code.DerivedTiming == nil { + return + } + if _, exists := state.Code.DerivedTiming[operation.ID]; !exists { + return + } + state.OpTimingCursor[operation.ID] = state.OpTimingCursor[operation.ID] + 1 + delete(state.OpTimingLate, operation.ID) + delete(state.OpTimingRollCycle, operation.ID) +} + +func (i instEmulator) setStallReason(state *coreState, operation Operation, reason string) { + if state == nil || reason == "" { + return + } + state.StallReason = reason + state.StallOpID = operation.ID + state.StallOpCode = operation.OpCode +} + +func (i instEmulator) rollStrictExpectedCycle(expectedCycle, currentCycle int64, compiledII int) int64 { + ii := int64(compiledII) + if ii <= 0 { + ii = 1 + } + // Move to the next window start strictly after the current cycle. 
+ nextExpected := expectedCycle + ii + if nextExpected > currentCycle { + return nextExpected + } + delta := currentCycle - expectedCycle + steps := delta/ii + 1 + return expectedCycle + steps*ii +} + +func (s *coreState) readyHeldTraceActive() bool { + return s != nil && s.ReadyHeldTraceEnabled && strings.TrimSpace(s.ReadyHeldRunMode) != "" +} + +func (s *coreState) nextOpOccurrenceIndex(opID int) int { + if s == nil || s.OpIssueCount == nil { + return 0 + } + return s.OpIssueCount[opID] +} + +func (s *coreState) advanceOpOccurrenceIndex(opID int) { + if s == nil { + return + } + if s.OpIssueCount == nil { + s.OpIssueCount = make(map[int]int) + } + s.OpIssueCount[opID] = s.OpIssueCount[opID] + 1 +} + +func (i instEmulator) applyIssueDecision(operation Operation, state *coreState, decision issueDecision) { + if state == nil { + return + } + if decision.TimingWaitBlocked { + state.TimingWaitBlocked = true + } + if decision.WaitReason != "" { + i.setStallReason(state, operation, decision.WaitReason) + } +} + +func (i instEmulator) readyHeldObservationFor( + operation Operation, + state *coreState, + decision issueDecision, + occurrenceIndex int, + issuedThisCycle bool, +) (readyHeldObservation, bool) { + if state == nil || !state.readyHeldTraceActive() { + return readyHeldObservation{}, false + } + if !decision.FireableExceptTime && !decision.BlockedByLowerBound && !issuedThisCycle { + return readyHeldObservation{}, false + } + return readyHeldObservation{ + RunMode: state.ReadyHeldRunMode, + Cycle: state.CurrentCycle, + X: int(state.TileX), + Y: int(state.TileY), + OpID: operation.ID, + OccurrenceIndex: occurrenceIndex, + OpCode: operation.OpCode, + AnnotatedTimeT: decision.AnnotatedTimeT, + OperandsReady: decision.OperandsReady, + PredicateReadyOrTrue: decision.PredicateReadyOrTrue, + ResourcesAvailable: decision.ResourcesAvailable, + TimingGateSatisfied: decision.TimingGateSatisfied, + FireableExceptTime: decision.FireableExceptTime, + BlockedByLowerBound: 
decision.BlockedByLowerBound, + IssuedThisCycle: issuedThisCycle, + }, true +} + +func (i instEmulator) emitReadyHeldObservation(observation readyHeldObservation) { + var annotated any + if observation.AnnotatedTimeT != nil { + annotated = *observation.AnnotatedTimeT + } + Trace( + "ReadyHeld", + "run_mode", observation.RunMode, + "cycle", observation.Cycle, + "X", observation.X, + "Y", observation.Y, + "ID", observation.OpID, + "occurrence_index", observation.OccurrenceIndex, + "OpCode", observation.OpCode, + "annotated_time_t", annotated, + "operands_ready", observation.OperandsReady, + "predicate_ready_or_true", observation.PredicateReadyOrTrue, + "resources_available", observation.ResourcesAvailable, + "timing_gate_satisfied", observation.TimingGateSatisfied, + "fireable_except_time", observation.FireableExceptTime, + "blocked_by_lower_bound", observation.BlockedByLowerBound, + "issued_this_cycle", observation.IssuedThisCycle, + ) +} + +func (i instEmulator) issueDecision(operation Operation, state *coreState) issueDecision { + decision := issueDecision{ + OperandsReady: true, + PredicateReadyOrTrue: true, + ResourcesAvailable: true, + TimingGateSatisfied: true, + CanIssue: true, + } + + if !i.CareFlags || operation.InvalidIterations > 0 { + decision.FireableExceptTime = true + return decision + } + + readiness := i.checkIssueReadinessDetails(operation, state) + decision.OperandsReady = readiness.OperandsReady + decision.PredicateReadyOrTrue = readiness.PredicateReadyOrTrue + decision.ResourcesAvailable = readiness.ResourcesAvailable + decision.FireableExceptTime = readiness.Ready + + policy := normalizeExecutionPolicyString(i.ExecutionPolicy) + if schedule, cursor, hasDerived := i.resolveDerivedSchedule(operation, state); hasDerived && + (policy == ExecutionPolicyStrictTimed || policy == ExecutionPolicyElasticScheduled) { + if cursor >= len(schedule) { + decision.TimingGateSatisfied = false + decision.FireableExceptTime = false + decision.CanIssue = false + 
return decision + } + + annotatedTime := schedule[cursor] + decision.AnnotatedTimeT = int64Ptr(annotatedTime) + expectedCycle := annotatedTime + + switch policy { + case ExecutionPolicyStrictTimed: + if rolledCycle, exists := state.OpTimingRollCycle[operation.ID]; exists { + expectedCycle = rolledCycle + } + decision.TimingGateSatisfied = state.CurrentCycle >= expectedCycle + + if isStrictControlSensitiveOp(operation.OpCode) { + if state.CurrentCycle < expectedCycle { + decision.CanIssue = false + decision.WaitReason = StallReasonScheduleBubble + decision.TimingWaitBlocked = true + return decision + } + if readiness.Ready { + if state.CurrentCycle > annotatedTime { + state.OpTimingLate[operation.ID] = true + } + decision.CanIssue = true + return decision + } + if state.CurrentCycle > annotatedTime { + state.OpTimingLate[operation.ID] = true + } + decision.CanIssue = false + decision.WaitReason = readiness.WaitReason + return decision + } + + if state.CurrentCycle < expectedCycle { + decision.CanIssue = false + decision.WaitReason = StallReasonScheduleBubble + decision.TimingWaitBlocked = true + return decision + } + + lateness := state.CurrentCycle - expectedCycle + if lateness > 0 && i.StrictMaxSlip >= 0 && lateness > i.StrictMaxSlip { + reason := fmt.Sprintf( + "strict slip window violation: lateness=%d exceeds max_slip=%d (expected=%d current=%d)", + lateness, + i.StrictMaxSlip, + expectedCycle, + state.CurrentCycle, + ) + if i.StrictFailOnViolation { + i.panicSynchronizationViolation(operation, state, reason) + } + + nextExpected := i.rollStrictExpectedCycle(expectedCycle, state.CurrentCycle, state.Code.CompiledII) + state.OpTimingRollCycle[operation.ID] = nextExpected + Trace( + "TimingViolation", + "Policy", policy, + "OpCode", operation.OpCode, + "ID", operation.ID, + "X", state.TileX, + "Y", state.TileY, + "ExpectedCycle", expectedCycle, + "NextExpectedCycle", nextExpected, + "CurrentCycle", state.CurrentCycle, + "Lateness", lateness, + "MaxSlip", 
i.StrictMaxSlip, + ) + decision.CanIssue = false + decision.WaitReason = StallReasonScheduleBubble + decision.TimingWaitBlocked = true + return decision + } + + if !readiness.Ready { + if state.CurrentCycle > annotatedTime { + state.OpTimingLate[operation.ID] = true + } + decision.CanIssue = false + decision.WaitReason = readiness.WaitReason + return decision + } + + if state.CurrentCycle > annotatedTime { + state.OpTimingLate[operation.ID] = true + } + decision.CanIssue = true + return decision + case ExecutionPolicyElasticScheduled: + decision.TimingGateSatisfied = state.CurrentCycle >= expectedCycle + decision.BlockedByLowerBound = readiness.Ready && !decision.TimingGateSatisfied + if !decision.TimingGateSatisfied { + decision.CanIssue = false + decision.WaitReason = StallReasonScheduleBubble + decision.TimingWaitBlocked = true + return decision + } + if readiness.Ready { + decision.CanIssue = true + return decision + } + decision.CanIssue = false + decision.WaitReason = readiness.WaitReason + return decision + } + } + + currentStep, targetStep, ii := i.resolveScheduleStep(operation, state) + + // No schedule (compiled_ii missing or 0): ignore time gating so existing workloads + // (e.g. histogram) that do not use II-based scheduling still run like in-order. 
+ if ii <= 0 { + decision.CanIssue = readiness.Ready + decision.WaitReason = readiness.WaitReason + return decision + } + + switch policy { + case ExecutionPolicyStrictTimed: + decision.TimingGateSatisfied = currentStep >= targetStep + if currentStep < targetStep { + decision.CanIssue = false + decision.WaitReason = StallReasonScheduleBubble + decision.TimingWaitBlocked = true + return decision + } + if currentStep == targetStep { + if readiness.Ready { + decision.CanIssue = true + return decision + } + i.panicSynchronizationViolation(operation, state, "operand/credit not ready at scheduled step") + } + i.panicSynchronizationViolation(operation, state, "operation missed its exact scheduled step") + return decision + case ExecutionPolicyElasticScheduled: + decision.TimingGateSatisfied = currentStep >= targetStep + if currentStep < targetStep { + decision.CanIssue = false + decision.WaitReason = StallReasonScheduleBubble + decision.TimingWaitBlocked = true + return decision + } + decision.CanIssue = readiness.Ready + decision.WaitReason = readiness.WaitReason + return decision + case ExecutionPolicyInOrderDataflow: + decision.CanIssue = readiness.Ready + decision.WaitReason = readiness.WaitReason + return decision + default: + decision.CanIssue = readiness.Ready + decision.WaitReason = readiness.WaitReason + return decision + } +} + +func (i instEmulator) canIssue(operation Operation, state *coreState) bool { + decision := i.issueDecision(operation, state) + i.applyIssueDecision(operation, state, decision) + return decision.CanIssue } // set up the necessary state for the instruction group @@ -131,10 +1065,181 @@ func (i instEmulator) SetUpInstructionGroup(index int32, state *coreState) { state.CurrReservationState.SetRefCount(iGroup, state) } +func supportsDeferredLatency(opCode string) bool { + switch normalizeLatencyOpcode(opCode) { + case "LOAD", "STORE", "LDD", "STD", "LD", "LDW", "ST", "STW", + "TRIGGER", "JMP", "BEQ", "BNE", "BLT", + "RETURN_VALUE", 
"RETURN_VOID", "RET", + "PHI", "PHI_CONST", "PHI_START", "GRANT_PREDICATE", "GRANT_ONCE": + return false + default: + return true + } +} + +func (i instEmulator) deferredSyncGroupLatency(cinst InstructionGroup, state *coreState) (int, int, string, bool) { + if state == nil { + return 1, 0, "", false + } + + type sendKey struct { + color int + direction int + } + + maxLatency := 1 + representativeID := 0 + representativeOp := "" + requiredSends := make(map[sendKey]int) + executedOps := 0 + + for _, operation := range cinst.Operations { + if operation.InvalidIterations > 0 { + continue + } + executedOps++ + if !supportsDeferredLatency(operation.OpCode) { + return 1, 0, "", false + } + + latency := state.Code.OperationLatency(operation.OpCode) + if latency > maxLatency { + maxLatency = latency + representativeID = operation.ID + representativeOp = operation.OpCode + } else if representativeOp == "" { + representativeID = operation.ID + representativeOp = operation.OpCode + } + + for _, dst := range operation.DstOperands.Operands { + normalized := i.normalizeDirection(dst.Impl) + if !state.Directions[normalized] { + continue + } + key := sendKey{ + color: i.getColorIndex(dst.Color), + direction: i.getDirecIndex(normalized), + } + requiredSends[key]++ + if requiredSends[key] > state.sendQueueCap(key.color, key.direction) { + return 1, 0, "", false + } + } + } + + if executedOps == 0 || maxLatency <= 1 { + return 1, 0, "", false + } + + return maxLatency, representativeID, representativeOp, true +} + +func (i instEmulator) canCommitPendingSyncGroup(state *coreState, pending *pendingSyncGroup) bool { + if state == nil || pending == nil { + return true + } + + type sendKey struct { + color int + direction int + } + + requiredSends := make(map[sendKey]int) + for operand := range pending.BufferedResults { + normalized := i.normalizeDirection(operand.Impl) + if !state.Directions[normalized] { + continue + } + key := sendKey{ + color: i.getColorIndex(operand.Color), + 
direction: i.getDirecIndex(normalized), + } + requiredSends[key]++ + } + + for key, required := range requiredSends { + free := state.sendQueueCap(key.color, key.direction) - state.sendQueueLen(key.color, key.direction) + if free < required { + return false + } + } + return true +} + +func (i instEmulator) advancePendingSyncGroup(state *coreState) bool { + if state == nil || state.PendingSyncGroup == nil { + return false + } + + pending := state.PendingSyncGroup + if pending.RemainingCycles > 1 { + pending.RemainingCycles-- + state.TimingWaitBlocked = true + return true + } + + if pending.RemainingCycles == 1 { + pending.RemainingCycles = 0 + } + + if !i.canCommitPendingSyncGroup(state, pending) { + state.TimingWaitBlocked = true + state.StallReason = StallReasonOutputBlocked + state.StallOpID = pending.RepresentativeID + state.StallOpCode = pending.RepresentativeOp + return true + } + + for operand, value := range pending.BufferedResults { + i.writeOperand(operand, value, state) + } + i.applyInvalidIterationDecrements(state, pending.InvalidDecrements) + state.PendingSyncGroup = nil + return true +} + +func (i instEmulator) bufferDeferredResult( + operand Operand, + value cgra.Data, + workState *coreState, + bufferedResults map[Operand]cgra.Data, +) { + bufferedResults[operand] = value + if workState == nil || !strings.HasPrefix(operand.Impl, "$") { + return + } + registerIndex, err := strconv.Atoi(strings.TrimPrefix(operand.Impl, "$")) + if err != nil { + panic(fmt.Sprintf("invalid register index in deferred result buffering: %v", operand)) + } + if registerIndex < 0 || registerIndex >= len(workState.Registers) { + panic(fmt.Sprintf("register index %d out of range in deferred result buffering", registerIndex)) + } + workState.Registers[registerIndex] = value +} + +func (i instEmulator) applyDeferredSyncIssueState(state *coreState, workState *coreState) { + if state == nil || workState == nil { + return + } + + state.RecvBufHead = clone2DData(workState.RecvBufHead) 
+ state.RecvBufHeadReady = clone2DBool(workState.RecvBufHeadReady) + state.RecvBufQueue = clone3DData(workState.RecvBufQueue) + state.OpTimingCursor = cloneIntIntMap(workState.OpTimingCursor) + state.OpTimingLate = cloneIntBoolMap(workState.OpTimingLate) + state.OpTimingRollCycle = cloneIntInt64Map(workState.OpTimingRollCycle) + state.OpIssueCount = cloneIntIntMap(workState.OpIssueCount) + state.CurrentTime = workState.CurrentTime +} + func (i instEmulator) RunInstructionGroup(cinst InstructionGroup, state *coreState, time float64) bool { // check the Return signal if *state.exit && time > *state.requestExitTimestamp { - fmt.Println("Exit signal ( requested at", *state.requestExitTimestamp, ") received at time", time) + if DebugEnabled() { + slog.Debug("ExitSignal", "requestedAt", *state.requestExitTimestamp, "time", time) + } return false } prevPC := state.PCInBlock @@ -172,19 +1277,23 @@ func (i instEmulator) RunInstructionGroup(cinst InstructionGroup, state *coreSta } // else, this group is not finished, PC stays the same } else if state.Mode == SyncOp { if progressSync { - if state.NextPCInBlock == -1 { - print("PC+4 for PC=", state.PCInBlock, " X:", state.TileX, " Y:", state.TileY, "\n") - print("Instruction at PC=", state.PCInBlock, " is ", state.SelectedBlock.InstructionGroups[state.PCInBlock].Operations[0].OpCode, "\n") - state.PCInBlock++ - } else { - print("PC+Jump to ", state.NextPCInBlock, " X:", state.TileX, " Y:", state.TileY, "\n") - state.PCInBlock = state.NextPCInBlock + // Timing wait means "advance cycle but keep the same instruction group", + // otherwise later groups may observe stale local registers. 
+ if !state.TimingWaitBlocked { + if state.NextPCInBlock == -1 { + // print("PC+4 for PC=", state.PCInBlock, " X:", state.TileX, " Y:", state.TileY, "\n") + // print("Instruction at PC=", state.PCInBlock, " is ", state.SelectedBlock.InstructionGroups[state.PCInBlock].Operations[0].OpCode, "\n") + state.PCInBlock++ + } else { + // print("PC+Jump to ", state.NextPCInBlock, " X:", state.TileX, " Y:", state.TileY, "\n") + state.PCInBlock = state.NextPCInBlock + } } } if state.SelectedBlock != nil && state.PCInBlock >= int32(len(state.SelectedBlock.InstructionGroups)) { state.PCInBlock = -1 state.SelectedBlock = nil - print("PCInBlock = -1 at (", state.TileX, ",", state.TileY, ")\n") + // print("PCInBlock = -1 at (", state.TileX, ",", state.TileY, ")\n") slog.Info("Flow", "PCInBlock", "-1", "X", state.TileX, "Y", state.TileY) } state.NextPCInBlock = -1 @@ -208,43 +1317,128 @@ func (i instEmulator) RunInstructionGroup(cinst InstructionGroup, state *coreSta } func (i instEmulator) RunInstructionGroupWithSyncOps(cinst InstructionGroup, state *coreState, time float64) bool { + state.TimingWaitBlocked = false + state.StallReason = "" + state.StallOpID = 0 + state.StallOpCode = "" + state.OpInputReadCache = make(map[string]cgra.Data) + if state.PendingSyncGroup != nil { + return i.advancePendingSyncGroup(state) + } + if state.EnableFIFOModel { + return i.runInstructionGroupWithSyncOpsTwoPhase(cinst, state, time) + } + return i.runInstructionGroupWithSyncOpsLegacy(cinst, state, time) +} + +func (i instEmulator) runInstructionGroupWithSyncOpsLegacy(cinst InstructionGroup, state *coreState, time float64) bool { run := true + type evaluatedDecision struct { + operation Operation + decision issueDecision + occurrenceIndex int + } + evaluated := make([]evaluatedDecision, 0, len(cinst.Operations)) for _, operation := range cinst.Operations { - if (!i.CareFlags) || operation.InvalidIterations > 0 || i.CheckFlags(operation, state) { + decision := i.issueDecision(operation, state) + 
i.applyIssueDecision(operation, state, decision) + evaluated = append(evaluated, evaluatedDecision{ + operation: operation, + decision: decision, + occurrenceIndex: state.nextOpOccurrenceIndex(operation.ID), + }) + if decision.CanIssue { continue - } else { - run = false - break } + run = false + break } if run { - // Collect all results first + deferredLatency, representativeID, representativeOp, deferGroup := i.deferredSyncGroupLatency(cinst, state) allResults := make(map[Operand]cgra.Data) + invalidDecrements := make([]int, 0) for index := range cinst.Operations { - // Get reference to the original operation in state.SelectedBlock operation := &state.SelectedBlock.InstructionGroups[state.PCInBlock].Operations[index] - // Decrement InvalidIterations before running if needed if operation.InvalidIterations > 0 { - print("Invalid iteration for ", operation.OpCode, "@(", state.TileX, ",", state.TileY, ")\n") - operation.InvalidIterations-- + if deferGroup { + invalidDecrements = append(invalidDecrements, index) + } else { + operation.InvalidIterations-- + } continue } + occurrenceIndex := state.nextOpOccurrenceIndex(operation.ID) + decision := evaluated[index].decision + if observation, ok := i.readyHeldObservationFor(*operation, state, decision, occurrenceIndex, true); ok { + i.emitReadyHeldObservation(observation) + } results := i.RunOperation(*operation, state, time) - // Merge results into allResults + state.advanceOpOccurrenceIndex(operation.ID) + i.advanceDerivedTimingCursor(*operation, state) for operand, value := range results { allResults[operand] = value } - //print("RunOperation", operation.OpCode, "@(", state.TileX, ",", state.TileY, ")", time, ":", "YES", "\n") } - // Write all results at once + if deferGroup { + state.PendingSyncGroup = &pendingSyncGroup{ + RemainingCycles: deferredLatency - 1, + BufferedResults: allResults, + InvalidDecrements: invalidDecrements, + RepresentativeID: representativeID, + RepresentativeOp: representativeOp, + } + 
state.TimingWaitBlocked = true + return true + } for operand, value := range allResults { i.writeOperand(operand, value, state) } + } else { + for _, eval := range evaluated { + if observation, ok := i.readyHeldObservationFor(eval.operation, state, eval.decision, eval.occurrenceIndex, false); ok { + i.emitReadyHeldObservation(observation) + } + } + } + if state.TimingWaitBlocked { + if !run && state.StallReason != "" { + Trace( + "Stall", + "Behavior", state.StallReason, + "Policy", normalizeExecutionPolicyString(i.ExecutionPolicy), + "Time", float64(state.CurrentCycle), + "X", state.TileX, + "Y", state.TileY, + "ID", state.StallOpID, + "OpCode", state.StallOpCode, + ) + } + return true + } + if !run && state.StallReason != "" { + Trace( + "Stall", + "Behavior", state.StallReason, + "Policy", normalizeExecutionPolicyString(i.ExecutionPolicy), + "Time", float64(state.CurrentCycle), + "X", state.TileX, + "Y", state.TileY, + "ID", state.StallOpID, + "OpCode", state.StallOpCode, + ) } return run } func (i instEmulator) RunInstructionGroupWithAsyncOps(cinst InstructionGroup, state *coreState, time float64) { + if state.EnableFIFOModel { + i.runInstructionGroupWithAsyncOpsTwoPhase(cinst, state, time) + return + } + i.runInstructionGroupWithAsyncOpsLegacy(cinst, state, time) +} + +func (i instEmulator) runInstructionGroupWithAsyncOpsLegacy(cinst InstructionGroup, state *coreState, time float64) { // Collect all results first allResults := make(map[Operand]cgra.Data) for index := range cinst.Operations { @@ -254,16 +1448,18 @@ func (i instEmulator) RunInstructionGroupWithAsyncOps(cinst InstructionGroup, st } // Get reference to the original operation in state.SelectedBlock operation := &state.SelectedBlock.InstructionGroups[state.PCInBlock].Operations[index] - if (!i.CareFlags) || operation.InvalidIterations > 0 || i.CheckFlags(*operation, state) { // can also only choose one (another pattern) + if i.canIssue(*operation, state) { // can also only choose one (another 
pattern) state.CurrReservationState.ReservationMap[index] = false state.CurrReservationState.OpToExec-- // Decrement InvalidIterations before running if needed if operation.InvalidIterations > 0 { - print("Invalid iteration for ", operation.OpCode, "@(", state.TileX, ",", state.TileY, ")\n") + // print("Invalid iteration for ", operation.OpCode, "@(", state.TileX, ",", state.TileY, ")\n") operation.InvalidIterations-- continue } results := i.RunOperation(*operation, state, time) + state.advanceOpOccurrenceIndex(operation.ID) + i.advanceDerivedTimingCursor(*operation, state) // Merge results into allResults for operand, value := range results { allResults[operand] = value @@ -279,6 +1475,166 @@ func (i instEmulator) RunInstructionGroupWithAsyncOps(cinst InstructionGroup, st } } +func (i instEmulator) runInstructionGroupWithSyncOpsTwoPhase(cinst InstructionGroup, state *coreState, time float64) bool { + workState := state.cloneForEval() + run := true + type evaluatedDecision struct { + operation Operation + decision issueDecision + occurrenceIndex int + } + evaluated := make([]evaluatedDecision, 0, len(cinst.Operations)) + for _, operation := range cinst.Operations { + decision := i.issueDecision(operation, workState) + i.applyIssueDecision(operation, workState, decision) + evaluated = append(evaluated, evaluatedDecision{ + operation: operation, + decision: decision, + occurrenceIndex: workState.nextOpOccurrenceIndex(operation.ID), + }) + if decision.CanIssue { + continue + } + run = false + break + } + + if !run { + for _, eval := range evaluated { + if observation, ok := i.readyHeldObservationFor(eval.operation, workState, eval.decision, eval.occurrenceIndex, false); ok { + i.emitReadyHeldObservation(observation) + } + } + state.TimingWaitBlocked = workState.TimingWaitBlocked + state.StallReason = workState.StallReason + state.StallOpID = workState.StallOpID + state.StallOpCode = workState.StallOpCode + if state.TimingWaitBlocked { + if state.StallReason != "" { + 
Trace( + "Stall", + "Behavior", state.StallReason, + "Policy", normalizeExecutionPolicyString(i.ExecutionPolicy), + "Time", float64(state.CurrentCycle), + "X", state.TileX, + "Y", state.TileY, + "ID", state.StallOpID, + "OpCode", state.StallOpCode, + ) + } + return true + } + if state.StallReason != "" { + Trace( + "Stall", + "Behavior", state.StallReason, + "Policy", normalizeExecutionPolicyString(i.ExecutionPolicy), + "Time", float64(state.CurrentCycle), + "X", state.TileX, + "Y", state.TileY, + "ID", state.StallOpID, + "OpCode", state.StallOpCode, + ) + } + return false + } + + deferredLatency, representativeID, representativeOp, deferGroup := i.deferredSyncGroupLatency(cinst, state) + invalidDecrements := make([]int, 0) + issuedObservations := make([]readyHeldObservation, 0, len(cinst.Operations)) + bufferedResults := make(map[Operand]cgra.Data) + for index, operation := range cinst.Operations { + if operation.InvalidIterations > 0 { + invalidDecrements = append(invalidDecrements, index) + continue + } + occurrenceIndex := workState.nextOpOccurrenceIndex(operation.ID) + decision := evaluated[index].decision + if observation, ok := i.readyHeldObservationFor(operation, workState, decision, occurrenceIndex, true); ok { + issuedObservations = append(issuedObservations, observation) + } + results := i.RunOperation(operation, workState, time) + workState.advanceOpOccurrenceIndex(operation.ID) + i.advanceDerivedTimingCursor(operation, workState) + for operand, value := range results { + if deferGroup { + i.bufferDeferredResult(operand, value, workState, bufferedResults) + continue + } + i.writeOperand(operand, value, workState) + } + } + if deferGroup { + i.applyDeferredSyncIssueState(state, workState) + for _, observation := range issuedObservations { + i.emitReadyHeldObservation(observation) + } + state.PendingSyncGroup = &pendingSyncGroup{ + RemainingCycles: deferredLatency - 1, + BufferedResults: bufferedResults, + InvalidDecrements: invalidDecrements, + 
RepresentativeID: representativeID, + RepresentativeOp: representativeOp, + } + state.TimingWaitBlocked = true + return true + } + *state = *workState + for _, observation := range issuedObservations { + i.emitReadyHeldObservation(observation) + } + i.applyInvalidIterationDecrements(state, invalidDecrements) + return true +} + +func (i instEmulator) runInstructionGroupWithAsyncOpsTwoPhase(cinst InstructionGroup, state *coreState, time float64) { + workState := state.cloneForEval() + allResults := make(map[Operand]cgra.Data) + invalidDecrements := make([]int, 0) + for index, operation := range cinst.Operations { + if !workState.CurrReservationState.ReservationMap[index] { + continue + } + if i.canIssue(operation, workState) { + workState.CurrReservationState.ReservationMap[index] = false + workState.CurrReservationState.OpToExec-- + if operation.InvalidIterations > 0 { + invalidDecrements = append(invalidDecrements, index) + continue + } + results := i.RunOperation(operation, workState, time) + workState.advanceOpOccurrenceIndex(operation.ID) + i.advanceDerivedTimingCursor(operation, workState) + for operand, value := range results { + allResults[operand] = value + } + } + } + for operand, value := range allResults { + i.writeOperand(operand, value, workState) + } + *state = *workState + i.applyInvalidIterationDecrements(state, invalidDecrements) +} + +func (i instEmulator) applyInvalidIterationDecrements(state *coreState, indices []int) { + if len(indices) == 0 || state == nil || state.SelectedBlock == nil { + return + } + if state.PCInBlock < 0 || int(state.PCInBlock) >= len(state.SelectedBlock.InstructionGroups) { + return + } + operations := state.SelectedBlock.InstructionGroups[state.PCInBlock].Operations + for _, idx := range indices { + if idx < 0 || idx >= len(operations) { + continue + } + if operations[idx].InvalidIterations > 0 { + operations[idx].InvalidIterations-- + } + } +} + func (i instEmulator) normalizeDirection(s string) string { u := 
strings.ToUpper(s) switch u { @@ -305,34 +1661,38 @@ func (i instEmulator) normalizeDirection(s string) string { } } -func (i instEmulator) CheckFlags(inst Operation, state *coreState) bool { - //PrintState(state) - flag := true +func (i instEmulator) checkIssueReadinessDetails(inst Operation, state *coreState) issueReadiness { + readiness := issueReadiness{ + OperandsReady: true, + PredicateReadyOrTrue: true, + ResourcesAvailable: true, + Ready: true, + } + for index, src := range inst.SrcOperands.Operands { if index == 1 { if inst.OpCode == "PHI_CONST" || inst.OpCode == "PHI_START" { - // Track PHI_CONST per instruction to avoid cross-interference. var stateKey string if inst.OpCode == "PHI_CONST" { stateKey = fmt.Sprintf("PhiConst_%d", inst.ID) } else if inst.OpCode == "PHI_START" { stateKey = fmt.Sprintf("PhiStart_%d", inst.ID) } - if state.States[stateKey] == nil || state.States[stateKey] == false { // first execution + if state.States[stateKey] == nil || state.States[stateKey] == false { if len(inst.SrcOperands.Operands) > 1 { - fmt.Println("ID", inst.ID, "bypass check") continue - } else { - panic("PHI_CONST or PHI_START must have two sources") } + panic("PHI_CONST or PHI_START must have two sources") } } } srcImpl := i.normalizeDirection(src.Impl) if state.Directions[srcImpl] { - if !state.RecvBufHeadReady[i.getColorIndex(src.Color)][i.getDirecIndex(srcImpl)] { - flag = false - break + if state.recvQueueLen(i.getColorIndex(src.Color), i.getDirecIndex(srcImpl)) == 0 { + readiness.OperandsReady = false + readiness.Ready = false + readiness.WaitReason = StallReasonOperandWait + return readiness } } } @@ -340,15 +1700,38 @@ func (i instEmulator) CheckFlags(inst Operation, state *coreState) bool { for _, dst := range inst.DstOperands.Operands { dstImpl := i.normalizeDirection(dst.Impl) if state.Directions[dstImpl] { - if state.SendBufHeadBusy[i.getColorIndex(dst.Color)][i.getDirecIndex(dstImpl)] { - flag = false - break + if 
state.sendQueueIsFull(i.getColorIndex(dst.Color), i.getDirecIndex(dstImpl)) { + Trace( + "Backpressure", + "Time", float64(state.CurrentCycle), + "X", state.TileX, + "Y", state.TileY, + "OpCode", inst.OpCode, + "ID", inst.ID, + "Reason", "SendBufBusy", + "DstDir", dstImpl, + "Color", dst.Color, + "Policy", normalizeExecutionPolicyString(i.ExecutionPolicy), + ) + readiness.ResourcesAvailable = false + readiness.Ready = false + readiness.WaitReason = StallReasonOutputBlocked + return readiness } } } - //fmt.Println("[CheckFlags] checking flags for inst", inst.OpCode, "@(", state.TileX, ",", state.TileY, "):", flag) - fmt.Println("Check", inst.OpCode, "ID", inst.ID, "@(", state.TileX, ",", state.TileY, "):", flag) - return flag + + return readiness +} + +func (i instEmulator) checkIssueReadiness(inst Operation, state *coreState) (bool, string) { + readiness := i.checkIssueReadinessDetails(inst, state) + return readiness.Ready, readiness.WaitReason +} + +func (i instEmulator) CheckFlags(inst Operation, state *coreState) bool { + ready, _ := i.checkIssueReadiness(inst, state) + return ready } func (i instEmulator) RunOperation(inst Operation, state *coreState, time float64) map[Operand]cgra.Data { @@ -467,13 +1850,39 @@ func (i instEmulator) readOperand(operand Operand, state *coreState) (value cgra //fmt.Println("operand.Impl", operand.Impl) // must first check it is ready color, direction := i.getColorIndex(operand.Color), i.getDirecIndex(normalizedImpl) - value = state.RecvBufHead[color][direction] - // set the ready flag to false + cacheKey := fmt.Sprintf("%d:%d", color, direction) + if state.Mode == SyncOp { + if cached, ok := state.OpInputReadCache[cacheKey]; ok { + return cached + } + } + peek, ok := state.recvQueuePeek(color, direction) + if !ok { + if state.Mode == SyncOp { + // In sync mode, all ops in the same instruction group share one + // snapshot of input heads. 
If a previous op consumed this queue + // head earlier in the same tick, keep returning the snapshot. + fallback := state.RecvBufHead[color][direction] + state.OpInputReadCache[cacheKey] = fallback + return fallback + } + panic(fmt.Sprintf("operand queue unexpectedly empty in async mode: %v", operand)) + } + value = peek + // consume queue head according to existing sync/async rules if state.Mode == SyncOp { - state.RecvBufHeadReady[color][direction] = false + consumed, ok := state.recvQueueConsume(color, direction) + if !ok { + panic(fmt.Sprintf("operand queue consume failed in sync mode: %v", operand)) + } + value = consumed + state.OpInputReadCache[cacheKey] = value } else { if !state.CurrReservationState.DecrementRefCount(operand, state) { - state.RecvBufHeadReady[color][direction] = false // no longer used, closed + // no longer used, pop queue head + if _, ok := state.recvQueueConsume(color, direction); !ok { + panic(fmt.Sprintf("operand queue consume failed in async mode: %v", operand)) + } //fmt.Println("Reduce {", operand.Impl, "} to zero") } else { //fmt.Println("Reduce {", operand.Impl, "} to ", state.CurrReservationState.RefCountRuntime[operand.Impl], "@(", state.TileX, ",", state.TileY, ")") @@ -523,12 +1932,13 @@ func (i instEmulator) writeOperand(operand Operand, value cgra.Data, state *core } else { normalizedImpl := i.normalizeDirection(operand.Impl) if state.Directions[normalizedImpl] { - if state.SendBufHeadBusy[i.getColorIndex(operand.Color)][i.getDirecIndex(normalizedImpl)] { + color := i.getColorIndex(operand.Color) + direction := i.getDirecIndex(normalizedImpl) + if state.sendQueueIsFull(color, direction) { //fmt.Printf("sendbufhead busy\n") return } - state.SendBufHeadBusy[i.getColorIndex(operand.Color)][i.getDirecIndex(normalizedImpl)] = true - state.SendBufHead[i.getColorIndex(operand.Color)][i.getDirecIndex(normalizedImpl)] = value + state.sendQueuePush(color, direction, value) } else { panic(fmt.Sprintf("Invalid operand %v in 
writeOperand; expected register", operand)) } @@ -682,9 +2092,19 @@ func (i instEmulator) runLoadDirect(inst Operation, state *coreState) map[Operan src1 := inst.SrcOperands.Operands[0] addrStruct := i.readOperand(src1, state) addr := addrStruct.First() + finalPred := addrStruct.Pred + results := make(map[Operand]cgra.Data) + + // Predicated-off load should not touch memory or trigger bounds checks. + if !finalPred { + for _, dst := range inst.DstOperands.Operands { + results[dst] = cgra.NewScalarWithPred(0, false) + } + return results + } if addr >= uint32(len(state.Memory)) { - panic("memory address out of bounds") + panic("memory address out of bounds, addr: " + strconv.Itoa(int(addr)) + ", len(state.Memory): " + strconv.Itoa(len(state.Memory))) } value := state.Memory[addr] slog.Warn("Memory", @@ -695,8 +2115,6 @@ func (i instEmulator) runLoadDirect(inst Operation, state *coreState) map[Operan "X", state.TileX, "Y", state.TileY, ) - finalPred := addrStruct.Pred - results := make(map[Operand]cgra.Data) for _, dst := range inst.DstOperands.Operands { results[dst] = cgra.NewScalarWithPred(value, finalPred) } @@ -754,6 +2172,11 @@ func (i instEmulator) runStoreDirect(inst Operation, state *coreState) map[Opera src2 := inst.SrcOperands.Operands[1] addrStruct := i.readOperand(src2, state) addr := addrStruct.First() + finalPred := addrStruct.Pred && valueStruct.Pred + if !finalPred { + Trace("Inst", "Time", state.CurrentTime, "OpCode", inst.OpCode, "ID", inst.ID, "X", state.TileX, "Y", state.TileY, "Pred", finalPred) + return make(map[Operand]cgra.Data) + } if addr >= uint32(len(state.Memory)) { panic("memory address out of bounds, addr: " + strconv.Itoa(int(addr)) + ", len(state.Memory): " + strconv.Itoa(len(state.Memory))) } @@ -765,7 +2188,6 @@ func (i instEmulator) runStoreDirect(inst Operation, state *coreState) map[Opera "Y", state.TileY, ) state.Memory[addr] = value - finalPred := addrStruct.Pred && valueStruct.Pred Trace("Inst", "Time", state.CurrentTime, 
"OpCode", inst.OpCode, "ID", inst.ID, "X", state.TileX, "Y", state.TileY, "Pred", finalPred) // elect no next PC return make(map[Operand]cgra.Data) @@ -921,7 +2343,7 @@ func (i instEmulator) runSub(inst Operation, state *coreState) map[Operand]cgra. dstValSigned := src1Signed - src2Signed dstVal := uint32(dstValSigned) - fmt.Printf("ISUB: Subtracting %d (src1) - %d (src2) = %d\n", src1Signed, src2Signed, dstValSigned) + // fmt.Printf("ISUB: Subtracting %d (src1) - %d (src2) = %d\n", src1Signed, src2Signed, dstValSigned) finalPred := src1Struct.Pred && src2Struct.Pred results := make(map[Operand]cgra.Data) @@ -973,8 +2395,9 @@ func (i instEmulator) runMulAdd(inst Operation, state *coreState) map[Operand]cg s2Val := int32(s2.First()) dstValSigned := s0Val*s1Val + s2Val dstVal := uint32(dstValSigned) - finalPred := s0.Pred && s1.Pred && s2.Pred - + //finalPred := s0.Pred && s1.Pred && s2.Pred + //Only for systolic array currently. if need for other cases, please modify the finalPred calculation. 
+ finalPred := s0.Pred && s1.Pred results := make(map[Operand]cgra.Data) for _, dst := range inst.DstOperands.Operands { results[dst] = cgra.NewScalarWithPred(dstVal, finalPred) @@ -1201,9 +2624,9 @@ func (i instEmulator) runRetImm(inst Operation, state *coreState, time float64) *state.retVal = srcVal *state.exit = true *state.requestExitTimestamp = time - fmt.Println("++++++++++++ RETURN executed", srcVal, "T=", time) - } else { - fmt.Println("++++++++++++ RETURN bypassed") + // fmt.Println("++++++++++++ RETURN executed", srcVal, "T=", time) + // } else { + // fmt.Println("++++++++++++ RETURN bypassed") } } else { panic("RETURN_VALUE requires a source operand") @@ -1229,9 +2652,9 @@ func (i instEmulator) runRetDelay(inst Operation, state *coreState, time float64 *state.retVal = 0 *state.exit = true *state.requestExitTimestamp = time + ExitDelay - fmt.Println("++++++++++++ RETURN executed", srcVal, "T=", time) - } else { - fmt.Println("++++++++++++ RETURN bypassed") + // fmt.Println("++++++++++++ RETURN executed", srcVal, "T=", time) + // } else { + // fmt.Println("++++++++++++ RETURN bypassed") } } else { panic("RETURN_VOID requires a source operand") @@ -1338,14 +2761,14 @@ func (i instEmulator) runCmpExport(inst Operation, state *coreState) map[Operand for _, dst := range inst.DstOperands.Operands { results[dst] = cgra.NewScalarWithPred(1, finalPred) } - fmt.Println(">>>>>>>>>>>>>>> ICMP_EQ: ", src1Val.First(), src2Val.First(), "Yes") + // fmt.Println(">>>>>>>>>>>>>>> ICMP_EQ: ", src1Val.First(), src2Val.First(), "Yes") } else { finalPred = src1Val.Pred resultVal = 0 for _, dst := range inst.DstOperands.Operands { results[dst] = cgra.NewScalarWithPred(0, finalPred) } - fmt.Println(">>>>>>>>>>>>>>> ICMP_EQ: ", src1Val.First(), src2Val.First(), "No") + // fmt.Println(">>>>>>>>>>>>>>> ICMP_EQ: ", src1Val.First(), src2Val.First(), "No") } Trace("Inst", "Time", state.CurrentTime, "OpCode", inst.OpCode, "ID", inst.ID, "X", state.TileX, "Y", state.TileY, "Src1", 
fmt.Sprintf("%d(%t)", src1Val.First(), src1Val.Pred), "Src2", fmt.Sprintf("%d(%t)", src2Val.First(), src2Val.Pred), "Result", fmt.Sprintf("%d(%t)", resultVal, finalPred)) return results @@ -1555,13 +2978,13 @@ func (i instEmulator) runPhiStart(inst Operation, state *coreState) map[Operand] results := make(map[Operand]cgra.Data) if state.States[stateKey] == nil || state.States[stateKey] == false { // first execution - if !src1Pred { - panic("Predicate of first time PHI_START must be true at (" + strconv.Itoa(int(state.TileX)) + "," + strconv.Itoa(int(state.TileY)) + ") instruction " + strconv.Itoa(inst.ID)) - } + // if !src1Pred { + // panic("Predicate of first time PHI_START must be true at (" + strconv.Itoa(int(state.TileX)) + "," + strconv.Itoa(int(state.TileY)) + ") instruction " + strconv.Itoa(inst.ID)) + // } result = src1Val finalPred = src1Pred state.States[stateKey] = true - fmt.Println("set state.States[", stateKey, "] to true") + // fmt.Println("set state.States[", stateKey, "] to true") for _, dst := range inst.DstOperands.Operands { results[dst] = cgra.NewScalarWithPred(result, finalPred) } @@ -1616,7 +3039,7 @@ func (i instEmulator) runGrantPred(inst Operation, state *coreState) map[Operand results[dst] = cgra.NewScalarWithPred(srcVal, finalPred) } - fmt.Println("<<<<<<<<<<<<<< GRANTPRED: ", srcVal, predVal, finalPred) + // fmt.Println("<<<<<<<<<<<<<< GRANTPRED: ", srcVal, predVal, finalPred) Trace("Inst", "Time", state.CurrentTime, "OpCode", inst.OpCode, "ID", inst.ID, "X", state.TileX, "Y", state.TileY, "SrcOperand", fmt.Sprintf("%d(%t)", srcVal, srcStruct.Pred), "PredOperand", fmt.Sprintf("%d(%t)", predVal, predStruct.Pred), "Pred", finalPred, "Result", fmt.Sprintf("%d(%t)", srcVal, finalPred)) // elect no next PC diff --git a/core/execution_policy_test.go b/core/execution_policy_test.go new file mode 100644 index 0000000..5304441 --- /dev/null +++ b/core/execution_policy_test.go @@ -0,0 +1,812 @@ +package core + +import ( + "bytes" + 
"encoding/json" + "log/slog" + "os" + "strings" + "testing" +) + +type readyHeldLog struct { + RunMode string `json:"run_mode"` + Cycle int64 `json:"cycle"` + X int `json:"X"` + Y int `json:"Y"` + ID int `json:"ID"` + OccurrenceIndex int `json:"occurrence_index"` + OpCode string `json:"OpCode"` + AnnotatedTimeT *int64 `json:"annotated_time_t"` + OperandsReady bool `json:"operands_ready"` + PredicateReadyOrTrue bool `json:"predicate_ready_or_true"` + ResourcesAvailable bool `json:"resources_available"` + TimingGateSatisfied bool `json:"timing_gate_satisfied"` + FireableExceptTime bool `json:"fireable_except_time"` + BlockedByLowerBound bool `json:"blocked_by_lower_bound"` + IssuedThisCycle bool `json:"issued_this_cycle"` +} + +func newPolicyTestState() coreState { + state := coreState{ + Directions: map[string]bool{ + "North": true, + "East": true, + "South": true, + "West": true, + "NorthEast": true, + "SouthEast": true, + "SouthWest": true, + "NorthWest": true, + "Router": true, + }, + RecvBufHeadReady: make([][]bool, 4), + SendBufHeadBusy: make([][]bool, 4), + OpTimingCursor: make(map[int]int), + OpTimingLate: make(map[int]bool), + OpTimingRollCycle: make(map[int]int64), + OpIssueCount: make(map[int]int), + TimingWaitBlocked: false, + StallReason: "", + StallOpID: 0, + StallOpCode: "", + } + + for i := 0; i < 4; i++ { + state.RecvBufHeadReady[i] = make([]bool, 12) + state.SendBufHeadBusy[i] = make([]bool, 12) + } + + return state +} + +func TestCanIssueInOrderIgnoresTimeStep(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyInOrderDataflow, + } + state := newPolicyTestState() + state.CurrentCycle = 0 + op := Operation{ + OpCode: "NOP", + TimeStep: 10, + } + + if !emu.canIssue(op, &state) { + t.Fatalf("in_order_dataflow should ignore timestep and allow ready op") + } +} + +func TestCanIssueElasticScheduled(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + 
state := newPolicyTestState() + state.Code.CompiledII = 10 // must have schedule so elastic time-gating applies + op := Operation{ + OpCode: "NOP", + TimeStep: 5, + } + + state.CurrentCycle = 4 + if emu.canIssue(op, &state) { + t.Fatalf("elastic_scheduled should block before timestep") + } + + state.CurrentCycle = 5 + if !emu.canIssue(op, &state) { + t.Fatalf("elastic_scheduled should allow at timestep when ready") + } +} + +func TestCanIssueElasticScheduledWithCompiledIIConversion(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + state := newPolicyTestState() + state.Code.CompiledII = 4 + op := Operation{ + OpCode: "NOP", + TimeStep: 1, + } + + state.CurrentCycle = 0 // step 0 + if emu.canIssue(op, &state) { + t.Fatalf("elastic_scheduled should block before converted step") + } + + state.CurrentCycle = 2 // step 2 + if !emu.canIssue(op, &state) { + t.Fatalf("elastic_scheduled should allow when converted step >= time_step") + } + + state.CurrentCycle = 5 // step 1 (5 %% 4) + if !emu.canIssue(op, &state) { + t.Fatalf("elastic_scheduled should allow on converted matching step") + } +} + +func TestCanIssueStrictTimedViolation(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + } + state := newPolicyTestState() + state.Code.CompiledII = 4 // must have schedule so strict time check runs + state.CurrentCycle = 6 // step 2 (6%4); op was for step 1 → missed step, violation + state.RecvBufHeadReady[0][0] = true // North dir slot 0 ready so CheckFlags passes + op := Operation{ + OpCode: "DATA_MOV", + TimeStep: 1, // scheduled step 1; current step 2 > 1 → panic + SrcOperands: OperandList{ + Operands: []Operand{ + {Impl: "North", Color: "R"}, + }, + }, + } + + defer func() { + recovered := recover() + if recovered == nil { + t.Fatalf("expected strict_timed synchronization violation panic") + } + if !strings.Contains(recovered.(string), "synchronization 
violation") { + t.Fatalf("unexpected panic: %v", recovered) + } + }() + + _ = emu.canIssue(op, &state) +} + +func TestCanIssueStrictTimedWithCompiledIIConversion(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + } + state := newPolicyTestState() + state.Code.CompiledII = 4 + op := Operation{ + OpCode: "NOP", + TimeStep: 1, + } + + state.CurrentCycle = 5 // step 1 + if !emu.canIssue(op, &state) { + t.Fatalf("strict_timed should allow when converted step equals time_step") + } + + state.CurrentCycle = 6 // step 2: missed exact step + defer func() { + recovered := recover() + if recovered == nil { + t.Fatalf("expected strict_timed missed-step synchronization violation") + } + if !strings.Contains(recovered.(string), "missed its exact scheduled step") { + t.Fatalf("unexpected panic: %v", recovered) + } + }() + _ = emu.canIssue(op, &state) +} + +func TestCanIssueStrictTimedWithDerivedTiming(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 4, + StrictFailOnViolation: false, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{ + 7: []int64{5}, + } + op := Operation{ + OpCode: "NOP", + ID: 7, + } + + state.CurrentCycle = 4 + if emu.canIssue(op, &state) { + t.Fatalf("strict_timed should block before derived cycle") + } + + state.CurrentCycle = 5 + if !emu.canIssue(op, &state) { + t.Fatalf("strict_timed should allow exactly on derived cycle when ready") + } + + state.CurrentCycle = 6 + if !emu.canIssue(op, &state) { + t.Fatalf("strict_timed should allow late issue after derived cycle when ready") + } +} + +func TestCanIssueElasticScheduledWithDerivedTiming(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{ + 9: []int64{5}, + } + op := Operation{ + OpCode: "NOP", + ID: 9, + } + + 
state.CurrentCycle = 4 + if emu.canIssue(op, &state) { + t.Fatalf("elastic_scheduled should block before derived cycle") + } + + state.CurrentCycle = 6 + if !emu.canIssue(op, &state) { + t.Fatalf("elastic_scheduled should allow after derived cycle when ready") + } +} + +func TestRunInstructionGroupWithSyncOpsKeepsAliveOnDerivedTimingWait(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{ + 13: []int64{5}, + } + state.CurrentCycle = 4 + + group := InstructionGroup{ + Operations: []Operation{ + { + OpCode: "NOP", + ID: 13, + }, + }, + } + + progress := emu.RunInstructionGroupWithSyncOps(group, &state, 0) + if !progress { + t.Fatalf("timing wait should keep core ticking until derived cycle is reached") + } + if !state.TimingWaitBlocked { + t.Fatalf("expected timing wait marker to be set") + } +} + +func TestCanIssueStrictTimedDerivedTimingNotReady(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 4, + StrictFailOnViolation: false, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{ + 11: []int64{5}, + } + op := Operation{ + OpCode: "DATA_MOV", + ID: 11, + SrcOperands: OperandList{ + Operands: []Operand{ + {Impl: "North", Color: "R"}, + }, + }, + } + + state.CurrentCycle = 5 + if emu.canIssue(op, &state) { + t.Fatalf("strict_timed should stall when operand is not ready on derived cycle") + } + + state.RecvBufHeadReady[0][0] = true // North-R becomes ready + state.CurrentCycle = 6 + if !emu.canIssue(op, &state) { + t.Fatalf("strict_timed should allow late issue after derived-cycle stall") + } +} + +func TestCanIssueStrictTimedDerivedTimingWindowViolationSoft(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 1, + StrictFailOnViolation: false, + } + state := 
newPolicyTestState() + state.Code.CompiledII = 2 + state.Code.DerivedTiming = map[int][]int64{ + 17: []int64{5}, + } + op := Operation{ + OpCode: "NOP", + ID: 17, + } + + state.CurrentCycle = 7 // lateness=2 > max slip=1, should roll to next II window + if emu.canIssue(op, &state) { + t.Fatalf("strict_timed soft mode should stall after window violation") + } + if !state.TimingWaitBlocked { + t.Fatalf("expected timing wait after strict window violation") + } + if state.OpTimingRollCycle[op.ID] != 9 { + t.Fatalf("expected rolled cycle 9, got %d", state.OpTimingRollCycle[op.ID]) + } + + state.CurrentCycle = 8 + if emu.canIssue(op, &state) { + t.Fatalf("strict_timed should keep waiting before rolled cycle") + } + + state.CurrentCycle = 9 + if !emu.canIssue(op, &state) { + t.Fatalf("strict_timed should issue at rolled cycle when ready") + } +} + +func TestCanIssueStrictTimedDerivedTimingSetsScheduleBubbleReason(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 1, + StrictFailOnViolation: false, + } + state := newPolicyTestState() + state.Code.CompiledII = 2 + state.Code.DerivedTiming = map[int][]int64{ + 21: []int64{5}, + } + op := Operation{ + OpCode: "NOP", + ID: 21, + } + + state.CurrentCycle = 7 + if emu.canIssue(op, &state) { + t.Fatalf("expected strict_timed violation to block issue") + } + if state.StallReason != StallReasonScheduleBubble { + t.Fatalf("expected schedule bubble stall reason, got %q", state.StallReason) + } +} + +func TestCanIssueStrictTimedDerivedTimingWindowViolationOverridesReadinessReason(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 1, + StrictFailOnViolation: false, + } + state := newPolicyTestState() + state.Code.CompiledII = 2 + state.Code.DerivedTiming = map[int][]int64{ + 31: []int64{5}, + } + op := Operation{ + OpCode: "DATA_MOV", + ID: 31, + SrcOperands: OperandList{ + Operands: []Operand{ + 
{Impl: "North", Color: "R"}, + }, + }, + } + + state.CurrentCycle = 7 // lateness=2 > max slip, and operand not ready + if emu.canIssue(op, &state) { + t.Fatalf("expected strict_timed to block on window violation") + } + if state.StallReason != StallReasonScheduleBubble { + t.Fatalf("expected schedule bubble to override readiness reason, got %q", state.StallReason) + } + if !state.TimingWaitBlocked { + t.Fatalf("expected timing wait after strict window violation") + } + if state.OpTimingRollCycle[op.ID] != 9 { + t.Fatalf("expected rolled cycle 9, got %d", state.OpTimingRollCycle[op.ID]) + } +} + +func TestCanIssueStrictTimedDerivedTimingRollDoesNotDependOnReadiness(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 1, + StrictFailOnViolation: false, + } + state := newPolicyTestState() + state.Code.CompiledII = 2 + state.Code.DerivedTiming = map[int][]int64{ + 32: []int64{5}, + } + op := Operation{ + OpCode: "DATA_MOV", + ID: 32, + SrcOperands: OperandList{ + Operands: []Operand{ + {Impl: "North", Color: "R"}, + }, + }, + } + + state.CurrentCycle = 7 // first observe violation while not ready + if emu.canIssue(op, &state) { + t.Fatalf("expected violation cycle to block") + } + if state.OpTimingRollCycle[op.ID] != 9 { + t.Fatalf("expected rolled cycle 9, got %d", state.OpTimingRollCycle[op.ID]) + } + + state.CurrentCycle = 8 + if emu.canIssue(op, &state) { + t.Fatalf("expected strict_timed to wait before rolled cycle") + } + + state.RecvBufHeadReady[0][0] = true // ready at rolled cycle + state.CurrentCycle = 9 + if !emu.canIssue(op, &state) { + t.Fatalf("expected strict_timed to issue at first legal rolled cycle when ready") + } +} + +func TestIsStrictControlSensitiveOpCoversAliasesAndFamilies(t *testing.T) { + cases := []struct { + opCode string + want bool + }{ + {opCode: "PHI_START", want: true}, + {opCode: "grant_once", want: true}, + {opCode: "ICMP_SGE", want: true}, + {opCode: "CMP_EXPORT", 
want: true}, + {opCode: "lt_ex", want: true}, + {opCode: "RETURN_VALUE", want: true}, + {opCode: "BNE", want: true}, + {opCode: "CTRL_MOV", want: true}, + {opCode: "ADD", want: false}, + {opCode: "DATA_MOV", want: false}, + {opCode: "NOP", want: false}, + } + + for _, tc := range cases { + got := isStrictControlSensitiveOp(tc.opCode) + if got != tc.want { + t.Fatalf("isStrictControlSensitiveOp(%q) = %t, want %t", tc.opCode, got, tc.want) + } + } +} + +func TestCanIssueStrictTimedDerivedTimingControlAliasSkipsWindowPenalty(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 0, + StrictFailOnViolation: false, + } + state := newPolicyTestState() + state.Code.CompiledII = 2 + state.Code.DerivedTiming = map[int][]int64{ + 33: []int64{5}, + } + op := Operation{ + OpCode: "CMP_EXPORT", + ID: 33, + } + + state.CurrentCycle = 7 // lateness=2, but control-sensitive alias should skip penalty + if !emu.canIssue(op, &state) { + t.Fatalf("expected control-sensitive alias to skip finite-W replay penalty") + } + if _, exists := state.OpTimingRollCycle[op.ID]; exists { + t.Fatalf("did not expect roll cycle for control-sensitive op") + } +} + +func TestCanIssueGuidedDerivedTimingSetsOutputBlockedReason(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{ + 23: []int64{5}, + } + op := Operation{ + OpCode: "DATA_MOV", + ID: 23, + DstOperands: OperandList{ + Operands: []Operand{ + {Impl: "East", Color: "R"}, + }, + }, + } + state.CurrentCycle = 5 + state.SendBufHeadBusy[0][emu.getDirecIndex("East")] = true // East-R blocked + if emu.canIssue(op, &state) { + t.Fatalf("expected guided mode to block when output is busy") + } + if state.StallReason != StallReasonOutputBlocked { + t.Fatalf("expected output blocked stall reason, got %q", state.StallReason) + } +} + +func 
TestCanIssueStrictTimedDerivedTimingWindowViolationHard(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + StrictMaxSlip: 0, + StrictFailOnViolation: true, + } + state := newPolicyTestState() + state.Code.CompiledII = 2 + state.Code.DerivedTiming = map[int][]int64{ + 19: []int64{5}, + } + op := Operation{ + OpCode: "NOP", + ID: 19, + } + + defer func() { + recovered := recover() + if recovered == nil { + t.Fatalf("expected strict_timed hard mode panic on window violation") + } + if !strings.Contains(recovered.(string), "strict slip window violation") { + t.Fatalf("unexpected panic: %v", recovered) + } + }() + + state.CurrentCycle = 6 // lateness=1 > max slip=0 + _ = emu.canIssue(op, &state) +} + +func captureReadyHeldLogs(t *testing.T, fn func()) []readyHeldLog { + t.Helper() + + var buffer bytes.Buffer + oldLogger := slog.Default() + oldTraceEnabled := TraceEnabled() + oldObserver := traceObserver + + handler := slog.NewJSONHandler(&buffer, &slog.HandlerOptions{Level: LevelTrace}) + slog.SetDefault(slog.New(handler)) + SetTraceEnabled(true) + traceObserver = nil + defer func() { + traceObserver = oldObserver + SetTraceEnabled(oldTraceEnabled) + slog.SetDefault(oldLogger) + }() + + fn() + + logs := make([]readyHeldLog, 0) + for _, line := range strings.Split(strings.TrimSpace(buffer.String()), "\n") { + if strings.TrimSpace(line) == "" { + continue + } + var entry struct { + Msg string `json:"msg"` + readyHeldLog + } + if err := json.Unmarshal([]byte(line), &entry); err != nil { + t.Fatalf("unmarshal trace line: %v", err) + } + if entry.Msg != "ReadyHeld" { + continue + } + logs = append(logs, entry.readyHeldLog) + } + return logs +} + +func newSyncTraceState(group InstructionGroup) coreState { + state := newPolicyTestState() + state.SelectedBlock = &EntryBlock{InstructionGroups: []InstructionGroup{group}} + state.PCInBlock = 0 + state.ReadyHeldTraceEnabled = true + state.ReadyHeldRunMode = "lower_bound" + 
return state +} + +func TestIssueDecisionElasticScheduledDerivedTimingReadyButHeld(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{9: {5}} + state.CurrentCycle = 4 + operation := Operation{OpCode: "NOP", ID: 9} + + decision := emu.issueDecision(operation, &state) + if decision.AnnotatedTimeT == nil || *decision.AnnotatedTimeT != 5 { + t.Fatalf("annotated_time_t = %v, want 5", decision.AnnotatedTimeT) + } + if !decision.OperandsReady || !decision.PredicateReadyOrTrue || !decision.ResourcesAvailable { + t.Fatalf("expected all non-timing readiness gates to pass: %+v", decision) + } + if decision.TimingGateSatisfied { + t.Fatalf("expected timing gate to be unsatisfied before annotated cycle") + } + if !decision.FireableExceptTime { + t.Fatalf("expected fireable_except_time=true when only lower-bound timing blocks issue") + } + if !decision.BlockedByLowerBound { + t.Fatalf("expected blocked_by_lower_bound=true") + } + if decision.CanIssue { + t.Fatalf("expected can_issue=false before annotated cycle") + } + + emu.applyIssueDecision(operation, &state, decision) + if !state.TimingWaitBlocked { + t.Fatalf("expected timing wait marker after applying decision") + } + if state.StallReason != StallReasonScheduleBubble { + t.Fatalf("stall reason = %q, want %q", state.StallReason, StallReasonScheduleBubble) + } +} + +func TestIssueDecisionElasticScheduledDerivedTimingOutputBlocked(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{23: {5}} + state.CurrentCycle = 5 + state.SendBufHeadBusy[0][emu.getDirecIndex("East")] = true + operation := Operation{ + OpCode: "DATA_MOV", + ID: 23, + DstOperands: OperandList{Operands: []Operand{{Impl: "East", Color: "R"}}}, + } + + decision := 
emu.issueDecision(operation, &state) + if decision.ResourcesAvailable { + t.Fatalf("expected resources_available=false") + } + if decision.FireableExceptTime { + t.Fatalf("expected fireable_except_time=false when output credit is missing") + } + if decision.BlockedByLowerBound { + t.Fatalf("expected blocked_by_lower_bound=false when non-timing checks already fail") + } + if decision.CanIssue { + t.Fatalf("expected can_issue=false when output is blocked") + } +} + +func TestIssueDecisionElasticScheduledDerivedTimingOperandWait(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + state := newPolicyTestState() + state.Code.DerivedTiming = map[int][]int64{11: {5}} + state.CurrentCycle = 5 + operation := Operation{ + OpCode: "DATA_MOV", + ID: 11, + SrcOperands: OperandList{Operands: []Operand{{Impl: "North", Color: "R"}}}, + } + + decision := emu.issueDecision(operation, &state) + if decision.OperandsReady { + t.Fatalf("expected operands_ready=false") + } + if decision.FireableExceptTime { + t.Fatalf("expected fireable_except_time=false when operands are not ready") + } + if decision.BlockedByLowerBound { + t.Fatalf("expected blocked_by_lower_bound=false when operand wait is the real blocker") + } +} + +func TestRunInstructionGroupWithSyncOpsEmitsBlockedThenIssuedReadyHeld(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + group := InstructionGroup{Operations: []Operation{{OpCode: "NOP", ID: 41}}} + state := newSyncTraceState(group) + state.Code.DerivedTiming = map[int][]int64{41: {5}} + + logs := captureReadyHeldLogs(t, func() { + state.CurrentCycle = 4 + if !emu.RunInstructionGroupWithSyncOps(group, &state, 4) { + t.Fatalf("expected timing wait to keep sync core alive") + } + state.CurrentCycle = 5 + if !emu.RunInstructionGroupWithSyncOps(group, &state, 5) { + t.Fatalf("expected issued cycle to report progress") + } + }) + + if len(logs) != 2 
{ + t.Fatalf("expected 2 ReadyHeld logs, got %d: %+v", len(logs), logs) + } + if logs[0].OccurrenceIndex != 0 || logs[1].OccurrenceIndex != 0 { + t.Fatalf("expected same occurrence index for blocked/issued pair, got %+v", logs) + } + if !logs[0].BlockedByLowerBound || logs[0].IssuedThisCycle { + t.Fatalf("unexpected blocked log: %+v", logs[0]) + } + if logs[1].BlockedByLowerBound || !logs[1].IssuedThisCycle { + t.Fatalf("unexpected issued log: %+v", logs[1]) + } + if state.OpIssueCount[41] != 1 { + t.Fatalf("expected issued occurrence count to advance to 1, got %d", state.OpIssueCount[41]) + } +} + +func TestRunInstructionGroupWithSyncOpsReadyHeldOccurrenceIndexIncrements(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyElasticScheduled, + } + group := InstructionGroup{Operations: []Operation{{OpCode: "NOP", ID: 42}}} + state := newSyncTraceState(group) + state.Code.DerivedTiming = map[int][]int64{42: {5, 6}} + + logs := captureReadyHeldLogs(t, func() { + state.CurrentCycle = 5 + if !emu.RunInstructionGroupWithSyncOps(group, &state, 5) { + t.Fatalf("expected first issue to make progress") + } + state.CurrentCycle = 6 + if !emu.RunInstructionGroupWithSyncOps(group, &state, 6) { + t.Fatalf("expected second issue to make progress") + } + }) + + if len(logs) != 2 { + t.Fatalf("expected 2 issued ReadyHeld logs, got %d: %+v", len(logs), logs) + } + if logs[0].OccurrenceIndex != 0 || logs[1].OccurrenceIndex != 1 { + t.Fatalf("expected occurrence indexes [0 1], got [%d %d]", logs[0].OccurrenceIndex, logs[1].OccurrenceIndex) + } + if !logs[0].IssuedThisCycle || !logs[1].IssuedThisCycle { + t.Fatalf("expected issued_this_cycle=true for both logs: %+v", logs) + } + if state.OpIssueCount[42] != 2 { + t.Fatalf("expected occurrence count to reach 2, got %d", state.OpIssueCount[42]) + } +} + +func TestLoadProgramFileFromYAMLPreservesTimeStep(t *testing.T) { + filePath := "../test/testbench/stonneGEMM8x8/gemm.yaml" + if _, err := 
os.Stat(filePath); os.IsNotExist(err) { + t.Skipf("test file does not exist: %s", filePath) + } + + programMap := LoadProgramFileFromYAML(filePath) + program, ok := programMap["(0,0)"] + if !ok { + t.Fatalf("core (0,0) not found in parsed program") + } + if len(program.EntryBlocks) == 0 || len(program.EntryBlocks[0].InstructionGroups) < 2 { + t.Fatalf("unexpected program structure for core (0,0)") + } + + group0 := program.EntryBlocks[0].InstructionGroups[0] + if len(group0.Operations) == 0 { + t.Fatalf("group0 has no operations") + } + if group0.Operations[0].TimeStep != 0 { + t.Fatalf("unexpected timestep for first op: got %d want 0", group0.Operations[0].TimeStep) + } + + group1 := program.EntryBlocks[0].InstructionGroups[1] + if len(group1.Operations) == 0 { + t.Fatalf("group1 has no operations") + } + if group1.Operations[0].TimeStep != 1 { + t.Fatalf("unexpected timestep for second group first op: got %d want 1", group1.Operations[0].TimeStep) + } +} diff --git a/core/fifo_buffer_test.go b/core/fifo_buffer_test.go new file mode 100644 index 0000000..32a7559 --- /dev/null +++ b/core/fifo_buffer_test.go @@ -0,0 +1,166 @@ +package core + +import ( + "testing" + + "github.com/sarchlab/zeonica/cgra" +) + +func newFIFOTestState(recvCap, sendCap int) coreState { + state := coreState{ + Directions: map[string]bool{ + "North": true, + "East": true, + "South": true, + "West": true, + "NorthEast": true, + "SouthEast": true, + "SouthWest": true, + "NorthWest": true, + "Router": true, + }, + Mode: SyncOp, + EnableFIFOModel: true, + RecvQueueCapacity: recvCap, + SendQueueCapacity: sendCap, + RecvBufHead: make([][]cgra.Data, 4), + RecvBufHeadReady: make([][]bool, 4), + SendBufHead: make([][]cgra.Data, 4), + SendBufHeadBusy: make([][]bool, 4), + RecvBufQueue: make([][][]cgra.Data, 4), + SendBufQueue: make([][][]cgra.Data, 4), + OpInputReadCache: make(map[string]cgra.Data), + } + for c := 0; c < 4; c++ { + state.RecvBufHead[c] = make([]cgra.Data, 12) + 
state.RecvBufHeadReady[c] = make([]bool, 12) + state.SendBufHead[c] = make([]cgra.Data, 12) + state.SendBufHeadBusy[c] = make([]bool, 12) + state.RecvBufQueue[c] = make([][]cgra.Data, 12) + state.SendBufQueue[c] = make([][]cgra.Data, 12) + for d := 0; d < 12; d++ { + state.RecvBufQueue[c][d] = make([]cgra.Data, 0, recvCap) + state.SendBufQueue[c][d] = make([]cgra.Data, 0, sendCap) + } + } + return state +} + +func TestRecvFIFOOrderAndCapacity(t *testing.T) { + state := newFIFOTestState(2, 2) + emu := instEmulator{} + north := emu.getDirecIndex("North") + + if !state.recvQueuePush(0, north, cgra.NewScalar(11)) { + t.Fatal("expected first recv enqueue to succeed") + } + if !state.recvQueuePush(0, north, cgra.NewScalar(22)) { + t.Fatal("expected second recv enqueue to succeed") + } + if state.recvQueuePush(0, north, cgra.NewScalar(33)) { + t.Fatal("expected recv queue to report full at capacity") + } + + state.OpInputReadCache = make(map[string]cgra.Data) + v1 := emu.readOperand(Operand{Impl: "North", Color: "R"}, &state) + state.OpInputReadCache = make(map[string]cgra.Data) + v2 := emu.readOperand(Operand{Impl: "North", Color: "R"}, &state) + if v1.First() != 11 || v2.First() != 22 { + t.Fatalf("unexpected FIFO order: got (%d,%d), want (11,22)", v1.First(), v2.First()) + } + if state.recvQueueLen(0, north) != 0 { + t.Fatalf("expected recv queue empty after two consumes, got %d", state.recvQueueLen(0, north)) + } +} + +func TestSyncModeDuplicatePortReadConsumesOnce(t *testing.T) { + state := newFIFOTestState(4, 2) + emu := instEmulator{} + north := emu.getDirecIndex("North") + + if !state.recvQueuePush(0, north, cgra.NewScalar(101)) { + t.Fatal("expected first recv enqueue to succeed") + } + if !state.recvQueuePush(0, north, cgra.NewScalar(202)) { + t.Fatal("expected second recv enqueue to succeed") + } + + state.OpInputReadCache = make(map[string]cgra.Data) + v1 := emu.readOperand(Operand{Impl: "North", Color: "R"}, &state) + v2 := emu.readOperand(Operand{Impl: 
"North", Color: "R"}, &state) + + if v1.First() != 101 || v2.First() != 101 { + t.Fatalf("expected duplicate reads to reuse same token, got (%d,%d)", v1.First(), v2.First()) + } + if state.recvQueueLen(0, north) != 1 { + t.Fatalf("expected queue length 1 after duplicate read consume-once, got %d", state.recvQueueLen(0, north)) + } +} + +func TestSendQueueBlocksOnlyWhenFull(t *testing.T) { + state := newFIFOTestState(2, 2) + emu := instEmulator{} + east := emu.getDirecIndex("East") + + if !state.sendQueuePush(0, east, cgra.NewScalar(1)) { + t.Fatal("expected first send enqueue to succeed") + } + + op := Operation{ + OpCode: "MOV", + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "#1", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "East", Color: "R"}, + }}, + } + + ready, reason := emu.checkIssueReadiness(op, &state) + if !ready { + t.Fatalf("expected issue ready when queue has room, got reason=%s", reason) + } + + if !state.sendQueuePush(0, east, cgra.NewScalar(2)) { + t.Fatal("expected second send enqueue to succeed") + } + ready, reason = emu.checkIssueReadiness(op, &state) + if ready || reason != StallReasonOutputBlocked { + t.Fatalf("expected output blocked when queue full, got ready=%v reason=%s", ready, reason) + } +} + +func TestRouterRedKeepsSingleOutstanding(t *testing.T) { + state := newFIFOTestState(2, 8) + router := int(cgra.Router) + + if !state.sendQueuePush(0, router, cgra.NewScalar(7)) { + t.Fatal("expected first router-red enqueue to succeed") + } + if state.sendQueuePush(0, router, cgra.NewScalar(8)) { + t.Fatal("expected router-red second enqueue to fail (single outstanding)") + } +} + +func TestEnableFIFOModelSwitchControlsQueueDepth(t *testing.T) { + emu := instEmulator{} + north := emu.getDirecIndex("North") + + legacy := newFIFOTestState(4, 4) + legacy.EnableFIFOModel = false + if !legacy.recvQueuePush(0, north, cgra.NewScalar(1)) { + t.Fatal("legacy path first recv push should succeed") + } + if 
legacy.recvQueuePush(0, north, cgra.NewScalar(2)) { + t.Fatal("legacy path should stay single-slot regardless of configured depth") + } + + fifo := newFIFOTestState(4, 4) + fifo.EnableFIFOModel = true + if !fifo.recvQueuePush(0, north, cgra.NewScalar(1)) { + t.Fatal("fifo path first recv push should succeed") + } + if !fifo.recvQueuePush(0, north, cgra.NewScalar(2)) { + t.Fatal("fifo path second recv push should succeed when depth > 1") + } +} diff --git a/core/operation_latency.go b/core/operation_latency.go new file mode 100644 index 0000000..2a78ccb --- /dev/null +++ b/core/operation_latency.go @@ -0,0 +1,70 @@ +package core + +import ( + "fmt" + "os" + "strings" + + "gopkg.in/yaml.v3" +) + +const operationLatencyFileEnv = "ZEONICA_OPERATION_LATENCY_FILE" + +type operationLatencySidecar struct { + DefaultLatency int `yaml:"default_latency"` + Opcodes map[string]int `yaml:"opcodes"` +} + +func loadOperationLatencyProfileFromEnv() (map[string]int, int, error) { + path := strings.TrimSpace(os.Getenv(operationLatencyFileEnv)) + if path == "" { + return nil, 1, nil + } + + data, err := os.ReadFile(path) + if err != nil { + return nil, 0, fmt.Errorf("read %s (%s): %w", operationLatencyFileEnv, path, err) + } + + var sidecar operationLatencySidecar + if err := yaml.Unmarshal(data, &sidecar); err != nil { + return nil, 0, fmt.Errorf("parse %s (%s): %w", operationLatencyFileEnv, path, err) + } + + defaultLatency := sidecar.DefaultLatency + if defaultLatency == 0 { + defaultLatency = 1 + } + if defaultLatency <= 0 { + return nil, 0, fmt.Errorf("default_latency must be > 0, got %d", sidecar.DefaultLatency) + } + + normalized := make(map[string]int, len(sidecar.Opcodes)) + for opcode, latency := range sidecar.Opcodes { + key := normalizeLatencyOpcode(opcode) + if key == "" { + return nil, 0, fmt.Errorf("opcode latency entry has empty opcode key") + } + if latency <= 0 { + return nil, 0, fmt.Errorf("opcode %s latency must be > 0, got %d", key, latency) + } + normalized[key] 
= latency + } + + return normalized, defaultLatency, nil +} + +func normalizeLatencyOpcode(opCode string) string { + return strings.ToUpper(strings.TrimSpace(opCode)) +} + +func cloneOperationLatencyMap(src map[string]int) map[string]int { + if len(src) == 0 { + return nil + } + cloned := make(map[string]int, len(src)) + for opcode, latency := range src { + cloned[opcode] = latency + } + return cloned +} diff --git a/core/operation_latency_test.go b/core/operation_latency_test.go new file mode 100644 index 0000000..058a25d --- /dev/null +++ b/core/operation_latency_test.go @@ -0,0 +1,308 @@ +package core + +import ( + "os" + "path/filepath" + "testing" + + "github.com/sarchlab/zeonica/cgra" +) + +func TestLoadOperationLatencyProfileFromEnv(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "latency.yaml") + content := []byte("default_latency: 3\nopcodes:\n mul: 2\n FMUL: 4\n") + if err := os.WriteFile(path, content, 0o644); err != nil { + t.Fatalf("write latency file: %v", err) + } + + t.Setenv(operationLatencyFileEnv, path) + + opcodes, defaultLatency, err := loadOperationLatencyProfileFromEnv() + if err != nil { + t.Fatalf("load latency profile: %v", err) + } + if defaultLatency != 3 { + t.Fatalf("unexpected default latency: got %d want 3", defaultLatency) + } + if opcodes["MUL"] != 2 { + t.Fatalf("expected normalized MUL latency 2, got %d", opcodes["MUL"]) + } + if opcodes["FMUL"] != 4 { + t.Fatalf("expected FMUL latency 4, got %d", opcodes["FMUL"]) + } +} + +func TestLoadOperationLatencyProfileFromEnvDefaultsToOneWhenUnset(t *testing.T) { + t.Setenv(operationLatencyFileEnv, "") + + opcodes, defaultLatency, err := loadOperationLatencyProfileFromEnv() + if err != nil { + t.Fatalf("load empty latency profile: %v", err) + } + if len(opcodes) != 0 { + t.Fatalf("expected no opcode latencies, got %d", len(opcodes)) + } + if defaultLatency != 1 { + t.Fatalf("unexpected default latency: got %d want 1", defaultLatency) + } +} + +func 
TestLoadOperationLatencyProfileRejectsInvalidLatency(t *testing.T) { + tmpDir := t.TempDir() + path := filepath.Join(tmpDir, "latency.yaml") + content := []byte("default_latency: 1\nopcodes:\n MUL: 0\n") + if err := os.WriteFile(path, content, 0o644); err != nil { + t.Fatalf("write latency file: %v", err) + } + + t.Setenv(operationLatencyFileEnv, path) + + if _, _, err := loadOperationLatencyProfileFromEnv(); err == nil { + t.Fatal("expected invalid latency error") + } +} + +func newLatencyTestState(recvCap, sendCap int, enableFIFO bool) coreState { + state := newFIFOTestState(recvCap, sendCap) + state.EnableFIFOModel = enableFIFO + state.Registers = make([]cgra.Data, 8) + state.OpTimingCursor = make(map[int]int) + state.OpTimingLate = make(map[int]bool) + state.OpTimingRollCycle = make(map[int]int64) + state.OpIssueCount = make(map[int]int) + state.Code = Program{DefaultOperationLatency: 1} + state.SelectedBlock = &EntryBlock{} + state.PCInBlock = 0 + return state +} + +func TestSyncOpcodeLatencyDelaysRegisterWriteback(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyInOrderDataflow, + } + state := newLatencyTestState(4, 4, false) + state.Registers[0] = cgra.NewScalar(5) + state.Code.OperationLatencies = map[string]int{"MUL": 2} + + ig := InstructionGroup{ + Operations: []Operation{ + { + OpCode: "MUL", + ID: 1, + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "$0", Color: "R"}, + {Impl: "#3", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$1", Color: "R"}, + }}, + }, + }, + } + state.SelectedBlock.InstructionGroups = []InstructionGroup{ig} + + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 0) { + t.Fatal("expected first issue cycle to make progress") + } + if got := state.Registers[1].First(); got != 0 { + t.Fatalf("unexpected early writeback: got %d want 0", got) + } + if state.PendingSyncGroup == nil { + t.Fatal("expected pending sync group after first issue") + } + + 
state.CurrentCycle = 1 + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 1) { + t.Fatal("expected completion cycle to make progress") + } + if got := state.Registers[1].First(); got != 15 { + t.Fatalf("unexpected delayed writeback result: got %d want 15", got) + } + if state.PendingSyncGroup != nil { + t.Fatal("expected pending sync group cleared after commit") + } +} + +func TestSyncOpcodeLatencyDelaysGroupCommitWithDataMov(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyInOrderDataflow, + } + state := newLatencyTestState(4, 4, true) + state.Registers[0] = cgra.NewScalar(5) + state.Code.OperationLatencies = map[string]int{"MUL": 2} + + ig := InstructionGroup{ + Operations: []Operation{ + { + OpCode: "MUL", + ID: 11, + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "$0", Color: "R"}, + {Impl: "#3", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$1", Color: "R"}, + }}, + }, + { + OpCode: "DATA_MOV", + ID: 12, + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "$1", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "East", Color: "R"}, + }}, + }, + }, + } + state.SelectedBlock.InstructionGroups = []InstructionGroup{ig} + + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 0) { + t.Fatal("expected first issue cycle to make progress") + } + east := emu.getDirecIndex("East") + if state.sendQueueLen(0, east) != 0 { + t.Fatalf("expected no outgoing data before commit, got send queue len %d", state.sendQueueLen(0, east)) + } + if got := state.Registers[1].First(); got != 0 { + t.Fatalf("unexpected early register writeback: got %d want 0", got) + } + + state.CurrentCycle = 1 + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 1) { + t.Fatal("expected completion cycle to make progress") + } + if got := state.Registers[1].First(); got != 15 { + t.Fatalf("unexpected committed register value: got %d want 15", got) + } + if state.sendQueueLen(0, east) != 1 { + 
t.Fatalf("expected delayed DATA_MOV to enqueue once, got len %d", state.sendQueueLen(0, east)) + } + head, ok := state.sendQueuePeek(0, east) + if !ok || head.First() != 15 { + t.Fatalf("unexpected delayed DATA_MOV payload: ok=%v value=%d", ok, head.First()) + } +} + +func TestSyncOpcodeLatencyUsesMaxAcrossGroup(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyInOrderDataflow, + } + state := newLatencyTestState(4, 4, true) + state.Registers[0] = cgra.NewScalar(5) + state.Code.OperationLatencies = map[string]int{ + "ADD": 1, + "MUL": 2, + } + + ig := InstructionGroup{ + Operations: []Operation{ + { + OpCode: "ADD", + ID: 21, + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "$0", Color: "R"}, + {Impl: "#1", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$1", Color: "R"}, + }}, + }, + { + OpCode: "MUL", + ID: 22, + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "$1", Color: "R"}, + {Impl: "#3", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$2", Color: "R"}, + }}, + }, + }, + } + state.SelectedBlock.InstructionGroups = []InstructionGroup{ig} + + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 0) { + t.Fatal("expected first issue cycle to make progress") + } + if got := state.Registers[2].First(); got != 0 { + t.Fatalf("unexpected early result writeback: got %d want 0", got) + } + + state.CurrentCycle = 1 + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 1) { + t.Fatal("expected completion cycle to make progress") + } + if got := state.Registers[1].First(); got != 6 { + t.Fatalf("unexpected committed ADD result: got %d want 6", got) + } + if got := state.Registers[2].First(); got != 18 { + t.Fatalf("unexpected committed MUL result: got %d want 18", got) + } +} + +func TestSyncOpcodeLatencyRespectsDerivedTimingIssueCycle(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyStrictTimed, + } + state := 
newLatencyTestState(4, 4, true) + state.Registers[0] = cgra.NewScalar(7) + state.Code.OperationLatencies = map[string]int{"MUL": 2} + state.Code.DerivedTiming = map[int][]int64{ + 31: []int64{5}, + } + + ig := InstructionGroup{ + Operations: []Operation{ + { + OpCode: "MUL", + ID: 31, + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "$0", Color: "R"}, + {Impl: "#2", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$1", Color: "R"}, + }}, + }, + }, + } + state.SelectedBlock.InstructionGroups = []InstructionGroup{ig} + + state.CurrentCycle = 4 + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 4) { + t.Fatal("expected pre-issue timing wait to keep core alive") + } + if state.PendingSyncGroup != nil { + t.Fatal("did not expect pending sync group before legal issue cycle") + } + if got := state.Registers[1].First(); got != 0 { + t.Fatalf("unexpected result before legal issue cycle: got %d want 0", got) + } + + state.CurrentCycle = 5 + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 5) { + t.Fatal("expected legal issue cycle to make progress") + } + if state.PendingSyncGroup == nil { + t.Fatal("expected pending sync group on legal issue cycle") + } + if got := state.Registers[1].First(); got != 0 { + t.Fatalf("unexpected result on issue cycle before commit: got %d want 0", got) + } + + state.CurrentCycle = 6 + if !emu.RunInstructionGroupWithSyncOps(ig, &state, 6) { + t.Fatal("expected completion cycle to make progress") + } + if got := state.Registers[1].First(); got != 14 { + t.Fatalf("unexpected delayed result after derived-timing issue: got %d want 14", got) + } +} diff --git a/core/program.go b/core/program.go index 4d1db00..57bf6b2 100644 --- a/core/program.go +++ b/core/program.go @@ -3,6 +3,7 @@ package core import ( "fmt" + "log/slog" "os" "regexp" "strconv" @@ -63,8 +64,11 @@ type YAMLRoot struct { // Program is the internal executable representation for one core. 
type Program struct { - EntryBlocks []EntryBlock - CompiledII int + EntryBlocks []EntryBlock + CompiledII int + DerivedTiming map[int][]int64 + OperationLatencies map[string]int + DefaultOperationLatency int } // EntryBlock is one entry block in a core program. @@ -104,6 +108,7 @@ type Operation struct { SrcOperands OperandList ID int // ID from YAML file InvalidIterations int // Invalid iterations from YAML file + TimeStep int // Time step from YAML file } // OperandList wraps source or destination operands for an operation. @@ -135,8 +140,18 @@ func LoadProgramFileFromYAML(programFilePath string) map[string]Program { config := root.ArrayConfig - // Debug: Print the parsed config - fmt.Printf("Debug: Parsed config - Rows: %d, Cols: %d, Cores: %d\n", config.Rows, config.Cols, len(config.Cores)) + derivedTimingByCoord, err := loadDerivedTimingFromEnv() + if err != nil { + panic(fmt.Sprintf("Failed to load timing sidecar: %v", err)) + } + operationLatencies, defaultOperationLatency, err := loadOperationLatencyProfileFromEnv() + if err != nil { + panic(fmt.Sprintf("Failed to load operation latency sidecar: %v", err)) + } + + if DebugEnabled() { + slog.Debug("ParsedProgramConfig", "rows", config.Rows, "cols", config.Cols, "cores", len(config.Cores)) + } // Convert to map[(x,y)]Program programMap := make(map[string]Program) @@ -144,7 +159,9 @@ func LoadProgramFileFromYAML(programFilePath string) map[string]Program { for _, core := range config.Cores { // Create coordinate key coordKey := fmt.Sprintf("(%d,%d)", core.Column, core.Row) - fmt.Printf("Debug: Processing core at %s with %d entries\n", coordKey, len(core.Entries)) + if DebugEnabled() { + slog.Debug("ProcessingProgramCore", "coord", coordKey, "entries", len(core.Entries)) + } // Convert core entries to Program structure var entryBlocks []EntryBlock @@ -192,6 +209,7 @@ func LoadProgramFileFromYAML(programFilePath string) map[string]Program { DstOperands: OperandList{Operands: dstOperands}, ID: yamlOp.ID, 
InvalidIterations: yamlOp.InvalidIterations, + TimeStep: yamlOp.TimeStep, } operations = append(operations, operation) @@ -206,8 +224,11 @@ func LoadProgramFileFromYAML(programFilePath string) map[string]Program { } program := Program{ - EntryBlocks: entryBlocks, - CompiledII: config.CompiledII, + EntryBlocks: entryBlocks, + CompiledII: config.CompiledII, + DerivedTiming: cloneDerivedTimingMap(derivedTimingByCoord[coordKey]), + OperationLatencies: cloneOperationLatencyMap(operationLatencies), + DefaultOperationLatency: defaultOperationLatency, } programMap[coordKey] = program @@ -216,6 +237,20 @@ func LoadProgramFileFromYAML(programFilePath string) map[string]Program { return programMap } +// OperationLatency returns the configured latency for an opcode, defaulting to 1. +func (p Program) OperationLatency(opCode string) int { + normalized := normalizeLatencyOpcode(opCode) + if normalized != "" && len(p.OperationLatencies) > 0 { + if latency, ok := p.OperationLatencies[normalized]; ok && latency > 0 { + return latency + } + } + if p.DefaultOperationLatency > 0 { + return p.DefaultOperationLatency + } + return 1 +} + // splitRespectingBrackets splits a string by delimiter, but respects brackets // so [WEST, RED] is treated as a single token func splitRespectingBrackets(s, delimiter string) []string { diff --git a/core/queue_watch.go b/core/queue_watch.go new file mode 100644 index 0000000..732ecaa --- /dev/null +++ b/core/queue_watch.go @@ -0,0 +1,145 @@ +package core + +import ( + "fmt" + "strings" + + "github.com/sarchlab/zeonica/cgra" +) + +// QueueWatchSpec declares one queue to sample for occupancy reporting. 
+type QueueWatchSpec struct { + Label string `json:"label" yaml:"label"` + X int `json:"x" yaml:"x"` + Y int `json:"y" yaml:"y"` + Kind string `json:"kind" yaml:"kind"` + Direction string `json:"direction" yaml:"direction"` + Color string `json:"color" yaml:"color"` +} + +type resolvedQueueWatch struct { + Label string + X int + Y int + Kind string + Direction string + DirectionIdx int + Color string + ColorIdx int +} + +// ValidateQueueWatchSpecs checks queue watch definitions before runtime build. +func ValidateQueueWatchSpecs(specs []QueueWatchSpec) error { + _, err := resolveQueueWatchSpecs(specs) + return err +} + +func resolveQueueWatchSpecs(specs []QueueWatchSpec) ([]resolvedQueueWatch, error) { + if len(specs) == 0 { + return nil, nil + } + + resolved := make([]resolvedQueueWatch, 0, len(specs)) + for idx, spec := range specs { + watch, err := resolveQueueWatchSpec(spec) + if err != nil { + return nil, fmt.Errorf("queue watch[%d]: %w", idx, err) + } + resolved = append(resolved, watch) + } + return resolved, nil +} + +func matchingQueueWatchesForTile(enabled bool, queueWatches []resolvedQueueWatch, x, y int) []resolvedQueueWatch { + if !enabled || len(queueWatches) == 0 { + return nil + } + + var matched []resolvedQueueWatch + for _, watch := range queueWatches { + if watch.X == x && watch.Y == y { + matched = append(matched, watch) + } + } + return matched +} + +func cloneQueueWatches(input []resolvedQueueWatch) []resolvedQueueWatch { + if len(input) == 0 { + return nil + } + out := make([]resolvedQueueWatch, len(input)) + copy(out, input) + return out +} + +func resolveQueueWatchSpec(spec QueueWatchSpec) (resolvedQueueWatch, error) { + kind := strings.ToLower(strings.TrimSpace(spec.Kind)) + if kind != "recv" && kind != "send" { + return resolvedQueueWatch{}, fmt.Errorf("invalid kind %q", spec.Kind) + } + + directionIdx, directionName, err := resolveQueueWatchDirection(spec.Direction) + if err != nil { + return resolvedQueueWatch{}, err + } + + colorIdx, 
colorName, err := resolveQueueWatchColor(spec.Color) + if err != nil { + return resolvedQueueWatch{}, err + } + + label := strings.TrimSpace(spec.Label) + if label == "" { + label = fmt.Sprintf("%s(%d,%d).%s.%s", kind, spec.X, spec.Y, directionName, colorName) + } + + return resolvedQueueWatch{ + Label: label, + X: spec.X, + Y: spec.Y, + Kind: kind, + Direction: directionName, + DirectionIdx: directionIdx, + Color: colorName, + ColorIdx: colorIdx, + }, nil +} + +func resolveQueueWatchDirection(raw string) (int, string, error) { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "north": + return int(cgra.North), cgra.North.Name(), nil + case "east": + return int(cgra.East), cgra.East.Name(), nil + case "south": + return int(cgra.South), cgra.South.Name(), nil + case "west": + return int(cgra.West), cgra.West.Name(), nil + case "northeast": + return int(cgra.NorthEast), cgra.NorthEast.Name(), nil + case "northwest": + return int(cgra.NorthWest), cgra.NorthWest.Name(), nil + case "southeast": + return int(cgra.SouthEast), cgra.SouthEast.Name(), nil + case "southwest": + return int(cgra.SouthWest), cgra.SouthWest.Name(), nil + case "router": + return int(cgra.Router), cgra.Router.Name(), nil + default: + return 0, "", fmt.Errorf("invalid direction %q", raw) + } +} + +func resolveQueueWatchColor(raw string) (int, string, error) { + switch strings.ToUpper(strings.TrimSpace(raw)) { + case "R", "RED": + return 0, "RED", nil + case "Y", "YELLOW": + return 1, "YELLOW", nil + case "B", "BLUE": + return 2, "BLUE", nil + default: + return 0, "", fmt.Errorf("invalid color %q", raw) + } +} diff --git a/core/two_phase_switch_test.go b/core/two_phase_switch_test.go new file mode 100644 index 0000000..020ce20 --- /dev/null +++ b/core/two_phase_switch_test.go @@ -0,0 +1,83 @@ +package core + +import ( + "testing" + + "github.com/sarchlab/zeonica/cgra" +) + +func TestSyncTwoPhaseNoPartialCommitOnStall(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: 
ExecutionPolicyInOrderDataflow, + } + state := newFIFOTestState(4, 4) + state.Mode = SyncOp + state.EnableFIFOModel = true + state.Registers = make([]cgra.Data, 8) + state.Registers[0] = cgra.NewScalar(9) + + ig := InstructionGroup{ + Operations: []Operation{ + { + OpCode: "MOV", + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "#1", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$0", Color: "R"}, + }}, + }, + { + OpCode: "MOV", + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "North", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$1", Color: "R"}, + }}, + }, + }, + } + + run := emu.RunInstructionGroupWithSyncOps(ig, &state, 0) + if run { + t.Fatal("expected instruction group to stall on missing North operand") + } + if got := state.Registers[0].First(); got != 9 { + t.Fatalf("expected no partial commit on stall, got register0=%d want 9", got) + } +} + +func TestSyncTwoPhaseCommitOnSuccess(t *testing.T) { + emu := instEmulator{ + CareFlags: true, + ExecutionPolicy: ExecutionPolicyInOrderDataflow, + } + state := newFIFOTestState(4, 4) + state.Mode = SyncOp + state.EnableFIFOModel = true + state.Registers = make([]cgra.Data, 8) + + ig := InstructionGroup{ + Operations: []Operation{ + { + OpCode: "MOV", + SrcOperands: OperandList{Operands: []Operand{ + {Impl: "#7", Color: "R"}, + }}, + DstOperands: OperandList{Operands: []Operand{ + {Impl: "$0", Color: "R"}, + }}, + }, + }, + } + + run := emu.RunInstructionGroupWithSyncOps(ig, &state, 0) + if !run { + t.Fatal("expected instruction group to run successfully") + } + if got := state.Registers[0].First(); got != 7 { + t.Fatalf("unexpected committed register value: got %d want 7", got) + } +} diff --git a/core/util.go b/core/util.go index 1e3644c..a65a9e5 100644 --- a/core/util.go +++ b/core/util.go @@ -4,6 +4,8 @@ import ( "context" "fmt" "log/slog" + "sync/atomic" + "time" "github.com/jedib0t/go-pretty/v6/table" ) @@ -11,15 +13,270 @@ import ( 
const ( // PrintToggle enables verbose state table printing in debugging. PrintToggle = false - // LevelTrace is a custom trace level above info. - LevelTrace slog.Level = slog.LevelInfo + 1 + // LevelTrace is a custom trace level below debug/info. + LevelTrace slog.Level = slog.LevelDebug - 4 ) +// TraceObservation captures the subset of a trace event needed for report generation. +type TraceObservation struct { + WallTime time.Time + Msg string + Behavior string + Time *float64 + X *int + Y *int + Src string + Dst string + From string + To string + Label string + Kind string + Direction string + Color string + Occupancy *int + Capacity *int +} + +var traceEnabled atomic.Bool +var traceObserver func(TraceObservation) + +func init() { + traceEnabled.Store(true) +} + +// SetTraceEnabled controls whether trace events are written to the slog trace handler. +func SetTraceEnabled(enabled bool) { + traceEnabled.Store(enabled) +} + +// TraceEnabled reports whether trace output is enabled. +func TraceEnabled() bool { + return traceEnabled.Load() +} + +// DebugEnabled reports whether debug logging is enabled on the default logger. +func DebugEnabled() bool { + return slog.Default().Enabled(context.Background(), slog.LevelDebug) +} + +// SetTraceObserver registers a report observer for trace events. +func SetTraceObserver(observer func(TraceObservation)) { + traceObserver = observer +} + // Trace writes a trace-level structured log record. func Trace(msg string, args ...any) { + if traceObserver != nil { + if observation, valid := buildTraceObservation(msg, args...); valid { + traceObserver(observation) + } + } + if !TraceEnabled() { + return + } slog.Log(context.Background(), LevelTrace, msg, args...) } +// ObserveDataFlow records a dataflow event for report generation without emitting trace output. 
+func ObserveDataFlow(behavior string, timeValue float64, from, to, src, dst string) { + observeTrace(TraceObservation{ + WallTime: time.Now(), + Msg: "DataFlow", + Behavior: behavior, + Time: float64Ptr(timeValue), + From: from, + To: to, + Src: src, + Dst: dst, + }) +} + +// ObserveMemory records a memory event for report generation without emitting trace output. +func ObserveMemory(behavior string, timeValue float64, x, y int, src, dst string) { + observeTrace(TraceObservation{ + WallTime: time.Now(), + Msg: "Memory", + Behavior: behavior, + Time: float64Ptr(timeValue), + X: intPtr(x), + Y: intPtr(y), + Src: src, + Dst: dst, + }) +} + +// ObserveInst records an instruction event for report generation without emitting trace output. +func ObserveInst(timeValue float64, x, y int) { + observeTrace(TraceObservation{ + WallTime: time.Now(), + Msg: "Inst", + Time: float64Ptr(timeValue), + X: intPtr(x), + Y: intPtr(y), + }) +} + +// ObserveBackpressure records a backpressure event for report generation without emitting trace output. +func ObserveBackpressure(timeValue float64, x, y int) { + observeTrace(TraceObservation{ + WallTime: time.Now(), + Msg: "Backpressure", + Time: float64Ptr(timeValue), + X: intPtr(x), + Y: intPtr(y), + }) +} + +// ObserveQueue records a watched queue occupancy sample for report generation. 
+func ObserveQueue(label, kind string, timeValue float64, x, y int, direction, color string, occupancy, capacity int) { + observeTrace(TraceObservation{ + WallTime: time.Now(), + Msg: "Queue", + Behavior: "sample", + Time: float64Ptr(timeValue), + X: intPtr(x), + Y: intPtr(y), + Label: label, + Kind: kind, + Direction: direction, + Color: color, + Occupancy: intPtr(occupancy), + Capacity: intPtr(capacity), + }) +} + +func observeTrace(observation TraceObservation) { + if traceObserver != nil { + traceObserver(observation) + } +} + +//nolint:gocyclo +func buildTraceObservation(msg string, args ...any) (TraceObservation, bool) { + observation := TraceObservation{ + WallTime: time.Now(), + Msg: msg, + } + if msg != "Inst" && msg != "Memory" && msg != "DataFlow" && msg != "Backpressure" && msg != "Stall" && msg != "Queue" { + return observation, false + } + + for i := 0; i < len(args); i++ { + switch value := args[i].(type) { + case slog.Attr: + assignObservationField(&observation, value.Key, value.Value.Any()) + case string: + if i+1 >= len(args) { + continue + } + assignObservationField(&observation, value, args[i+1]) + i++ + } + } + + return observation, true +} + +//nolint:gocyclo +func assignObservationField(observation *TraceObservation, key string, value any) { + switch key { + case "Behavior": + observation.Behavior = fmt.Sprint(value) + case "Time": + if converted, ok := toFloat64(value); ok { + observation.Time = float64Ptr(converted) + } + case "X": + if converted, ok := toInt(value); ok { + observation.X = intPtr(converted) + } + case "Y": + if converted, ok := toInt(value); ok { + observation.Y = intPtr(converted) + } + case "Src": + observation.Src = fmt.Sprint(value) + case "Dst": + observation.Dst = fmt.Sprint(value) + case "From": + observation.From = fmt.Sprint(value) + case "To": + observation.To = fmt.Sprint(value) + case "Label": + observation.Label = fmt.Sprint(value) + case "Kind": + observation.Kind = fmt.Sprint(value) + case "Direction": + 
observation.Direction = fmt.Sprint(value) + case "Color": + observation.Color = fmt.Sprint(value) + case "Occupancy": + if converted, ok := toInt(value); ok { + observation.Occupancy = intPtr(converted) + } + case "Capacity": + if converted, ok := toInt(value); ok { + observation.Capacity = intPtr(converted) + } + } +} + +func toFloat64(value any) (float64, bool) { + switch typed := value.(type) { + case float64: + return typed, true + case float32: + return float64(typed), true + case int: + return float64(typed), true + case int64: + return float64(typed), true + case int32: + return float64(typed), true + case uint32: + return float64(typed), true + case uint64: + return float64(typed), true + default: + return 0, false + } +} + +func toInt(value any) (int, bool) { + switch typed := value.(type) { + case int: + return typed, true + case int32: + return int(typed), true + case int64: + return int(typed), true + case uint32: + return int(typed), true + case uint64: + return int(typed), true + default: + return 0, false + } +} + +func intPtr(value int) *int { + ptr := new(int) + *ptr = value + return ptr +} + +func float64Ptr(value float64) *float64 { + ptr := new(float64) + *ptr = value + return ptr +} + +func int64Ptr(value int64) *int64 { + ptr := new(int64) + *ptr = value + return ptr +} + // PrintState prints a formatted snapshot of core runtime state. 
// //nolint:gocyclo,funlen diff --git a/report/observer_test.go b/report/observer_test.go new file mode 100644 index 0000000..46c19b7 --- /dev/null +++ b/report/observer_test.go @@ -0,0 +1,123 @@ +package report + +import ( + "encoding/json" + "os" + "path/filepath" + "reflect" + "testing" + "time" + + "github.com/sarchlab/zeonica/core" +) + +//nolint:funlen +func TestObserverBuildMatchesGenerateFromLog(t *testing.T) { + logPath := filepath.Join(t.TempDir(), "trace.json.log") + ts0 := time.Date(2026, 3, 6, 0, 0, 0, 0, time.UTC) + ts1 := ts0.Add(10 * time.Millisecond) + ts2 := ts1.Add(10 * time.Millisecond) + ts3 := ts2.Add(10 * time.Millisecond) + + event0 := traceEvent{ + Timestamp: ts0.Format(time.RFC3339Nano), + Msg: "DataFlow", + Behavior: "FeedIn", + Time: testFloat64Ptr(0), + To: "Device.Tile[0][0].Core.West", + } + event1 := traceEvent{ + Timestamp: ts1.Format(time.RFC3339Nano), + Msg: "Inst", + Time: testFloat64Ptr(0), + X: testIntPtr(0), + Y: testIntPtr(0), + } + event2 := traceEvent{ + Timestamp: ts2.Format(time.RFC3339Nano), + Msg: "Backpressure", + Time: testFloat64Ptr(0), + X: testIntPtr(0), + Y: testIntPtr(0), + } + event3 := traceEvent{ + Timestamp: ts3.Format(time.RFC3339Nano), + Msg: "Stall", + Behavior: "schedule_bubble", + Time: testFloat64Ptr(1), + X: testIntPtr(0), + Y: testIntPtr(0), + } + + file, err := os.Create(logPath) + if err != nil { + t.Fatalf("Create returned error: %v", err) + } + for _, event := range []traceEvent{event0, event1, event2, event3} { + payload, err := json.Marshal(event) + if err != nil { + t.Fatalf("Marshal returned error: %v", err) + } + if _, err := file.Write(append(payload, '\n')); err != nil { + t.Fatalf("Write returned error: %v", err) + } + } + _ = file.Close() + + opts := GenerateOptions{ + TestName: "observer-test", + LogPath: logPath, + GridWidth: 1, + GridHeight: 1, + TopN: 5, + } + + fromLog, err := GenerateFromLog(opts) + if err != nil { + t.Fatalf("GenerateFromLog returned error: %v", err) + } + + 
observer := NewObserver() + observer.Observe(core.TraceObservation{ + WallTime: ts0, + Msg: "DataFlow", + Behavior: "FeedIn", + Time: testFloat64Ptr(0), + To: "Device.Tile[0][0].Core.West", + }) + observer.Observe(core.TraceObservation{ + WallTime: ts1, + Msg: "Inst", + Time: testFloat64Ptr(0), + X: testIntPtr(0), + Y: testIntPtr(0), + }) + observer.Observe(core.TraceObservation{ + WallTime: ts2, + Msg: "Backpressure", + Time: testFloat64Ptr(0), + X: testIntPtr(0), + Y: testIntPtr(0), + }) + observer.Observe(core.TraceObservation{ + WallTime: ts3, + Msg: "Stall", + Behavior: "schedule_bubble", + Time: testFloat64Ptr(1), + X: testIntPtr(0), + Y: testIntPtr(0), + }) + + fromObserver := observer.Build(opts) + if !reflect.DeepEqual(fromLog, fromObserver) { + t.Fatalf("expected observer report to match log report\nfrom log: %#v\nfrom observer: %#v", fromLog, fromObserver) + } +} + +func testIntPtr(v int) *int { + return &v +} + +func testFloat64Ptr(v float64) *float64 { + return &v +} diff --git a/report/report.go b/report/report.go index 5b142b0..883ba01 100644 --- a/report/report.go +++ b/report/report.go @@ -10,6 +10,9 @@ import ( "os" "regexp" "sort" + "time" + + "github.com/sarchlab/zeonica/core" ) // GenerateOptions controls report generation behavior from a trace log. @@ -25,22 +28,33 @@ type GenerateOptions struct { // Report is the aggregate execution summary derived from a trace log. 
type Report struct { - TestName string `json:"testName,omitempty"` - LogPath string `json:"logPath"` - Grid GridInfo `json:"grid"` - TotalCycles int64 `json:"totalCycles"` - ActiveCycles int64 `json:"activeCyclesGlobal"` - IdleCycles int64 `json:"idleCyclesGlobal"` - Passed *bool `json:"passed,omitempty"` - MismatchCount *int `json:"mismatchCount,omitempty"` - InstCount int64 `json:"instCount"` - SendCount int64 `json:"sendCount"` - RecvCount int64 `json:"recvCount"` - MemoryCount int64 `json:"memoryCount"` - TotalEvents int64 `json:"totalEvents"` - ActiveTileCount int `json:"activeTileCount"` - Tiles []TileStats `json:"tiles"` - TopHotTiles []TopHotTile `json:"topHotTiles"` + TestName string `json:"testName,omitempty"` + LogPath string `json:"logPath"` + Grid GridInfo `json:"grid"` + TotalCycles int64 `json:"totalCycles"` + ActiveCycles int64 `json:"activeCyclesGlobal"` + IdleCycles int64 `json:"idleCyclesGlobal"` + Passed *bool `json:"passed,omitempty"` + MismatchCount *int `json:"mismatchCount,omitempty"` + InstCount int64 `json:"instCount"` + SendCount int64 `json:"sendCount"` + RecvCount int64 `json:"recvCount"` + MemoryCount int64 `json:"memoryCount"` + TotalEvents int64 `json:"totalEvents"` + WallClockDurationSec float64 `json:"wallClockDurationSec"` + InstThroughputPerCycle float64 `json:"instThroughputPerCycle"` + EventThroughputPerCycle float64 `json:"eventThroughputPerCycle"` + InstThroughputPerSec float64 `json:"instThroughputPerSec"` + BackpressureCount int64 `json:"backpressureCount"` + BackpressureCycles int64 `json:"backpressureCycles"` + ScheduleBubbleStallCount int64 `json:"scheduleBubbleStallCount"` + OperandWaitStallCount int64 `json:"operandWaitStallCount"` + OutputBlockedStallCount int64 `json:"outputBlockedStallCount"` + ActiveTileCount int `json:"activeTileCount"` + Tiles []TileStats `json:"tiles"` + TopHotTiles []TopHotTile `json:"topHotTiles"` + TopBackpressureTiles []TopBackpressureTile `json:"topBackpressureTiles"` + WatchedQueues 
[]QueueStats `json:"watchedQueues,omitempty"` } // GridInfo describes the grid size used by the workload. @@ -51,16 +65,20 @@ type GridInfo struct { // TileStats stores per-tile metrics in the generated report. type TileStats struct { - X int `json:"x"` - Y int `json:"y"` - Coord string `json:"coord"` - ActiveCycles int64 `json:"activeCycles"` - UtilizationPct float64 `json:"utilizationPct"` - InstCount int64 `json:"instCount"` - SendCount int64 `json:"sendCount"` - RecvCount int64 `json:"recvCount"` - MemoryCount int64 `json:"memoryCount"` - TotalEvents int64 `json:"totalEvents"` + X int `json:"x"` + Y int `json:"y"` + Coord string `json:"coord"` + ActiveCycles int64 `json:"activeCycles"` + UtilizationPct float64 `json:"utilizationPct"` + InstCount int64 `json:"instCount"` + SendCount int64 `json:"sendCount"` + RecvCount int64 `json:"recvCount"` + MemoryCount int64 `json:"memoryCount"` + TotalEvents int64 `json:"totalEvents"` + BackpressureCount int64 `json:"backpressureCount"` + ScheduleBubbleStallCount int64 `json:"scheduleBubbleStallCount"` + OperandWaitStallCount int64 `json:"operandWaitStallCount"` + OutputBlockedStallCount int64 `json:"outputBlockedStallCount"` } // TopHotTile is a ranked hot tile summary entry. @@ -73,6 +91,30 @@ type TopHotTile struct { TotalEvents int64 `json:"totalEvents"` } +// TopBackpressureTile is a ranked backpressure hot tile entry. +type TopBackpressureTile struct { + X int `json:"x"` + Y int `json:"y"` + Coord string `json:"coord"` + BackpressureCount int64 `json:"backpressureCount"` +} + +// QueueStats stores aggregated occupancy metrics for one watched queue. 
+type QueueStats struct { + Label string `json:"label"` + Kind string `json:"kind"` + X int `json:"x"` + Y int `json:"y"` + Coord string `json:"coord"` + Direction string `json:"direction"` + Color string `json:"color"` + Capacity int `json:"capacity"` + SampleCount int64 `json:"sampleCount"` + AvgOccupancy float64 `json:"avgOccupancy"` + PeakOccupancy int `json:"peakOccupancy"` + AvgUtilizationPct float64 `json:"avgUtilizationPct"` +} + type traceEvent struct { Timestamp string `json:"time"` Msg string `json:"msg"` @@ -84,6 +126,12 @@ type traceEvent struct { Dst string `json:"Dst"` From string `json:"From"` To string `json:"To"` + Label string `json:"Label"` + Kind string `json:"Kind"` + Direction string `json:"Direction"` + Color string `json:"Color"` + Occupancy *int `json:"Occupancy"` + Capacity *int `json:"Capacity"` } type tileCoord struct { @@ -92,95 +140,221 @@ type tileCoord struct { } type tileAccumulator struct { - cycles map[int64]struct{} - instCount int64 - sendCount int64 - recvCount int64 - memoryCount int64 - totalEvents int64 + cycles map[int64]struct{} + backpressureCycles map[int64]struct{} + instCount int64 + sendCount int64 + recvCount int64 + memoryCount int64 + totalEvents int64 + backpressureCount int64 + scheduleBubbleStallCount int64 + operandWaitStallCount int64 + outputBlockedStallCount int64 +} + +type queueKey struct { + label string + x int + y int + kind string + direction string + color string +} + +type queueAccumulator struct { + capacity int + sampleCount int64 + occupancySum int64 + peakOccupancy int +} + +type collector struct { + tileData map[tileCoord]*tileAccumulator + queueData map[queueKey]*queueAccumulator + globalCycleSet map[int64]struct{} + globalBackpressureCycles map[int64]struct{} + maxCycle int64 + maxX int + maxY int + globalBackpressureCount int64 + minWallTS *time.Time + maxWallTS *time.Time +} + +// Observer collects report statistics directly from runtime trace observations. 
+type Observer struct { + collector *collector } var tileEndpointPattern = regexp.MustCompile(`Device\.Tile\[(\d+)\]\[(\d+)\]\.Core\.`) -// GenerateFromLog builds a report by parsing a JSON trace log. -// -//nolint:gocyclo,funlen -func GenerateFromLog(opts GenerateOptions) (Report, error) { - if opts.LogPath == "" { - return Report{}, fmt.Errorf("log path is required") +// NewObserver creates a report observer for runtime trace events. +func NewObserver() *Observer { + return &Observer{ + collector: newCollector(), } +} - topN := opts.TopN - if topN <= 0 { - topN = 5 +func newCollector() *collector { + return &collector{ + tileData: make(map[tileCoord]*tileAccumulator), + queueData: make(map[queueKey]*queueAccumulator), + globalCycleSet: make(map[int64]struct{}), + globalBackpressureCycles: make(map[int64]struct{}), + maxCycle: -1, + maxX: -1, + maxY: -1, } +} - file, err := os.Open(opts.LogPath) - if err != nil { - return Report{}, fmt.Errorf("open log file: %w", err) +// Observe records a runtime trace observation into the in-memory report collector. +func (o *Observer) Observe(observation core.TraceObservation) { + if o == nil || o.collector == nil { + return + } + + event := traceEvent{ + Timestamp: observation.WallTime.Format(time.RFC3339Nano), + Msg: observation.Msg, + Behavior: observation.Behavior, + Time: observation.Time, + X: observation.X, + Y: observation.Y, + Src: observation.Src, + Dst: observation.Dst, + From: observation.From, + To: observation.To, + Label: observation.Label, + Kind: observation.Kind, + Direction: observation.Direction, + Color: observation.Color, + Occupancy: observation.Occupancy, + Capacity: observation.Capacity, + } + o.collector.observe(event) +} + +// Build materializes a Report using the collected runtime events. 
+func (o *Observer) Build(opts GenerateOptions) Report { + if o == nil || o.collector == nil { + return Report{ + TestName: opts.TestName, + LogPath: opts.LogPath, + Grid: GridInfo{ + Width: opts.GridWidth, + Height: opts.GridHeight, + }, + Passed: opts.Passed, + MismatchCount: opts.MismatchCount, + } } - defer func() { _ = file.Close() }() + return o.collector.build(opts) +} - tileData := make(map[tileCoord]*tileAccumulator) - globalCycleSet := make(map[int64]struct{}) +//nolint:gocyclo +func (c *collector) observe(event traceEvent) { + if ts, err := time.Parse(time.RFC3339Nano, event.Timestamp); err == nil { + if c.minWallTS == nil || ts.Before(*c.minWallTS) { + t := ts + c.minWallTS = &t + } + if c.maxWallTS == nil || ts.After(*c.maxWallTS) { + t := ts + c.maxWallTS = &t + } + } - var maxCycle int64 = -1 - maxX, maxY := -1, -1 + cycle, hasCycle := parseCycle(event.Time) + if hasCycle && cycle > c.maxCycle { + c.maxCycle = cycle + } - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := scanner.Bytes() - if len(line) == 0 { - continue - } + if event.Msg == "Queue" { + c.observeQueue(event) + return + } - var event traceEvent - if err := json.Unmarshal(line, &event); err != nil { - continue - } + coord, ok := resolveTileCoord(event) + if !ok { + return + } - coord, ok := resolveTileCoord(event) - if !ok { - continue - } + if coord.x > c.maxX { + c.maxX = coord.x + } + if coord.y > c.maxY { + c.maxY = coord.y + } - if coord.x > maxX { - maxX = coord.x - } - if coord.y > maxY { - maxY = coord.y + acc, exists := c.tileData[coord] + if !exists { + acc = &tileAccumulator{ + cycles: make(map[int64]struct{}), + backpressureCycles: make(map[int64]struct{}), } + c.tileData[coord] = acc + } - acc, exists := tileData[coord] - if !exists { - acc = &tileAccumulator{ - cycles: make(map[int64]struct{}), - } - tileData[coord] = acc + isBackpressureEvent := event.Msg == "Backpressure" + if hasCycle && !isBackpressureEvent { + acc.cycles[cycle] = struct{}{} + 
c.globalCycleSet[cycle] = struct{}{} + if cycle > c.maxCycle { + c.maxCycle = cycle } + } - cycle, hasCycle := parseCycle(event.Time) + if classifyAndCount(event, acc, cycle, hasCycle) { + c.globalBackpressureCount++ if hasCycle { - acc.cycles[cycle] = struct{}{} - globalCycleSet[cycle] = struct{}{} - if cycle > maxCycle { - maxCycle = cycle - } + c.globalBackpressureCycles[cycle] = struct{}{} } + } +} - classifyAndCount(event, acc) +func (c *collector) observeQueue(event traceEvent) { + if event.X == nil || event.Y == nil || event.Occupancy == nil { + return } - if err := scanner.Err(); err != nil { - return Report{}, fmt.Errorf("scan log file: %w", err) + key := queueKey{ + label: event.Label, + x: *event.X, + y: *event.Y, + kind: event.Kind, + direction: event.Direction, + color: event.Color, + } + + acc, exists := c.queueData[key] + if !exists { + acc = &queueAccumulator{} + c.queueData[key] = acc + } + if event.Capacity != nil && *event.Capacity > 0 { + acc.capacity = *event.Capacity + } + acc.sampleCount++ + acc.occupancySum += int64(*event.Occupancy) + if *event.Occupancy > acc.peakOccupancy { + acc.peakOccupancy = *event.Occupancy + } +} + +//nolint:gocyclo,funlen +func (c *collector) build(opts GenerateOptions) Report { + topN := opts.TopN + if topN <= 0 { + topN = 5 } totalCycles := int64(0) - if maxCycle >= 0 { - totalCycles = maxCycle + 1 + if c.maxCycle >= 0 { + totalCycles = c.maxCycle + 1 } - activeCycles := int64(len(globalCycleSet)) + activeCycles := int64(len(c.globalCycleSet)) idleCycles := totalCycles - activeCycles if idleCycles < 0 { idleCycles = 0 @@ -188,11 +362,11 @@ func GenerateFromLog(opts GenerateOptions) (Report, error) { width := opts.GridWidth if width <= 0 { - width = maxX + 1 + width = c.maxX + 1 } height := opts.GridHeight if height <= 0 { - height = maxY + 1 + height = c.maxY + 1 } if width < 0 { width = 0 @@ -201,8 +375,8 @@ func GenerateFromLog(opts GenerateOptions) (Report, error) { height = 0 } - tiles := make([]TileStats, 0, 
len(tileData)) - for coord, acc := range tileData { + tiles := make([]TileStats, 0, len(c.tileData)) + for coord, acc := range c.tileData { activeTileCycles := int64(len(acc.cycles)) util := 0.0 if totalCycles > 0 { @@ -210,16 +384,20 @@ func GenerateFromLog(opts GenerateOptions) (Report, error) { } tiles = append(tiles, TileStats{ - X: coord.x, - Y: coord.y, - Coord: formatCoord(coord.x, coord.y), - ActiveCycles: activeTileCycles, - UtilizationPct: util, - InstCount: acc.instCount, - SendCount: acc.sendCount, - RecvCount: acc.recvCount, - MemoryCount: acc.memoryCount, - TotalEvents: acc.totalEvents, + X: coord.x, + Y: coord.y, + Coord: formatCoord(coord.x, coord.y), + ActiveCycles: activeTileCycles, + UtilizationPct: util, + InstCount: acc.instCount, + SendCount: acc.sendCount, + RecvCount: acc.recvCount, + MemoryCount: acc.memoryCount, + TotalEvents: acc.totalEvents, + BackpressureCount: acc.backpressureCount, + ScheduleBubbleStallCount: acc.scheduleBubbleStallCount, + OperandWaitStallCount: acc.operandWaitStallCount, + OutputBlockedStallCount: acc.outputBlockedStallCount, }) } @@ -235,6 +413,9 @@ func GenerateFromLog(opts GenerateOptions) (Report, error) { var recvTotal int64 var memoryTotal int64 var eventTotal int64 + var scheduleBubbleStallTotal int64 + var operandWaitStallTotal int64 + var outputBlockedStallTotal int64 for _, tile := range tiles { instTotal += tile.InstCount @@ -242,30 +423,99 @@ func GenerateFromLog(opts GenerateOptions) (Report, error) { recvTotal += tile.RecvCount memoryTotal += tile.MemoryCount eventTotal += tile.TotalEvents + scheduleBubbleStallTotal += tile.ScheduleBubbleStallCount + operandWaitStallTotal += tile.OperandWaitStallCount + outputBlockedStallTotal += tile.OutputBlockedStallCount } topHotTiles := buildTopHotTiles(tiles, topN) + topBackpressureTiles := buildTopBackpressureTiles(tiles, topN) + watchedQueues := buildQueueStats(c.queueData) + wallClockDurationSec := 0.0 + if c.minWallTS != nil && c.maxWallTS != nil { + d := 
c.maxWallTS.Sub(*c.minWallTS).Seconds() + if d > 0 { + wallClockDurationSec = d + } + } + instThroughputPerCycle := 0.0 + eventThroughputPerCycle := 0.0 + if totalCycles > 0 { + instThroughputPerCycle = float64(instTotal) / float64(totalCycles) + eventThroughputPerCycle = float64(eventTotal) / float64(totalCycles) + } + instThroughputPerSec := 0.0 + if wallClockDurationSec > 0 { + instThroughputPerSec = float64(instTotal) / wallClockDurationSec + } + + return Report{ + TestName: opts.TestName, + LogPath: opts.LogPath, + Grid: GridInfo{Width: width, Height: height}, + TotalCycles: totalCycles, + ActiveCycles: activeCycles, + IdleCycles: idleCycles, + Passed: opts.Passed, + MismatchCount: opts.MismatchCount, + InstCount: instTotal, + SendCount: sendTotal, + RecvCount: recvTotal, + MemoryCount: memoryTotal, + TotalEvents: eventTotal, + WallClockDurationSec: wallClockDurationSec, + InstThroughputPerCycle: instThroughputPerCycle, + EventThroughputPerCycle: eventThroughputPerCycle, + InstThroughputPerSec: instThroughputPerSec, + BackpressureCount: c.globalBackpressureCount, + BackpressureCycles: int64(len(c.globalBackpressureCycles)), + ScheduleBubbleStallCount: scheduleBubbleStallTotal, + OperandWaitStallCount: operandWaitStallTotal, + OutputBlockedStallCount: outputBlockedStallTotal, + ActiveTileCount: len(tiles), + Tiles: tiles, + TopHotTiles: topHotTiles, + TopBackpressureTiles: topBackpressureTiles, + WatchedQueues: watchedQueues, + } +} - report := Report{ - TestName: opts.TestName, - LogPath: opts.LogPath, - Grid: GridInfo{Width: width, Height: height}, - TotalCycles: totalCycles, - ActiveCycles: activeCycles, - IdleCycles: idleCycles, - Passed: opts.Passed, - MismatchCount: opts.MismatchCount, - InstCount: instTotal, - SendCount: sendTotal, - RecvCount: recvTotal, - MemoryCount: memoryTotal, - TotalEvents: eventTotal, - ActiveTileCount: len(tiles), - Tiles: tiles, - TopHotTiles: topHotTiles, +// GenerateFromLog builds a report by parsing a JSON trace log. 
+// +//nolint:gocyclo,funlen +func GenerateFromLog(opts GenerateOptions) (Report, error) { + if opts.LogPath == "" { + return Report{}, fmt.Errorf("log path is required") + } + + file, err := os.Open(opts.LogPath) + if err != nil { + return Report{}, fmt.Errorf("open log file: %w", err) + } + defer func() { _ = file.Close() }() + + collector := newCollector() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Bytes() + if len(line) == 0 { + continue + } + + var event traceEvent + if err := json.Unmarshal(line, &event); err != nil { + continue + } + + collector.observe(event) + } + + if err := scanner.Err(); err != nil { + return Report{}, fmt.Errorf("scan log file: %w", err) } - return report, nil + return collector.build(opts), nil } // SaveJSON writes a report as pretty-printed JSON. @@ -288,6 +538,8 @@ func PrintSummary(report Report) { } // PrintSummaryToWriter prints a compact report summary to the writer. +// +//nolint:funlen func PrintSummaryToWriter(report Report, w io.Writer) { fmt.Fprintln(w, "========================") fmt.Fprintln(w, "Zeonica Report Summary") @@ -298,6 +550,22 @@ func PrintSummaryToWriter(report Report, w io.Writer) { fmt.Fprintf(w, "cycles: total=%d active=%d idle=%d\n", report.TotalCycles, report.ActiveCycles, report.IdleCycles) fmt.Fprintf(w, "events: total=%d inst=%d send=%d recv=%d memory=%d\n", report.TotalEvents, report.InstCount, report.SendCount, report.RecvCount, report.MemoryCount) + fmt.Fprintf(w, "simulation time: wall=%.3fs\n", report.WallClockDurationSec) + fmt.Fprintf( + w, + "throughput: inst/cycle=%.4f events/cycle=%.4f inst/s=%.2f\n", + report.InstThroughputPerCycle, + report.EventThroughputPerCycle, + report.InstThroughputPerSec, + ) + fmt.Fprintf(w, "backpressure: count=%d cycles=%d\n", report.BackpressureCount, report.BackpressureCycles) + fmt.Fprintf( + w, + "stall breakdown: schedule_bubble=%d operand_wait=%d output_blocked=%d\n", + report.ScheduleBubbleStallCount, + 
report.OperandWaitStallCount, + report.OutputBlockedStallCount, + ) fmt.Fprintf(w, "active tiles: %d\n", report.ActiveTileCount) if report.Passed != nil { fmt.Fprintf(w, "passed: %t\n", *report.Passed) @@ -313,9 +581,35 @@ func PrintSummaryToWriter(report Report, w io.Writer) { idx+1, tile.Coord, tile.UtilizationPct, tile.ActiveCycles, tile.TotalEvents) } } + if len(report.TopBackpressureTiles) > 0 { + fmt.Fprintln(w, "top backpressure tiles:") + for idx, tile := range report.TopBackpressureTiles { + fmt.Fprintf(w, " %d) %s bp=%d\n", idx+1, tile.Coord, tile.BackpressureCount) + } + } + if len(report.WatchedQueues) > 0 { + fmt.Fprintln(w, "watched queues:") + for idx, queue := range report.WatchedQueues { + fmt.Fprintf( + w, + " %d) %s %s %s/%s avg=%.2f peak=%d cap=%d util=%.2f%% samples=%d\n", + idx+1, + queue.Coord, + queue.Label, + queue.Direction, + queue.Color, + queue.AvgOccupancy, + queue.PeakOccupancy, + queue.Capacity, + queue.AvgUtilizationPct, + queue.SampleCount, + ) + } + } } -func classifyAndCount(event traceEvent, acc *tileAccumulator) { +//nolint:gocyclo +func classifyAndCount(event traceEvent, acc *tileAccumulator, cycle int64, hasCycle bool) bool { switch event.Msg { case "Inst": acc.instCount++ @@ -331,7 +625,27 @@ func classifyAndCount(event traceEvent, acc *tileAccumulator) { acc.recvCount++ } acc.totalEvents++ + case "Backpressure": + acc.backpressureCount++ + if hasCycle { + acc.backpressureCycles[cycle] = struct{}{} + } + return true + case "Stall": + switch event.Behavior { + case "schedule_bubble": + acc.scheduleBubbleStallCount++ + case "operand_wait": + acc.operandWaitStallCount++ + case "output_blocked": + acc.outputBlockedStallCount++ + } + acc.totalEvents++ + case "Queue": + // Queue samples are aggregated separately in watchedQueues and should not + // inflate existing event throughput counters. 
} + return false } func resolveTileCoord(event traceEvent) (tileCoord, bool) { @@ -379,13 +693,14 @@ func parseTileFromEndpoint(endpoint string) (tileCoord, bool) { return tileCoord{}, false } - var x int - var y int - if _, err := fmt.Sscanf(matches[0], "Device.Tile[%d][%d].Core.", &x, &y); err != nil { + var row int + var col int + if _, err := fmt.Sscanf(matches[0], "Device.Tile[%d][%d].Core.", &row, &col); err != nil { return tileCoord{}, false } - return tileCoord{x: x, y: y}, true + // Endpoint naming is Tile[row][col], while report coordinates are (x=col, y=row). + return tileCoord{x: col, y: row}, true } func parseCycle(timeValue *float64) (int64, bool) { @@ -438,6 +753,86 @@ func buildTopHotTiles(tiles []TileStats, topN int) []TopHotTile { return out } +func buildQueueStats(queueData map[queueKey]*queueAccumulator) []QueueStats { + if len(queueData) == 0 { + return nil + } + + stats := make([]QueueStats, 0, len(queueData)) + for key, acc := range queueData { + avgOccupancy := 0.0 + if acc.sampleCount > 0 { + avgOccupancy = float64(acc.occupancySum) / float64(acc.sampleCount) + } + avgUtilizationPct := 0.0 + if acc.capacity > 0 { + avgUtilizationPct = avgOccupancy * 100.0 / float64(acc.capacity) + } + stats = append(stats, QueueStats{ + Label: key.label, + Kind: key.kind, + X: key.x, + Y: key.y, + Coord: formatCoord(key.x, key.y), + Direction: key.direction, + Color: key.color, + Capacity: acc.capacity, + SampleCount: acc.sampleCount, + AvgOccupancy: avgOccupancy, + PeakOccupancy: acc.peakOccupancy, + AvgUtilizationPct: avgUtilizationPct, + }) + } + + sort.Slice(stats, func(i, j int) bool { + if stats[i].Y != stats[j].Y { + return stats[i].Y < stats[j].Y + } + if stats[i].X != stats[j].X { + return stats[i].X < stats[j].X + } + if stats[i].Direction != stats[j].Direction { + return stats[i].Direction < stats[j].Direction + } + return stats[i].Label < stats[j].Label + }) + + return stats +} + +func buildTopBackpressureTiles(tiles []TileStats, topN int) 
[]TopBackpressureTile { + if len(tiles) == 0 || topN <= 0 { + return nil + } + tmp := make([]TileStats, len(tiles)) + copy(tmp, tiles) + sort.Slice(tmp, func(i, j int) bool { + if tmp[i].BackpressureCount != tmp[j].BackpressureCount { + return tmp[i].BackpressureCount > tmp[j].BackpressureCount + } + if tmp[i].Y != tmp[j].Y { + return tmp[i].Y < tmp[j].Y + } + return tmp[i].X < tmp[j].X + }) + if topN > len(tmp) { + topN = len(tmp) + } + out := make([]TopBackpressureTile, 0, topN) + for i := 0; i < topN; i++ { + if tmp[i].BackpressureCount <= 0 { + continue + } + out = append(out, TopBackpressureTile{ + X: tmp[i].X, + Y: tmp[i].Y, + Coord: tmp[i].Coord, + BackpressureCount: tmp[i].BackpressureCount, + }) + } + return out +} + func formatCoord(x, y int) string { return fmt.Sprintf("(%d,%d)", x, y) } diff --git a/runtimecfg/disable-trace.report.json b/runtimecfg/disable-trace.report.json new file mode 100644 index 0000000..0cba9de --- /dev/null +++ b/runtimecfg/disable-trace.report.json @@ -0,0 +1,57 @@ +{ + "testName": "disable-trace", + "logPath": "/tmp/TestInitTraceLoggerDisableTraceCreatesEmptyLogAndReport326622658/001/trace.log", + "grid": { + "width": 1, + "height": 1 + }, + "totalCycles": 1, + "activeCyclesGlobal": 1, + "idleCyclesGlobal": 0, + "passed": true, + "mismatchCount": 0, + "instCount": 1, + "sendCount": 0, + "recvCount": 0, + "memoryCount": 0, + "totalEvents": 1, + "wallClockDurationSec": 0, + "instThroughputPerCycle": 1, + "eventThroughputPerCycle": 1, + "instThroughputPerSec": 0, + "backpressureCount": 0, + "backpressureCycles": 0, + "scheduleBubbleStallCount": 0, + "operandWaitStallCount": 0, + "outputBlockedStallCount": 0, + "activeTileCount": 1, + "tiles": [ + { + "x": 0, + "y": 0, + "coord": "(0,0)", + "activeCycles": 1, + "utilizationPct": 100, + "instCount": 1, + "sendCount": 0, + "recvCount": 0, + "memoryCount": 0, + "totalEvents": 1, + "backpressureCount": 0, + "scheduleBubbleStallCount": 0, + "operandWaitStallCount": 0, + 
"outputBlockedStallCount": 0 + } + ], + "topHotTiles": [ + { + "x": 0, + "y": 0, + "coord": "(0,0)", + "utilizationPct": 100, + "activeCycles": 1, + "totalEvents": 1 + } + ], + "topBackpressureTiles": [] +} \ No newline at end of file diff --git a/runtimecfg/enable_trace_test.go b/runtimecfg/enable_trace_test.go new file mode 100644 index 0000000..c44f143 --- /dev/null +++ b/runtimecfg/enable_trace_test.go @@ -0,0 +1,123 @@ +package runtimecfg + +import ( + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/sarchlab/zeonica/core" + "github.com/sarchlab/zeonica/report" +) + +func TestResolveEnableTraceDefaultsFalse(t *testing.T) { + cfg, err := Resolve(ArchSpec{}, "enable-trace-default") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + if cfg.EnableTrace { + t.Fatal("expected enableTrace default to be false") + } +} + +func TestResolveEnableTraceTrue(t *testing.T) { + enabled := true + cfg, err := Resolve(ArchSpec{ + Simulator: Simulator{ + Logging: SimulatorLogging{ + EnableTrace: &enabled, + }, + }, + }, "enable-trace-true") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + if !cfg.EnableTrace { + t.Fatal("expected enableTrace to be true") + } +} + +func TestInitTraceLoggerDisableTraceCreatesEmptyLogAndReport(t *testing.T) { + logPath := filepath.Join(t.TempDir(), "trace.log") + rt := &Runtime{ + Config: ResolvedConfig{ + TestName: "disable-trace", + Rows: 1, + Columns: 1, + LoggingEnabled: true, + EnableTrace: false, + LogPath: logPath, + }, + Observer: report.NewObserver(), + } + + traceLog, err := rt.InitTraceLogger(core.LevelTrace) + if err != nil { + t.Fatalf("InitTraceLogger returned error: %v", err) + } + + core.Trace("Inst", "Time", float64(0), "X", 0, "Y", 0) + + if err := CloseTraceLog(traceLog); err != nil { + t.Fatalf("CloseTraceLog returned error: %v", err) + } + + info, err := os.Stat(logPath) + if err != nil { + t.Fatalf("Stat returned error: %v", err) + } + if info.Size() != 0 { + 
t.Fatalf("expected empty trace log when enableTrace=false, got %d bytes", info.Size()) + } + + passed := true + mismatch := 0 + reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) + if err != nil { + t.Fatalf("GenerateSaveAndPrintReport returned error: %v", err) + } + + content, err := os.ReadFile(reportPath) + if err != nil { + t.Fatalf("ReadFile returned error: %v", err) + } + if !strings.Contains(string(content), `"logPath": "`+logPath+`"`) { + t.Fatalf("expected report to preserve logPath %q, got %s", logPath, string(content)) + } +} + +func TestInitTraceLoggerEnableTraceWritesEvents(t *testing.T) { + logPath := filepath.Join(t.TempDir(), "trace.log") + rt := &Runtime{ + Config: ResolvedConfig{ + TestName: "enable-trace", + Rows: 1, + Columns: 1, + LoggingEnabled: true, + EnableTrace: true, + LogPath: logPath, + }, + Observer: report.NewObserver(), + } + + traceLog, err := rt.InitTraceLogger(core.LevelTrace) + if err != nil { + t.Fatalf("InitTraceLogger returned error: %v", err) + } + + core.Trace("Inst", "Time", float64(1), "X", 0, "Y", 0) + time.Sleep(5 * time.Millisecond) + + if err := CloseTraceLog(traceLog); err != nil { + t.Fatalf("CloseTraceLog returned error: %v", err) + } + + info, err := os.Stat(logPath) + if err != nil { + t.Fatalf("Stat returned error: %v", err) + } + if info.Size() == 0 { + t.Fatal("expected non-empty trace log when enableTrace=true") + } +} diff --git a/runtimecfg/report.go b/runtimecfg/report.go index e32d46d..a45daa0 100644 --- a/runtimecfg/report.go +++ b/runtimecfg/report.go @@ -10,9 +10,6 @@ const defaultTopN = 5 // BuildReportOptions builds report options from resolved runtime configuration. 
func (r *Runtime) BuildReportOptions(topN int, passed *bool, mismatchCount *int) (report.GenerateOptions, error) { - if !r.Config.LoggingEnabled { - return report.GenerateOptions{}, fmt.Errorf("logging is disabled, cannot build report options from trace log") - } if r.Config.LogPath == "" { return report.GenerateOptions{}, fmt.Errorf("log path is empty, cannot build report options") } @@ -43,9 +40,14 @@ func (r *Runtime) GenerateAndSaveReport(topN int, passed *bool, mismatchCount *i return report.Report{}, "", err } - result, err := report.GenerateFromLog(opts) - if err != nil { - return report.Report{}, "", fmt.Errorf("generate report from log: %w", err) + var result report.Report + if r.Observer != nil { + result = r.Observer.Build(opts) + } else { + result, err = report.GenerateFromLog(opts) + if err != nil { + return report.Report{}, "", fmt.Errorf("generate report from log: %w", err) + } } reportPath := r.DefaultReportPath() diff --git a/runtimecfg/runtime.go b/runtimecfg/runtime.go index 1e744d6..49db08f 100644 --- a/runtimecfg/runtime.go +++ b/runtimecfg/runtime.go @@ -1,9 +1,11 @@ package runtimecfg import ( + "context" "fmt" "log/slog" "os" + "path/filepath" "regexp" "strconv" "strings" @@ -12,32 +14,72 @@ import ( "github.com/sarchlab/zeonica/api" "github.com/sarchlab/zeonica/cgra" "github.com/sarchlab/zeonica/config" + "github.com/sarchlab/zeonica/core" + "github.com/sarchlab/zeonica/report" ) const ( - defaultRows = 4 - defaultColumns = 4 - defaultExecutionModel = "serial" - defaultDriverName = "Driver" - defaultDeviceName = "Device" - defaultLogTemplate = ".json.log" + defaultRows = 4 + defaultColumns = 4 + defaultExecutionModel = "serial" + defaultExecutionPolicy = "in_order_dataflow" + defaultStrictMaxSlip = int64(4) + defaultStrictFail = false + defaultEnableFIFOModel = false + defaultEnableQueueWatches = false + defaultDriverName = "Driver" + defaultDeviceName = "Device" + defaultLogTemplate = ".json.log" + + defaultDriverPortIncomingBufferDepth = 
1 + defaultDriverPortOutgoingBufferDepth = 1 + defaultCorePortIncomingBufferDepth = 1 + defaultCorePortOutgoingBufferDepth = 1 + defaultNumRegisters = 64 + defaultLocalMemoryWords = 1024 + defaultMemoryMode = "simple" + defaultLinkLatency = 1 + defaultLinkBandwidth = 32 + linkTimingModelParseOnly = "parse_only" ) var freqPattern = regexp.MustCompile(`^([0-9]+)\s*(ghz|mhz|khz|hz)$`) // ResolvedConfig is the executable runtime configuration after defaults/resolution. type ResolvedConfig struct { - TestName string - Rows int - Columns int - ExecutionModel string - DriverName string - DriverFreq sim.Freq - DeviceName string - DeviceFreq sim.Freq - BindToArchitecture bool - LoggingEnabled bool - LogPath string + TestName string + Rows int + Columns int + ExecutionModel string + ExecutionPolicy string + StrictMaxSlip int64 + StrictFailOnViolation bool + EnableFIFOModel bool + EnableQueueWatches bool + DriverName string + DriverFreq sim.Freq + DeviceName string + DeviceFreq sim.Freq + BindToArchitecture bool + LoggingEnabled bool + EnableTrace bool + LogPath string + + DriverPortIncomingBufferDepth int + DriverPortOutgoingBufferDepth int + CorePortIncomingBufferDepth int + CorePortOutgoingBufferDepth int + NumRegisters int + LocalMemoryWords int + MemoryMode string + MemoryShare map[[2]int]int + LinkLatency int + LinkBandwidth int + LinkTimingModel string + ProgramYAML string + ReportName string + QueueWatches []core.QueueWatchSpec + BufferSweepDepths []int } // BuildOverrides allows optional size override when not binding to architecture. @@ -54,6 +96,7 @@ type Runtime struct { Engine sim.Engine Driver api.Driver Device cgra.Device + Observer *report.Observer } // LoadRuntime loads arch spec, resolves config, and builds runtime objects. 
@@ -63,7 +106,7 @@ func LoadRuntime(specPath, testName string) (*Runtime, error) { return nil, err } - cfg, err := Resolve(spec, testName) + cfg, err := ResolveWithSpecPath(spec, specPath, testName) if err != nil { return nil, err } @@ -78,16 +121,79 @@ func LoadRuntime(specPath, testName string) (*Runtime, error) { } // Resolve resolves defaults and validates runtime values from ArchSpec. +// +//nolint:gocyclo,funlen func Resolve(spec ArchSpec, testName string) (ResolvedConfig, error) { + return ResolveWithSpecPath(spec, "", testName) +} + +// ResolveWithSpecPath resolves defaults and validates runtime values from ArchSpec, +// using specPath to resolve case2 relative paths when available. +// +//nolint:gocyclo,funlen +func ResolveWithSpecPath(spec ArchSpec, specPath, testName string) (ResolvedConfig, error) { + programYAML := resolveSpecRelativePath(specPath, spec.Simulator.ProgramYAML) + reportName := strings.TrimSpace(spec.Simulator.ReportName) + queueWatches := append([]core.QueueWatchSpec(nil), spec.Simulator.QueueWatches...) 
+ bufferSweepDepths, err := resolveBufferSweepDepths(spec.Simulator.BufferSweepDepths) + if err != nil { + return ResolvedConfig{}, err + } + if err := core.ValidateQueueWatchSpecs(queueWatches); err != nil { + return ResolvedConfig{}, fmt.Errorf("simulator.queue_watches: %w", err) + } + + effectiveTestName := strings.TrimSpace(testName) + if effectiveTestName == "" && reportName != "" { + effectiveTestName = reportName + } + resolved := ResolvedConfig{ - TestName: normalizeTestName(testName), - Rows: defaultOrPositive(spec.CGRADefaults.Rows, defaultRows), - Columns: defaultOrPositive(spec.CGRADefaults.Columns, defaultColumns), - ExecutionModel: defaultOrString(spec.Simulator.ExecutionModel, defaultExecutionModel), - DriverName: defaultOrString(spec.Simulator.Driver.Name, defaultDriverName), - DeviceName: defaultOrString(spec.Simulator.Device.Name, defaultDeviceName), - BindToArchitecture: defaultOrBool(spec.Simulator.Device.BindToArchitecture, true), - LoggingEnabled: defaultOrBool(spec.Simulator.Logging.Enabled, true), + TestName: normalizeTestName(effectiveTestName), + Rows: defaultOrPositive(spec.CGRADefaults.Rows, defaultRows), + Columns: defaultOrPositive(spec.CGRADefaults.Columns, defaultColumns), + ExecutionModel: defaultOrString(spec.Simulator.ExecutionModel, defaultExecutionModel), + ExecutionPolicy: defaultOrString(spec.Simulator.ExecutionPolicy, defaultExecutionPolicy), + EnableFIFOModel: defaultOrBool(spec.Simulator.EnableFIFOModel, defaultEnableFIFOModel), + EnableQueueWatches: defaultOrBool(spec.Simulator.EnableQueueWatches, defaultEnableQueueWatches), + StrictMaxSlip: defaultOrInt64(spec.Simulator.StrictMaxSlip, defaultStrictMaxSlip), + StrictFailOnViolation: defaultOrBool(spec.Simulator.StrictFailOnViolation, defaultStrictFail), + DriverName: defaultOrString(spec.Simulator.Driver.Name, defaultDriverName), + DeviceName: defaultOrString(spec.Simulator.Device.Name, defaultDeviceName), + BindToArchitecture: 
defaultOrBool(spec.Simulator.Device.BindToArchitecture, true), + LoggingEnabled: defaultOrBool(spec.Simulator.Logging.Enabled, true), + EnableTrace: defaultOrBool(spec.Simulator.Logging.EnableTrace, false), + LinkTimingModel: linkTimingModelParseOnly, + DriverPortIncomingBufferDepth: defaultDriverPortIncomingBufferDepth, + DriverPortOutgoingBufferDepth: defaultDriverPortOutgoingBufferDepth, + CorePortIncomingBufferDepth: defaultCorePortIncomingBufferDepth, + CorePortOutgoingBufferDepth: defaultCorePortOutgoingBufferDepth, + NumRegisters: defaultNumRegisters, + LocalMemoryWords: defaultLocalMemoryWords, + MemoryMode: defaultMemoryMode, + LinkLatency: defaultLinkLatency, + LinkBandwidth: defaultLinkBandwidth, + ProgramYAML: programYAML, + ReportName: reportName, + QueueWatches: queueWatches, + BufferSweepDepths: bufferSweepDepths, + } + + normalizedPolicy, err := normalizeExecutionPolicy(resolved.ExecutionPolicy) + if err != nil { + return ResolvedConfig{}, err + } + resolved.ExecutionPolicy = normalizedPolicy + + if envSlip, ok, err := parseInt64Env("ZEONICA_STRICT_MAX_SLIP"); err != nil { + return ResolvedConfig{}, err + } else if ok { + resolved.StrictMaxSlip = envSlip + } + if envFail, ok, err := parseBoolEnv("ZEONICA_STRICT_FAIL_ON_VIOLATION"); err != nil { + return ResolvedConfig{}, err + } else if ok { + resolved.StrictFailOnViolation = envFail } driverFreq, err := parseFrequency(spec.Simulator.Driver.Frequency, 1*sim.GHz) @@ -105,6 +211,87 @@ func Resolve(spec ArchSpec, testName string) (ResolvedConfig, error) { logTemplate := defaultOrString(spec.Simulator.Logging.File, defaultLogTemplate) resolved.LogPath = resolveLogPath(logTemplate, resolved.TestName) + resolved.DriverPortIncomingBufferDepth, err = resolvePositivePtr( + spec.Simulator.Driver.PortIncomingBufferDepth, + defaultDriverPortIncomingBufferDepth, + "simulator.driver.port_incoming_buffer_depth", + ) + if err != nil { + return ResolvedConfig{}, err + } + resolved.DriverPortOutgoingBufferDepth, err 
= resolvePositivePtr( + spec.Simulator.Driver.PortOutgoingBufferDepth, + defaultDriverPortOutgoingBufferDepth, + "simulator.driver.port_outgoing_buffer_depth", + ) + if err != nil { + return ResolvedConfig{}, err + } + resolved.CorePortIncomingBufferDepth, err = resolvePositivePtr( + spec.Simulator.Device.PortIncomingBufferDepth, + defaultCorePortIncomingBufferDepth, + "simulator.device.port_incoming_buffer_depth", + ) + if err != nil { + return ResolvedConfig{}, err + } + resolved.CorePortOutgoingBufferDepth, err = resolvePositivePtr( + spec.Simulator.Device.PortOutgoingBufferDepth, + defaultCorePortOutgoingBufferDepth, + "simulator.device.port_outgoing_buffer_depth", + ) + if err != nil { + return ResolvedConfig{}, err + } + + resolved.NumRegisters, err = resolvePositive( + spec.TileDefaults.NumRegisters, + defaultNumRegisters, + "tile_defaults.num_registers", + ) + if err != nil { + return ResolvedConfig{}, err + } + resolved.LocalMemoryWords, err = resolvePositive( + spec.TileDefaults.LocalMemoryWords, + defaultLocalMemoryWords, + "tile_defaults.local_memory_words", + ) + if err != nil { + return ResolvedConfig{}, err + } + + resolved.MemoryMode, err = normalizeMemoryMode(defaultOrString(spec.Simulator.Device.MemoryMode, defaultMemoryMode)) + if err != nil { + return ResolvedConfig{}, err + } + resolved.MemoryShare, err = resolveMemoryShare( + resolved.MemoryMode, + resolved.Rows, + resolved.Columns, + spec.Simulator.Device.MemoryShare, + ) + if err != nil { + return ResolvedConfig{}, err + } + + resolved.LinkLatency, err = resolveNonNegativePtr( + spec.LinkDefaults.Latency, + defaultLinkLatency, + "link_defaults.latency", + ) + if err != nil { + return ResolvedConfig{}, err + } + resolved.LinkBandwidth, err = resolvePositivePtr( + spec.LinkDefaults.Bandwidth, + defaultLinkBandwidth, + "link_defaults.bandwidth", + ) + if err != nil { + return ResolvedConfig{}, err + } + return resolved, nil } @@ -133,6 +320,7 @@ func BuildRuntime(cfg ResolvedConfig, overrides 
*BuildOverrides) (*Runtime, erro driver := api.DriverBuilder{}. WithEngine(engine). WithFreq(cfg.DriverFreq). + WithPortBufferDepth(cfg.DriverPortIncomingBufferDepth, cfg.DriverPortOutgoingBufferDepth). Build(cfg.DriverName) device := config.DeviceBuilder{}. @@ -140,33 +328,91 @@ func BuildRuntime(cfg ResolvedConfig, overrides *BuildOverrides) (*Runtime, erro WithFreq(cfg.DeviceFreq). WithWidth(width). WithHeight(height). + WithExecutionPolicy(cfg.ExecutionPolicy). + WithStrictTimingConfig(cfg.StrictMaxSlip, cfg.StrictFailOnViolation). + WithMemoryMode(cfg.MemoryMode). + WithMemoryShare(cfg.MemoryShare). + WithCorePortBufferDepth(cfg.CorePortIncomingBufferDepth, cfg.CorePortOutgoingBufferDepth). + WithEnableFIFOModel(cfg.EnableFIFOModel). + WithEnableQueueWatches(cfg.EnableQueueWatches). + WithQueueWatches(cfg.QueueWatches). + WithRegisterCount(cfg.NumRegisters). + WithLocalMemoryWords(cfg.LocalMemoryWords). Build(cfg.DeviceName) + if cfg.LinkTimingModel == linkTimingModelParseOnly { + slog.Info( + "link_defaults parsed in parse-only mode", + "latency", cfg.LinkLatency, + "bandwidth", cfg.LinkBandwidth, + ) + } + driver.RegisterDevice(device) return &Runtime{ - Config: cfg, - Engine: engine, - Driver: driver, - Device: device, + Config: cfg, + Engine: engine, + Driver: driver, + Device: device, + Observer: report.NewObserver(), }, nil } -// InitTraceLogger initializes the default slog JSON trace logger. 
-func (r *Runtime) InitTraceLogger(level slog.Leveler) (*os.File, error) { - if !r.Config.LoggingEnabled { +func resolveSpecRelativePath(specPath, target string) string { + trimmedTarget := strings.TrimSpace(target) + if trimmedTarget == "" { + return "" + } + cleanTarget := filepath.Clean(trimmedTarget) + if filepath.IsAbs(cleanTarget) || strings.TrimSpace(specPath) == "" { + return cleanTarget + } + return filepath.Clean(filepath.Join(filepath.Dir(specPath), cleanTarget)) +} + +func resolveBufferSweepDepths(input []int) ([]int, error) { + if len(input) == 0 { return nil, nil } + depths := make([]int, 0, len(input)) + for idx, depth := range input { + if depth <= 0 { + return nil, fmt.Errorf("simulator.buffer_sweep_depths[%d] must be > 0", idx) + } + depths = append(depths, depth) + } + return depths, nil +} +// InitTraceLogger initializes the default slog JSON trace logger. +func (r *Runtime) InitTraceLogger(level slog.Leveler) (*os.File, error) { file, err := os.Create(r.Config.LogPath) if err != nil { return nil, fmt.Errorf("create trace log file: %w", err) } - handler := slog.NewJSONHandler(file, &slog.HandlerOptions{ + core.SetTraceObserver(nil) + if r.Observer != nil { + core.SetTraceObserver(r.Observer.Observe) + } + core.SetTraceEnabled(r.Config.EnableTrace) + + if !r.Config.LoggingEnabled || !r.Config.EnableTrace { + stdoutHandler := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelError, + }) + slog.SetDefault(slog.New(stdoutHandler)) + return file, nil + } + + traceHandler := slog.NewJSONHandler(file, &slog.HandlerOptions{ Level: level, }) - slog.SetDefault(slog.New(handler)) + stdoutHandler := slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: slog.LevelError, + }) + slog.SetDefault(slog.New(newTeeHandler(stdoutHandler, traceHandler))) return file, nil } @@ -206,6 +452,13 @@ func defaultOrBool(value *bool, fallback bool) bool { return *value } +func defaultOrInt64(value *int64, fallback int64) int64 { + if value == nil 
{ + return fallback + } + return *value +} + func normalizeTestName(testName string) string { trimmed := strings.TrimSpace(testName) if trimmed == "" { @@ -254,3 +507,177 @@ func parseFrequency(input string, fallback sim.Freq) (sim.Freq, error) { return 0, fmt.Errorf("unsupported frequency unit %q", matches[2]) } } + +func parseInt64Env(name string) (int64, bool, error) { + raw, exists := os.LookupEnv(name) + if !exists { + return 0, false, nil + } + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + return 0, false, nil + } + value, err := strconv.ParseInt(trimmed, 10, 64) + if err != nil { + return 0, false, fmt.Errorf("invalid %s=%q: %w", name, raw, err) + } + return value, true, nil +} + +func parseBoolEnv(name string) (bool, bool, error) { + raw, exists := os.LookupEnv(name) + if !exists { + return false, false, nil + } + trimmed := strings.TrimSpace(raw) + if trimmed == "" { + return false, false, nil + } + value, err := strconv.ParseBool(trimmed) + if err != nil { + return false, false, fmt.Errorf("invalid %s=%q: %w", name, raw, err) + } + return value, true, nil +} + +func normalizeExecutionPolicy(input string) (string, error) { + text := strings.ToLower(strings.TrimSpace(input)) + switch text { + case "", "in_order_dataflow", "in-order-dataflow", "dynamic": + return "in_order_dataflow", nil + case "elastic_scheduled", "elastic-scheduled", "hybrid": + return "elastic_scheduled", nil + case "strict_timed", "strict-timed", "static": + return "strict_timed", nil + default: + return "", fmt.Errorf( + "unsupported execution_policy %q (supported: strict_timed, elastic_scheduled, in_order_dataflow)", + input, + ) + } +} + +func normalizeMemoryMode(input string) (string, error) { + text := strings.ToLower(strings.TrimSpace(input)) + switch text { + case "", "simple": + return "simple", nil + case "shared": + return "shared", nil + case "local": + return "local", nil + default: + return "", fmt.Errorf("unsupported memory_mode %q (supported: simple, shared, 
local)", input) + } +} + +func resolvePositive(value, fallback int, field string) (int, error) { + if value == 0 { + return fallback, nil + } + if value < 0 { + return 0, fmt.Errorf("%s must be > 0, got %d", field, value) + } + return value, nil +} + +func resolvePositivePtr(value *int, fallback int, field string) (int, error) { + if value == nil { + return fallback, nil + } + if *value <= 0 { + return 0, fmt.Errorf("%s must be > 0, got %d", field, *value) + } + return *value, nil +} + +func resolveNonNegativePtr(value *int, fallback int, field string) (int, error) { + if value == nil { + return fallback, nil + } + if *value < 0 { + return 0, fmt.Errorf("%s must be >= 0, got %d", field, *value) + } + return *value, nil +} + +func resolveMemoryShare(mode string, rows, cols int, entries []MemoryShareEntry) (map[[2]int]int, error) { + if mode != "shared" { + return nil, nil + } + + share := make(map[[2]int]int, rows*cols) + for y := 0; y < rows; y++ { + for x := 0; x < cols; x++ { + share[[2]int{x, y}] = 0 + } + } + + for _, entry := range entries { + if entry.TileX < 0 || entry.TileX >= cols || entry.TileY < 0 || entry.TileY >= rows { + return nil, fmt.Errorf( + "simulator.device.memory_share has out-of-range tile (%d,%d) for grid %dx%d", + entry.TileX, + entry.TileY, + cols, + rows, + ) + } + if entry.Group < 0 { + return nil, fmt.Errorf("simulator.device.memory_share group must be >= 0, got %d", entry.Group) + } + share[[2]int{entry.TileX, entry.TileY}] = entry.Group + } + return share, nil +} + +type teeHandler struct { + handlers []slog.Handler +} + +func newTeeHandler(handlers ...slog.Handler) slog.Handler { + cleaned := make([]slog.Handler, 0, len(handlers)) + for _, handler := range handlers { + if handler != nil { + cleaned = append(cleaned, handler) + } + } + return &teeHandler{handlers: cleaned} +} + +func (h *teeHandler) Enabled(ctx context.Context, level slog.Level) bool { + for _, handler := range h.handlers { + if handler.Enabled(ctx, level) { + return 
true + } + } + return false +} + +func (h *teeHandler) Handle(ctx context.Context, record slog.Record) error { + for _, handler := range h.handlers { + if !handler.Enabled(ctx, record.Level) { + continue + } + if err := handler.Handle(ctx, record.Clone()); err != nil { + return err + } + } + return nil +} + +func (h *teeHandler) WithAttrs(attrs []slog.Attr) slog.Handler { + next := make([]slog.Handler, 0, len(h.handlers)) + for _, handler := range h.handlers { + next = append(next, handler.WithAttrs(attrs)) + } + return &teeHandler{handlers: next} +} + +func (h *teeHandler) WithGroup(name string) slog.Handler { + next := make([]slog.Handler, 0, len(h.handlers)) + for _, handler := range h.handlers { + next = append(next, handler.WithGroup(name)) + } + return &teeHandler{handlers: next} +} diff --git a/runtimecfg/runtime_test.go b/runtimecfg/runtime_test.go new file mode 100644 index 0000000..108c5c3 --- /dev/null +++ b/runtimecfg/runtime_test.go @@ -0,0 +1,316 @@ +package runtimecfg + +import ( + "path/filepath" + "strings" + "testing" + + "github.com/sarchlab/zeonica/core" +) + +func TestResolveExecutionPolicyDefaultsToInOrder(t *testing.T) { + cfg, err := Resolve(ArchSpec{}, "policy-default") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + if cfg.ExecutionPolicy != "in_order_dataflow" { + t.Fatalf("unexpected default execution policy: %q", cfg.ExecutionPolicy) + } +} + +func TestResolveExecutionPolicyAlias(t *testing.T) { + spec := ArchSpec{ + Simulator: Simulator{ + ExecutionPolicy: "hybrid", + }, + } + cfg, err := Resolve(spec, "policy-alias") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + if cfg.ExecutionPolicy != "elastic_scheduled" { + t.Fatalf("unexpected normalized policy: %q", cfg.ExecutionPolicy) + } +} + +func TestResolveExecutionPolicyInvalid(t *testing.T) { + spec := ArchSpec{ + Simulator: Simulator{ + ExecutionPolicy: "unknown_mode", + }, + } + _, err := Resolve(spec, "policy-invalid") + if err == nil { + 
t.Fatal("expected error for invalid policy, got nil") + } + if !strings.Contains(err.Error(), "unsupported execution_policy") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestResolveStrictDefaults(t *testing.T) { + cfg, err := Resolve(ArchSpec{}, "strict-default") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + if cfg.StrictMaxSlip != 4 { + t.Fatalf("unexpected strict max slip: got %d want 4", cfg.StrictMaxSlip) + } + if cfg.StrictFailOnViolation { + t.Fatalf("unexpected strict fail flag: got true want false") + } +} + +func TestResolveStrictEnvOverrides(t *testing.T) { + t.Setenv("ZEONICA_STRICT_MAX_SLIP", "8") + t.Setenv("ZEONICA_STRICT_FAIL_ON_VIOLATION", "true") + + cfg, err := Resolve(ArchSpec{ + Simulator: Simulator{ + StrictMaxSlip: int64Ptr(2), + StrictFailOnViolation: boolPtr(false), + }, + }, "strict-env") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + if cfg.StrictMaxSlip != 8 { + t.Fatalf("unexpected strict max slip from env: got %d want 8", cfg.StrictMaxSlip) + } + if !cfg.StrictFailOnViolation { + t.Fatalf("unexpected strict fail flag from env: got false want true") + } +} + +func TestResolveStrictInvalidEnv(t *testing.T) { + t.Setenv("ZEONICA_STRICT_MAX_SLIP", "bad") + _, err := Resolve(ArchSpec{}, "strict-invalid-env") + if err == nil { + t.Fatal("expected error for invalid strict env") + } + if !strings.Contains(err.Error(), "ZEONICA_STRICT_MAX_SLIP") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestResolveMicroarchitectureDefaults(t *testing.T) { + cfg, err := Resolve(ArchSpec{}, "microarch-defaults") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + + if cfg.DriverPortIncomingBufferDepth != 1 || cfg.DriverPortOutgoingBufferDepth != 1 { + t.Fatalf( + "unexpected driver port depth defaults: in=%d out=%d", + cfg.DriverPortIncomingBufferDepth, + cfg.DriverPortOutgoingBufferDepth, + ) + } + if cfg.CorePortIncomingBufferDepth != 1 || cfg.CorePortOutgoingBufferDepth 
!= 1 { + t.Fatalf( + "unexpected core port depth defaults: in=%d out=%d", + cfg.CorePortIncomingBufferDepth, + cfg.CorePortOutgoingBufferDepth, + ) + } + if cfg.NumRegisters != 64 || cfg.LocalMemoryWords != 1024 { + t.Fatalf("unexpected tile defaults: regs=%d mem=%d", cfg.NumRegisters, cfg.LocalMemoryWords) + } + if cfg.MemoryMode != "simple" { + t.Fatalf("unexpected memory mode default: %q", cfg.MemoryMode) + } + if cfg.LinkLatency != 1 || cfg.LinkBandwidth != 32 { + t.Fatalf("unexpected link defaults: latency=%d bandwidth=%d", cfg.LinkLatency, cfg.LinkBandwidth) + } + if cfg.LinkTimingModel != "parse_only" { + t.Fatalf("unexpected link timing model: %q", cfg.LinkTimingModel) + } + if cfg.EnableFIFOModel { + t.Fatalf("unexpected fifo model default: got true want false") + } + if cfg.ProgramYAML != "" || cfg.ReportName != "" || len(cfg.QueueWatches) != 0 || len(cfg.BufferSweepDepths) != 0 { + t.Fatalf( + "unexpected experiment defaults: program=%q report=%q watches=%d depths=%d", + cfg.ProgramYAML, + cfg.ReportName, + len(cfg.QueueWatches), + len(cfg.BufferSweepDepths), + ) + } +} + +func TestResolveMicroarchitectureOverrides(t *testing.T) { + spec := ArchSpec{ + CGRADefaults: CGRADefaults{Rows: 2, Columns: 2}, + TileDefaults: TileDefaults{NumRegisters: 96, LocalMemoryWords: 2048}, + LinkDefaults: LinkDefaults{Latency: intPtr(3), Bandwidth: intPtr(128)}, + Simulator: Simulator{ + EnableFIFOModel: boolPtr(true), + Driver: NamedComponent{ + PortIncomingBufferDepth: intPtr(4), + PortOutgoingBufferDepth: intPtr(5), + }, + Device: DeviceComponent{ + MemoryMode: "shared", + PortIncomingBufferDepth: intPtr(6), + PortOutgoingBufferDepth: intPtr(7), + MemoryShare: []MemoryShareEntry{{TileX: 1, TileY: 1, Group: 9}}, + }, + }, + } + + cfg, err := Resolve(spec, "microarch-overrides") + if err != nil { + t.Fatalf("Resolve returned error: %v", err) + } + + if cfg.DriverPortIncomingBufferDepth != 4 || cfg.DriverPortOutgoingBufferDepth != 5 { + t.Fatalf( + "driver buffer depth 
override failed: in=%d out=%d", + cfg.DriverPortIncomingBufferDepth, + cfg.DriverPortOutgoingBufferDepth, + ) + } + if cfg.CorePortIncomingBufferDepth != 6 || cfg.CorePortOutgoingBufferDepth != 7 { + t.Fatalf( + "core buffer depth override failed: in=%d out=%d", + cfg.CorePortIncomingBufferDepth, + cfg.CorePortOutgoingBufferDepth, + ) + } + if cfg.NumRegisters != 96 || cfg.LocalMemoryWords != 2048 { + t.Fatalf("tile override failed: regs=%d mem=%d", cfg.NumRegisters, cfg.LocalMemoryWords) + } + if cfg.MemoryMode != "shared" { + t.Fatalf("memory mode override failed: %q", cfg.MemoryMode) + } + if len(cfg.MemoryShare) != 4 { + t.Fatalf("shared mode should materialize full 2x2 map, got %d", len(cfg.MemoryShare)) + } + if got := cfg.MemoryShare[[2]int{1, 1}]; got != 9 { + t.Fatalf("memory_share override for (1,1) failed: got %d want 9", got) + } + if cfg.LinkLatency != 3 || cfg.LinkBandwidth != 128 { + t.Fatalf("link override failed: latency=%d bandwidth=%d", cfg.LinkLatency, cfg.LinkBandwidth) + } + if !cfg.EnableFIFOModel { + t.Fatalf("fifo model override failed: got false want true") + } +} + +func TestResolveMicroarchitectureInvalidDepth(t *testing.T) { + spec := ArchSpec{ + Simulator: Simulator{ + Driver: NamedComponent{PortIncomingBufferDepth: intPtr(0)}, + }, + } + _, err := Resolve(spec, "microarch-invalid-depth") + if err == nil { + t.Fatal("expected invalid depth error") + } + if !strings.Contains(err.Error(), "port_incoming_buffer_depth") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestResolveInvalidMemoryMode(t *testing.T) { + spec := ArchSpec{Simulator: Simulator{Device: DeviceComponent{MemoryMode: "foo"}}} + _, err := Resolve(spec, "memory-mode-invalid") + if err == nil { + t.Fatal("expected invalid memory mode error") + } + if !strings.Contains(err.Error(), "unsupported memory_mode") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestResolveInvalidLinkLatency(t *testing.T) { + spec := ArchSpec{LinkDefaults: LinkDefaults{Latency: 
intPtr(-1)}} + _, err := Resolve(spec, "link-latency-invalid") + if err == nil { + t.Fatal("expected invalid link latency error") + } + if !strings.Contains(err.Error(), "link_defaults.latency") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestResolveInvalidMemoryShareCoordinate(t *testing.T) { + spec := ArchSpec{ + CGRADefaults: CGRADefaults{Rows: 2, Columns: 2}, + Simulator: Simulator{ + Device: DeviceComponent{ + MemoryMode: "shared", + MemoryShare: []MemoryShareEntry{{TileX: 3, TileY: 0, Group: 0}}, + }, + }, + } + _, err := Resolve(spec, "memory-share-invalid") + if err == nil { + t.Fatal("expected invalid memory share coordinate error") + } + if !strings.Contains(err.Error(), "out-of-range tile") { + t.Fatalf("unexpected error: %v", err) + } +} + +func TestResolveWithSpecPathExperimentConfig(t *testing.T) { + specPath := filepath.Join(t.TempDir(), "base_arch_spec.yaml") + spec := ArchSpec{ + Simulator: Simulator{ + ProgramYAML: "fir+histogram/tmp-generated-instructions.yaml", + ReportName: "fir_histogram", + BufferSweepDepths: []int{1, 2, 4, 8, 16}, + QueueWatches: []core.QueueWatchSpec{ + {Label: "hist_upstream", X: 1, Y: 1, Kind: "recv", Direction: "West", Color: "RED"}, + {Label: "hist_downstream", X: 2, Y: 1, Kind: "recv", Direction: "West", Color: "RED"}, + }, + }, + } + + cfg, err := ResolveWithSpecPath(spec, specPath, "") + if err != nil { + t.Fatalf("ResolveWithSpecPath returned error: %v", err) + } + + expectedProgram := filepath.Join(filepath.Dir(specPath), "fir+histogram", "tmp-generated-instructions.yaml") + if cfg.ProgramYAML != expectedProgram { + t.Fatalf("unexpected resolved program path: got %q want %q", cfg.ProgramYAML, expectedProgram) + } + if cfg.ReportName != "fir_histogram" { + t.Fatalf("unexpected report name: %q", cfg.ReportName) + } + if cfg.TestName != "fir_histogram" { + t.Fatalf("expected report_name to seed test name, got %q", cfg.TestName) + } + if len(cfg.QueueWatches) != 2 { + t.Fatalf("unexpected queue watch 
count: %d", len(cfg.QueueWatches)) + } + if len(cfg.BufferSweepDepths) != 5 { + t.Fatalf("unexpected buffer sweep depth count: %d", len(cfg.BufferSweepDepths)) + } +} + +func TestResolveWithSpecPathRejectsInvalidQueueWatch(t *testing.T) { + spec := ArchSpec{ + Simulator: Simulator{ + QueueWatches: []core.QueueWatchSpec{ + {Label: "bad", X: 0, Y: 0, Kind: "recv", Direction: "Bogus", Color: "RED"}, + }, + }, + } + + _, err := ResolveWithSpecPath(spec, "", "invalid-watch") + if err == nil { + t.Fatal("expected invalid queue watch error") + } + if !strings.Contains(err.Error(), "simulator.queue_watches") { + t.Fatalf("unexpected error: %v", err) + } +} + +func int64Ptr(v int64) *int64 { return &v } + +func boolPtr(v bool) *bool { return &v } + +func intPtr(v int) *int { return &v } diff --git a/runtimecfg/spec.go b/runtimecfg/spec.go index 1325203..ba183a6 100644 --- a/runtimecfg/spec.go +++ b/runtimecfg/spec.go @@ -5,6 +5,7 @@ import ( "fmt" "os" + "github.com/sarchlab/zeonica/core" "gopkg.in/yaml.v3" ) @@ -13,6 +14,8 @@ import ( // extension without changing callers. type ArchSpec struct { CGRADefaults CGRADefaults `yaml:"cgra_defaults"` + TileDefaults TileDefaults `yaml:"tile_defaults"` + LinkDefaults LinkDefaults `yaml:"link_defaults"` Simulator Simulator `yaml:"simulator"` Extra map[string]any `yaml:",inline"` } @@ -24,35 +27,74 @@ type CGRADefaults struct { Extra map[string]any `yaml:",inline"` } +// TileDefaults defines default per-tile microarchitecture parameters. +type TileDefaults struct { + NumRegisters int `yaml:"num_registers"` + LocalMemoryWords int `yaml:"local_memory_words"` + Extra map[string]any `yaml:",inline"` +} + +// LinkDefaults captures inter-tile link metadata. This release parses and validates +// these fields, but does not feed them into cycle-accurate link timing yet. 
+type LinkDefaults struct { + Latency *int `yaml:"latency"` + Bandwidth *int `yaml:"bandwidth"` + Extra map[string]any `yaml:",inline"` +} + // Simulator contains simulator runtime settings from arch spec. type Simulator struct { - ExecutionModel string `yaml:"execution_model"` - Logging SimulatorLogging `yaml:"logging"` - Driver NamedComponent `yaml:"driver"` - Device DeviceComponent `yaml:"device"` - Extra map[string]any `yaml:",inline"` + ExecutionModel string `yaml:"execution_model"` + ExecutionPolicy string `yaml:"execution_policy"` + EnableFIFOModel *bool `yaml:"enable_fifo_model"` + EnableQueueWatches *bool `yaml:"enable_queue_watches"` + ProgramYAML string `yaml:"program_yaml"` + ReportName string `yaml:"report_name"` + QueueWatches []core.QueueWatchSpec `yaml:"queue_watches"` + BufferSweepDepths []int `yaml:"buffer_sweep_depths"` + StrictMaxSlip *int64 `yaml:"strict_max_slip"` + StrictFailOnViolation *bool `yaml:"strict_fail_on_violation"` + Logging SimulatorLogging `yaml:"logging"` + Driver NamedComponent `yaml:"driver"` + Device DeviceComponent `yaml:"device"` + Extra map[string]any `yaml:",inline"` } // SimulatorLogging configures trace logging behavior. type SimulatorLogging struct { - Enabled *bool `yaml:"enabled"` - File string `yaml:"file"` - Extra map[string]any `yaml:",inline"` + Enabled *bool `yaml:"enabled"` + EnableTrace *bool `yaml:"enableTrace"` + File string `yaml:"file"` + Extra map[string]any `yaml:",inline"` } // NamedComponent contains shared component naming/frequency fields. type NamedComponent struct { - Name string `yaml:"name"` - Frequency string `yaml:"frequency"` - Extra map[string]any `yaml:",inline"` + Name string `yaml:"name"` + Frequency string `yaml:"frequency"` + PortIncomingBufferDepth *int `yaml:"port_incoming_buffer_depth"` + PortOutgoingBufferDepth *int `yaml:"port_outgoing_buffer_depth"` + Extra map[string]any `yaml:",inline"` +} + +// MemoryShareEntry maps one tile coordinate to a shared-memory controller group. 
+type MemoryShareEntry struct { + TileX int `yaml:"tile_x"` + TileY int `yaml:"tile_y"` + Group int `yaml:"group"` + Extra map[string]any `yaml:",inline"` } // DeviceComponent defines simulator device-specific settings. type DeviceComponent struct { - Name string `yaml:"name"` - Frequency string `yaml:"frequency"` - BindToArchitecture *bool `yaml:"bind_to_architecture"` - Extra map[string]any `yaml:",inline"` + Name string `yaml:"name"` + Frequency string `yaml:"frequency"` + BindToArchitecture *bool `yaml:"bind_to_architecture"` + MemoryMode string `yaml:"memory_mode"` + MemoryShare []MemoryShareEntry `yaml:"memory_share"` + PortIncomingBufferDepth *int `yaml:"port_incoming_buffer_depth"` + PortOutgoingBufferDepth *int `yaml:"port_outgoing_buffer_depth"` + Extra map[string]any `yaml:",inline"` } // Load reads and parses an architecture spec YAML file. diff --git a/test/arch_spec/arch_spec.yaml b/test/arch_spec/arch_spec.yaml index acb94d3..1dc6c53 100644 --- a/test/arch_spec/arch_spec.yaml +++ b/test/arch_spec/arch_spec.yaml @@ -15,6 +15,7 @@ cgra_defaults: tile_defaults: num_registers: 32 + local_memory_words: 2048 fu_types: ["add", "mul", "div", "fadd", "fmul", "fdiv", "logic", "cmp", "sel", "type_conv", "vfmul", "fadd_fadd", "fmul_fadd", "grant", "loop_control", "phi", "constant", "mem", "return", "mem_indexed", "alloca", "shift"] link_defaults: @@ -45,16 +46,38 @@ extensions: simulator: execution_model: "serial" + execution_policy: "in_order_dataflow" + enable_fifo_model: false + # three policies: strict_timed, elastic_scheduled, in_order_dataflow logging: enabled: true + enableTrace: true file: ".json.log" driver: name: "Driver" frequency: "1GHz" + port_incoming_buffer_depth: 4 + port_outgoing_buffer_depth: 4 device: name: "Device" frequency: "1GHz" bind_to_architecture: true + memory_mode: "local" + port_incoming_buffer_depth: 8 + port_outgoing_buffer_depth: 8 + memory_share: + - tile_x: 0 + tile_y: 0 + group: 0 + - tile_x: 1 + tile_y: 0 + group: 0 + - tile_x: 
2 + tile_y: 0 + group: 1 + - tile_x: 3 + tile_y: 0 + group: 1 diff --git a/test/testbench/axpy/main.go b/test/testbench/axpy/main.go index 11c1f53..07d4bbf 100644 --- a/test/testbench/axpy/main.go +++ b/test/testbench/axpy/main.go @@ -177,13 +177,9 @@ func main() { } passed := mismatch == 0 - if rt.Config.LoggingEnabled { - reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) - if err != nil { - panic(err) - } - fmt.Printf("report saved: %s\n", reportPath) - } else { - fmt.Println("logging disabled in arch spec, skipped report generation") + reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) + if err != nil { + panic(err) } + fmt.Printf("report saved: %s\n", reportPath) } diff --git a/test/testbench/branch_for/main.go b/test/testbench/branch_for/main.go index dfc7055..02b80bc 100644 --- a/test/testbench/branch_for/main.go +++ b/test/testbench/branch_for/main.go @@ -146,13 +146,9 @@ func main() { } passed := mismatch == 0 - if rt.Config.LoggingEnabled { - reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) - if err != nil { - panic(err) - } - fmt.Printf("report saved: %s\n", reportPath) - } else { - fmt.Println("logging disabled in arch spec, skipped report generation") + reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) + if err != nil { + panic(err) } + fmt.Printf("report saved: %s\n", reportPath) } diff --git a/test/testbench/fir/main.go b/test/testbench/fir/main.go index 93197bd..b5a657b 100644 --- a/test/testbench/fir/main.go +++ b/test/testbench/fir/main.go @@ -166,13 +166,9 @@ func main() { } passed := mismatchCount == 0 - if rt.Config.LoggingEnabled { - reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatchCount) - if err != nil { - panic(err) - } - fmt.Printf("report saved: %s\n", reportPath) - } else { - fmt.Println("logging disabled in arch spec, skipped report generation") + reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatchCount) + if 
err != nil { + panic(err) } + fmt.Printf("report saved: %s\n", reportPath) } diff --git a/test/testbench/histogram/main.go b/test/testbench/histogram/main.go index 3376de8..4d3937a 100644 --- a/test/testbench/histogram/main.go +++ b/test/testbench/histogram/main.go @@ -13,8 +13,158 @@ import ( "github.com/sarchlab/akita/v4/sim" "github.com/sarchlab/zeonica/core" "github.com/sarchlab/zeonica/runtimecfg" + "gopkg.in/yaml.v3" ) +// YAML shapes mirror core/program.go (LoadProgramFileFromYAML) for patching only. + +type histogramYAMLRoot struct { + ArrayConfig histogramArrayConfig `yaml:"array_config"` +} + +type histogramArrayConfig struct { + Rows int `yaml:"rows"` + Cols int `yaml:"columns"` + CompiledII int `yaml:"compiled_ii"` + Cores []histogramYAMLCore `yaml:"cores"` +} + +type histogramYAMLCore struct { + Row int `yaml:"row"` + Column int `yaml:"column"` + CoreID string `yaml:"core_id"` + Entries []histogramYAMLEntry `yaml:"entries"` +} + +type histogramYAMLEntry struct { + EntryID string `yaml:"entry_id"` + Type string `yaml:"type"` + InstructionGroups []histogramYAMLInstGroup `yaml:"instructions"` +} + +type histogramYAMLInstGroup struct { + Operations []histogramYAMLOperation `yaml:"operations"` + IndexPerII int `yaml:"index_per_ii"` +} + +type histogramYAMLOperation struct { + OpCode string `yaml:"opcode"` + SrcOperands []histogramYAMLOperand `yaml:"src_operands"` + DstOperands []histogramYAMLOperand `yaml:"dst_operands"` + ID int `yaml:"id"` + InvalidIterations int `yaml:"invalid_iterations"` + TimeStep int `yaml:"time_step"` +} + +type histogramYAMLOperand struct { + Operand string `yaml:"operand"` + Color string `yaml:"color"` +} + +// gepArgReplacements maps LLVM-style kernel parameters to immediates for GEP. +// +// Matches histogram_int.cpp: +// +// void kernel(int input[], int histogram[]) +// +// arg0 — base of input[] (input_data); testbench preloads at tile (1,0) starting offset 0. 
+// arg1 — base of histogram[]; preloads at tile (0,1) starting offset 0. +// +// Override with ZEONICA_GEP_ARG0 / ZEONICA_GEP_ARG1 (e.g. "0" or "#0"). +func gepArgReplacements() map[string]string { + m := make(map[string]string) + if v := strings.TrimSpace(os.Getenv("ZEONICA_GEP_ARG0")); v != "" { + m["arg0"] = normalizeImmediateYAMLOperand(v) + } else { + m["arg0"] = "#0" + } + if v := strings.TrimSpace(os.Getenv("ZEONICA_GEP_ARG1")); v != "" { + m["arg1"] = normalizeImmediateYAMLOperand(v) + } else { + m["arg1"] = "#0" + } + return m +} + +func normalizeImmediateYAMLOperand(s string) string { + s = strings.TrimSpace(s) + if strings.HasPrefix(s, "#") { + return s + } + return "#" + s +} + +// patchGEPArgOperands replaces arg0/arg1 in GEP source operands with immediates. +// Zeonica's readOperand only accepts $reg, ports, or numeric immediates — not symbolic args. +func patchGEPArgOperands(root *histogramYAMLRoot, repl map[string]string) bool { + changed := false + for ci := range root.ArrayConfig.Cores { + core := &root.ArrayConfig.Cores[ci] + for ei := range core.Entries { + entry := &core.Entries[ei] + for gi := range entry.InstructionGroups { + group := &entry.InstructionGroups[gi] + for oi := range group.Operations { + op := &group.Operations[oi] + if op.OpCode != "GEP" { + continue + } + for si := range op.SrcOperands { + src := &op.SrcOperands[si] + if newOp, ok := repl[src.Operand]; ok { + src.Operand = newOp + changed = true + } + } + } + } + } + } + return changed +} + +// resolveProgramYAMLWithGEPArgs reads compiler-generated YAML, patches GEP arg operands, and +// returns a path suitable for core.LoadProgramFileFromYAML. If nothing changed, returns the +// original path and a no-op cleanup. 
+func resolveProgramYAMLWithGEPArgs(programPath string) (resolved string, cleanup func()) { + data, err := os.ReadFile(programPath) + if err != nil { + panic(fmt.Sprintf("Failed to read program file %q: %v", programPath, err)) + } + + var root histogramYAMLRoot + if err := yaml.Unmarshal(data, &root); err != nil { + panic(fmt.Sprintf("Failed to parse YAML %q: %v", programPath, err)) + } + + repl := gepArgReplacements() + if !patchGEPArgOperands(&root, repl) { + return programPath, func() {} + } + + out, err := yaml.Marshal(&root) + if err != nil { + panic(err) + } + + tmp, err := os.CreateTemp("", "zeonica-histogram-patched-*.yaml") + if err != nil { + panic(err) + } + path := tmp.Name() + if _, err := tmp.Write(out); err != nil { + _ = tmp.Close() + _ = os.Remove(path) + panic(err) + } + if err := tmp.Close(); err != nil { + _ = os.Remove(path) + panic(err) + } + + return path, func() { _ = os.Remove(path) } +} + // Histogram runs the histogram testbench on the configured runtime. // //nolint:gocyclo,funlen @@ -36,10 +186,12 @@ func Histogram(rt *runtimecfg.Runtime) int { programPath := os.Getenv("ZEONICA_PROGRAM_YAML") if programPath == "" { - //programPath = "test/Zeonica_Testbench/kernel/histogram/histogram-instructions.yaml" programPath = "tmp-generated-instructions.yaml" } - program := core.LoadProgramFileFromYAML(programPath) + resolvedPath, cleanupYAML := resolveProgramYAMLWithGEPArgs(programPath) + defer cleanupYAML() + + program := core.LoadProgramFileFromYAML(resolvedPath) fmt.Println("program:", program) if len(program) == 0 { @@ -62,13 +214,27 @@ func Histogram(rt *runtimecfg.Runtime) int { } expected := computeHistogram(inputData, 5) - // histogram tile (2,1): initialize histogram[0..4] to 0 - for addr := 0; addr < 5; addr++ { - driver.PreloadMemory(2, 1, 0, uint32(addr)) + // Tile layout must match the mapped kernel in tmp-generated-instructions.yaml (and .asm): + // LOAD / input GEP live on core column=1, row=0 -> tile (1,0) + // STORE / histogram 
GEP on column=0, row=1 -> tile (0,1) + // Older hand-tuned testbenches used (3,2)/(2,1); compiler-generated mapping differs. + const ( + inputTileX = 1 + inputTileY = 0 + histTileX = 0 + histTileY = 1 + histBins = 5 + inputDataLen = 20 + ) + + for addr := 0; addr < histBins; addr++ { + driver.PreloadMemory(histTileX, histTileY, 0, uint32(addr)) } - // data tile (3,2): input_data[0..19] for addr, val := range inputData { - driver.PreloadMemory(3, 2, val, uint32(addr)) + if addr >= inputDataLen { + break + } + driver.PreloadMemory(inputTileX, inputTileY, val, uint32(addr)) } // fire all the cores in the beginning @@ -86,17 +252,15 @@ func Histogram(rt *runtimecfg.Runtime) int { fmt.Println("========================") fmt.Println("========================") - // print output memory data - outputTile := [2]int{2, 1} + // Histogram results written by STORE on tile (0,1); read same tile. + outputTile := [2]int{histTileX, histTileY} fmt.Printf("output memory @ tile (%d,%d):\n", outputTile[0], outputTile[1]) - scanLimit := 5 + scanLimit := histBins outputData := make([]uint32, scanLimit) for addr := 0; addr < scanLimit; addr++ { val := driver.ReadMemory(outputTile[0], outputTile[1], uint32(addr)) outputData[addr] = val - if addr < len(inputData) { - fmt.Printf(" addr %d -> %d\n", addr, val) - } + fmt.Printf(" addr %d -> %d\n", addr, val) } fmt.Println("expected histogram (CPU):") @@ -198,13 +362,9 @@ func main() { } passed := mismatch == 0 - if rt.Config.LoggingEnabled { - reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) - if err != nil { - panic(err) - } - fmt.Printf("report saved: %s\n", reportPath) - } else { - fmt.Println("logging disabled in arch spec, skipped report generation") + reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) + if err != nil { + panic(err) } + fmt.Printf("report saved: %s\n", reportPath) } diff --git a/test/testbench/histogram/tmp-generated-dfg.png b/test/testbench/histogram/tmp-generated-dfg.png new 
file mode 100644 index 0000000..303e2a4 Binary files /dev/null and b/test/testbench/histogram/tmp-generated-dfg.png differ diff --git a/test/testbench/histogram/tmp-generated-instructions.asm b/test/testbench/histogram/tmp-generated-instructions.asm index 815642b..eec2cd2 100644 --- a/test/testbench/histogram/tmp-generated-instructions.asm +++ b/test/testbench/histogram/tmp-generated-instructions.asm @@ -1,75 +1,72 @@ -# Compiled II: 5 +# Compiled II: 6 -PE(2,1): +PE(0,0): { - ADD, [$0], [#1] -> [$0] (t=10, inv_iters=2) - DATA_MOV, [EAST, RED] -> [$1] (t=10, inv_iters=2) + GRANT_ONCE, [#0] -> [EAST, RED] (t=0, inv_iters=0) } (idx_per_ii=0) { - STORE, [$0], [$1] (t=11, inv_iters=2) -} (idx_per_ii=1) -{ - LOAD, [EAST, RED] -> [$0] (t=9, inv_iters=1) -} (idx_per_ii=4) + ADD, [EAST, RED], [#-5] -> [NORTH, RED] (t=5, inv_iters=0) +} (idx_per_ii=5) -PE(3,1): +PE(1,0): { - ADD, [NORTH, RED], [#-5] -> [$0] (t=5, inv_iters=1) + CTRL_MOV, [NORTH, RED] -> [$0] (t=6, inv_iters=1) } (idx_per_ii=0) { - DIV, [$0], [#18] -> [$0] (t=6, inv_iters=1) + PHI_START, [WEST, RED], [$0] -> [$0], [NORTH, RED] (t=1, inv_iters=0) } (idx_per_ii=1) { - SEXT, [$0] -> [$0] (t=7, inv_iters=1) + GEP, [arg0], [$0] -> [$0] (t=2, inv_iters=0) } (idx_per_ii=2) { - GEP, [$0] -> [WEST, RED], [$0] (t=8, inv_iters=1) + LOAD, [$0] -> [$0] (t=3, inv_iters=0) } (idx_per_ii=3) { - DATA_MOV, [$0] -> [WEST, RED] (t=9, inv_iters=1) -} (idx_per_ii=4) - -PE(1,2): -{ - DATA_MOV, [EAST, RED] -> [$1] (t=5, inv_iters=1) - GRANT_PREDICATE, [$0], [$1] -> [$2] (t=10, inv_iters=2) -} (idx_per_ii=0) -{ - RETURN_VOID, [$2] (t=11, inv_iters=2) -} (idx_per_ii=1) -{ - DATA_MOV, [EAST, RED] -> [$0] (t=4, inv_iters=0) + MUL, [$0], [#5] -> [WEST, RED] (t=4, inv_iters=0) } (idx_per_ii=4) -PE(2,2): +PE(0,1): { - GRANT_PREDICATE, [$1], [$0] -> [$0] (t=5, inv_iters=1) + DIV, [SOUTH, RED], [#18] -> [$0] (t=6, inv_iters=1) } (idx_per_ii=0) { - PHI_START, [EAST, RED], [$0] -> [EAST, RED], [$0] (t=1, inv_iters=0) + SEXT, [$0] -> [$0] 
(t=7, inv_iters=1) } (idx_per_ii=1) { - ADD, [$0], [#1] -> [$0], [$1] (t=2, inv_iters=0) + GEP, [arg1], [$0] -> [$0], [$1] (t=8, inv_iters=1) } (idx_per_ii=2) { - ICMP_EQ, [$0], [#20] -> [$0], [WEST, RED], [$2] (t=3, inv_iters=0) + LOAD, [$0] -> [$0] (t=9, inv_iters=1) } (idx_per_ii=3) { - NOT, [$0] -> [$0] (t=4, inv_iters=0) - DATA_MOV, [$2] -> [WEST, RED] (t=4, inv_iters=0) + ADD, [$0], [#1] -> [$0] (t=10, inv_iters=1) } (idx_per_ii=4) - -PE(3,2): { - GRANT_ONCE, [#0] -> [WEST, RED] (t=0, inv_iters=0) -} (idx_per_ii=0) + STORE, [$0], [$1] (t=11, inv_iters=1) +} (idx_per_ii=5) + +PE(1,1): { - GEP, [WEST, RED] -> [$0] (t=2, inv_iters=0) + ADD, [SOUTH, RED], [#1] -> [$0], [$1] (t=2, inv_iters=0) } (idx_per_ii=2) { - LOAD, [$0] -> [$0] (t=3, inv_iters=0) + ICMP_EQ, [$0], [#20] -> [$0], [NORTH, RED], [$2] (t=3, inv_iters=0) } (idx_per_ii=3) { - MUL, [$0], [#5] -> [SOUTH, RED] (t=4, inv_iters=0) + NOT, [$0] -> [$0] (t=4, inv_iters=0) + DATA_MOV, [$2] -> [NORTH, RED] (t=4, inv_iters=0) } (idx_per_ii=4) +{ + GRANT_PREDICATE, [$1], [$0] -> [SOUTH, RED] (t=5, inv_iters=0) +} (idx_per_ii=5) + +PE(1,2): +{ + DATA_MOV, [SOUTH, RED] -> [$0] (t=4, inv_iters=0) + GRANT_PREDICATE, [$0], [$1] -> [$1] (t=10, inv_iters=1) +} (idx_per_ii=4) +{ + DATA_MOV, [SOUTH, RED] -> [$1] (t=5, inv_iters=0) + RETURN_VOID, [$1] (t=11, inv_iters=1) +} (idx_per_ii=5) diff --git a/test/testbench/histogram/tmp-generated-instructions.yaml b/test/testbench/histogram/tmp-generated-instructions.yaml index aff64b0..3432b84 100644 --- a/test/testbench/histogram/tmp-generated-instructions.yaml +++ b/test/testbench/histogram/tmp-generated-instructions.yaml @@ -1,102 +1,83 @@ array_config: columns: 4 rows: 4 - compiled_ii: 5 + compiled_ii: 6 cores: - - column: 2 - row: 1 - core_id: "6" + - column: 0 + row: 0 + core_id: "0" entries: - entry_id: "entry0" instructions: - index_per_ii: 0 operations: - - opcode: "ADD" - id: 38 - time_step: 10 - invalid_iterations: 2 + - opcode: "GRANT_ONCE" + id: 0 + time_step: 0 + 
invalid_iterations: 0 src_operands: - - operand: "$0" - color: "RED" - - operand: "#1" + - operand: "#0" color: "RED" dst_operands: - - operand: "$0" - color: "RED" - - opcode: "DATA_MOV" - id: 34 - time_step: 10 - invalid_iterations: 2 - src_operands: - operand: "EAST" color: "RED" - dst_operands: - - operand: "$1" - color: "RED" - - index_per_ii: 1 - operations: - - opcode: "STORE" - id: 40 - time_step: 11 - invalid_iterations: 2 - src_operands: - - operand: "$0" - color: "RED" - - operand: "$1" - color: "RED" - - index_per_ii: 4 + - index_per_ii: 5 operations: - - opcode: "LOAD" - id: 36 - time_step: 9 - invalid_iterations: 1 + - opcode: "ADD" + id: 26 + time_step: 5 + invalid_iterations: 0 src_operands: - operand: "EAST" color: "RED" + - operand: "#-5" + color: "RED" dst_operands: - - operand: "$0" + - operand: "NORTH" color: "RED" - - column: 3 - row: 1 - core_id: "7" + - column: 1 + row: 0 + core_id: "1" entries: - entry_id: "entry0" instructions: - index_per_ii: 0 operations: - - opcode: "ADD" - id: 26 - time_step: 5 + - opcode: "CTRL_MOV" + id: 27 + time_step: 6 invalid_iterations: 1 src_operands: - operand: "NORTH" color: "RED" - - operand: "#-5" - color: "RED" dst_operands: - operand: "$0" color: "RED" - index_per_ii: 1 operations: - - opcode: "DIV" - id: 29 - time_step: 6 - invalid_iterations: 1 + - opcode: "PHI_START" + id: 4 + time_step: 1 + invalid_iterations: 0 src_operands: - - operand: "$0" + - operand: "WEST" color: "RED" - - operand: "#18" + - operand: "$0" color: "RED" dst_operands: - operand: "$0" color: "RED" + - operand: "NORTH" + color: "RED" - index_per_ii: 2 operations: - - opcode: "SEXT" - id: 31 - time_step: 7 - invalid_iterations: 1 + - opcode: "GEP" + id: 8 + time_step: 2 + invalid_iterations: 0 src_operands: + - operand: "arg0" + color: "RED" - operand: "$0" color: "RED" dst_operands: @@ -104,117 +85,121 @@ array_config: color: "RED" - index_per_ii: 3 operations: - - opcode: "GEP" - id: 33 - time_step: 8 - invalid_iterations: 1 + - 
opcode: "LOAD" + id: 13 + time_step: 3 + invalid_iterations: 0 src_operands: - operand: "$0" color: "RED" dst_operands: - - operand: "WEST" - color: "RED" - operand: "$0" color: "RED" - index_per_ii: 4 operations: - - opcode: "DATA_MOV" - id: 340000 - time_step: 9 - invalid_iterations: 1 + - opcode: "MUL" + id: 20 + time_step: 4 + invalid_iterations: 0 src_operands: - operand: "$0" color: "RED" + - operand: "#5" + color: "RED" dst_operands: - operand: "WEST" color: "RED" - - column: 1 - row: 2 - core_id: "9" + - column: 0 + row: 1 + core_id: "4" entries: - entry_id: "entry0" instructions: - index_per_ii: 0 operations: - - opcode: "DATA_MOV" - id: 15 - time_step: 5 + - opcode: "DIV" + id: 29 + time_step: 6 invalid_iterations: 1 src_operands: - - operand: "EAST" + - operand: "SOUTH" + color: "RED" + - operand: "#18" color: "RED" dst_operands: - - operand: "$1" + - operand: "$0" color: "RED" - - opcode: "GRANT_PREDICATE" - id: 18 - time_step: 10 - invalid_iterations: 2 + - index_per_ii: 1 + operations: + - opcode: "SEXT" + id: 31 + time_step: 7 + invalid_iterations: 1 src_operands: - operand: "$0" color: "RED" - - operand: "$1" - color: "RED" dst_operands: - - operand: "$2" + - operand: "$0" color: "RED" - - index_per_ii: 1 + - index_per_ii: 2 operations: - - opcode: "RETURN_VOID" - id: 24 - time_step: 11 - invalid_iterations: 2 + - opcode: "GEP" + id: 33 + time_step: 8 + invalid_iterations: 1 src_operands: - - operand: "$2" + - operand: "arg1" color: "RED" - - index_per_ii: 4 - operations: - - opcode: "DATA_MOV" - id: 14 - time_step: 4 - invalid_iterations: 0 - src_operands: - - operand: "EAST" + - operand: "$0" color: "RED" dst_operands: - operand: "$0" color: "RED" - - column: 2 - row: 2 - core_id: "10" - entries: - - entry_id: "entry0" - instructions: - - index_per_ii: 0 + - operand: "$1" + color: "RED" + - index_per_ii: 3 operations: - - opcode: "GRANT_PREDICATE" - id: 25 - time_step: 5 + - opcode: "LOAD" + id: 36 + time_step: 9 invalid_iterations: 1 
src_operands: - - operand: "$1" - color: "RED" - operand: "$0" color: "RED" dst_operands: - operand: "$0" color: "RED" - - index_per_ii: 1 + - index_per_ii: 4 operations: - - opcode: "PHI_START" - id: 4 - time_step: 1 - invalid_iterations: 0 + - opcode: "ADD" + id: 38 + time_step: 10 + invalid_iterations: 1 src_operands: - - operand: "EAST" - color: "RED" - operand: "$0" color: "RED" + - operand: "#1" + color: "RED" dst_operands: - - operand: "EAST" + - operand: "$0" color: "RED" + - index_per_ii: 5 + operations: + - opcode: "STORE" + id: 40 + time_step: 11 + invalid_iterations: 1 + src_operands: - operand: "$0" color: "RED" + - operand: "$1" + color: "RED" + - column: 1 + row: 1 + core_id: "5" + entries: + - entry_id: "entry0" + instructions: - index_per_ii: 2 operations: - opcode: "ADD" @@ -222,7 +207,7 @@ array_config: time_step: 2 invalid_iterations: 0 src_operands: - - operand: "$0" + - operand: "SOUTH" color: "RED" - operand: "#1" color: "RED" @@ -245,7 +230,7 @@ array_config: dst_operands: - operand: "$0" color: "RED" - - operand: "WEST" + - operand: "NORTH" color: "RED" - operand: "$2" color: "RED" @@ -269,61 +254,68 @@ array_config: - operand: "$2" color: "RED" dst_operands: - - operand: "WEST" + - operand: "NORTH" color: "RED" - - column: 3 - row: 2 - core_id: "11" - entries: - - entry_id: "entry0" - instructions: - - index_per_ii: 0 + - index_per_ii: 5 operations: - - opcode: "GRANT_ONCE" - id: 0 - time_step: 0 + - opcode: "GRANT_PREDICATE" + id: 25 + time_step: 5 invalid_iterations: 0 src_operands: - - operand: "#0" + - operand: "$1" + color: "RED" + - operand: "$0" color: "RED" dst_operands: - - operand: "WEST" + - operand: "SOUTH" color: "RED" - - index_per_ii: 2 + - column: 1 + row: 2 + core_id: "9" + entries: + - entry_id: "entry0" + instructions: + - index_per_ii: 4 operations: - - opcode: "GEP" - id: 8 - time_step: 2 + - opcode: "DATA_MOV" + id: 14 + time_step: 4 invalid_iterations: 0 src_operands: - - operand: "WEST" + - operand: "SOUTH" color: 
"RED" dst_operands: - operand: "$0" color: "RED" - - index_per_ii: 3 - operations: - - opcode: "LOAD" - id: 13 - time_step: 3 - invalid_iterations: 0 + - opcode: "GRANT_PREDICATE" + id: 18 + time_step: 10 + invalid_iterations: 1 src_operands: - operand: "$0" color: "RED" + - operand: "$1" + color: "RED" dst_operands: - - operand: "$0" + - operand: "$1" color: "RED" - - index_per_ii: 4 + - index_per_ii: 5 operations: - - opcode: "MUL" - id: 20 - time_step: 4 + - opcode: "DATA_MOV" + id: 15 + time_step: 5 invalid_iterations: 0 src_operands: - - operand: "$0" - color: "RED" - - operand: "#5" + - operand: "SOUTH" color: "RED" dst_operands: - - operand: "SOUTH" + - operand: "$1" + color: "RED" + - opcode: "RETURN_VOID" + id: 24 + time_step: 11 + invalid_iterations: 1 + src_operands: + - operand: "$1" color: "RED" diff --git a/test/testbench/policy_behavior/late_arrival.yaml b/test/testbench/policy_behavior/late_arrival.yaml new file mode 100644 index 0000000..4092381 --- /dev/null +++ b/test/testbench/policy_behavior/late_arrival.yaml @@ -0,0 +1,47 @@ +array_config: + columns: 2 + rows: 1 + compiled_ii: 8 + cores: + - column: 0 + row: 0 + core_id: "relay" + entries: + - entry_id: "entry0" + instructions: + - index_per_ii: 0 + operations: + - opcode: "DATA_MOV" + id: 0 + time_step: 0 + invalid_iterations: 0 + src_operands: + - operand: "WEST" + color: "RED" + dst_operands: + - operand: "EAST" + color: "RED" + - column: 1 + row: 0 + core_id: "sink" + entries: + - entry_id: "entry0" + instructions: + - index_per_ii: 0 + operations: + - opcode: "STORE" + id: 1 + time_step: 0 + invalid_iterations: 0 + src_operands: + - operand: "WEST" + color: "RED" + - operand: "0" + color: "RED" + - opcode: "RETURN_VALUE" + id: 2 + time_step: 0 + invalid_iterations: 0 + src_operands: + - operand: "1" + color: "RED" diff --git a/test/testbench/policy_behavior/policy_behavior_test.go b/test/testbench/policy_behavior/policy_behavior_test.go new file mode 100644 index 0000000..208f7df --- 
/dev/null +++ b/test/testbench/policy_behavior/policy_behavior_test.go @@ -0,0 +1,137 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "testing" + + "github.com/sarchlab/akita/v4/sim" + "github.com/sarchlab/zeonica/cgra" + "github.com/sarchlab/zeonica/core" + "github.com/sarchlab/zeonica/runtimecfg" +) + +type runResult struct { + panicMsg string + memValue uint32 + retValue uint32 + endNS int64 +} + +func resolveScenarioPath(t *testing.T, filename string) string { + t.Helper() + + _, thisFile, _, ok := runtime.Caller(0) + if !ok { + t.Fatalf("cannot resolve current test file path") + } + + path := filepath.Clean(filepath.Join(filepath.Dir(thisFile), filename)) + if _, err := os.Stat(path); err != nil { + t.Fatalf("scenario file %s not found: %v", path, err) + } + return path +} + +func writePolicyArchSpec(t *testing.T, policy string) string { + t.Helper() + + spec := fmt.Sprintf(`cgra_defaults: + rows: 1 + columns: 2 +simulator: + execution_model: "serial" + execution_policy: "%s" + logging: + enabled: false + driver: + name: "Driver" + frequency: "1GHz" + device: + name: "Device" + frequency: "1GHz" + bind_to_architecture: true +`, policy) + + specPath := filepath.Join(t.TempDir(), "arch_spec.yaml") + if err := os.WriteFile(specPath, []byte(spec), 0o600); err != nil { + t.Fatalf("write arch spec: %v", err) + } + return specPath +} + +func runWorkloadWithPolicy(t *testing.T, policy, scenarioPath string) (result runResult) { + t.Helper() + + defer func() { + if recovered := recover(); recovered != nil { + result.panicMsg = fmt.Sprint(recovered) + } + }() + + specPath := writePolicyArchSpec(t, policy) + rt, err := runtimecfg.LoadRuntime(specPath, "policy_behavior_"+policy) + if err != nil { + t.Fatalf("load runtime: %v", err) + } + + program := core.LoadProgramFileFromYAML(scenarioPath) + if len(program) == 0 { + t.Fatalf("empty program map from %s", scenarioPath) + } + + width := rt.Config.Columns + height := rt.Config.Rows + 
for x := 0; x < width; x++ { + for y := 0; y < height; y++ { + coord := fmt.Sprintf("(%d,%d)", x, y) + if prog, exists := program[coord]; exists { + rt.Driver.MapProgram(prog, [2]int{x, y}) + } + } + } + + for x := 0; x < width; x++ { + for y := 0; y < height; y++ { + tile := rt.Device.GetTile(x, y) + rt.Engine.Schedule(sim.MakeTickEvent(tile.GetTickingComponent(), 0)) + } + } + + rt.Driver.FeedIn([]uint32{42}, cgra.West, [2]int{0, 1}, 1, "R") + rt.Driver.Run() + + result.memValue = rt.Driver.ReadMemory(1, 0, 0) + result.retValue = rt.Device.GetTile(1, 0).GetRetVal() + result.endNS = int64(rt.Engine.CurrentTime() * 1e9) + + return result +} + +func TestPolicyBehaviorLateArrival(t *testing.T) { + scenarioPath := resolveScenarioPath(t, "late_arrival.yaml") + + strict := runWorkloadWithPolicy(t, "strict_timed", scenarioPath) + if !strings.Contains(strict.panicMsg, "synchronization violation") { + t.Fatalf("strict_timed should report synchronization violation, got: %q", strict.panicMsg) + } + + elastic := runWorkloadWithPolicy(t, "elastic_scheduled", scenarioPath) + if elastic.panicMsg != "" { + t.Fatalf("elastic_scheduled should tolerate late arrival, got panic: %s", elastic.panicMsg) + } + if elastic.memValue != 42 || elastic.retValue != 1 { + t.Fatalf("elastic_scheduled wrong result: mem=%d ret=%d want mem=42 ret=1", elastic.memValue, elastic.retValue) + } + + inOrder := runWorkloadWithPolicy(t, "in_order_dataflow", scenarioPath) + if inOrder.panicMsg != "" { + t.Fatalf("in_order_dataflow should tolerate late arrival, got panic: %s", inOrder.panicMsg) + } + if inOrder.memValue != 42 || inOrder.retValue != 1 { + t.Fatalf("in_order_dataflow wrong result: mem=%d ret=%d want mem=42 ret=1", inOrder.memValue, inOrder.retValue) + } +} diff --git a/test/testbench/relu/main.go b/test/testbench/relu/main.go index e47dd49..1ff2085 100644 --- a/test/testbench/relu/main.go +++ b/test/testbench/relu/main.go @@ -2,47 +2,36 @@ package main import ( "fmt" - "log/slog" "os" + 
"path/filepath" + "runtime" + "strings" "github.com/sarchlab/akita/v4/sim" - "github.com/sarchlab/zeonica/api" - "github.com/sarchlab/zeonica/config" "github.com/sarchlab/zeonica/core" + "github.com/sarchlab/zeonica/runtimecfg" ) -func Relu() { - width := 4 - height := 4 - - engine := sim.NewSerialEngine() - - driver := api.DriverBuilder{}. - WithEngine(engine). - WithFreq(1 * sim.GHz). - Build("Driver") - - device := config.DeviceBuilder{}. - WithEngine(engine). - WithFreq(1 * sim.GHz). - WithWidth(width). - WithHeight(height). - Build("Device") - - driver.RegisterDevice(device) - - programPath := "test/testbench/relu/relu.yaml" - - // preload data - - data := []int32{1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, 14, -15, 16, 17, 18, 19, 20, -21, 22, 23, 24, -25, 26, 27, 28, -29, 30, -31, 32} // length is 32 - - for i := 0; i < len(data); i++ { - driver.PreloadMemory(3, 2, uint32(data[i]), uint32(i)) +// Relu runs the ReLU testbench on the configured runtime. +// +//nolint:gocyclo +func Relu(rt *runtimecfg.Runtime) int { + width := rt.Config.Columns + height := rt.Config.Rows + + driver := rt.Driver + device := rt.Device + engine := rt.Engine + + programPath := strings.TrimSpace(os.Getenv("ZEONICA_PROGRAM_YAML")) + if programPath == "" { + if _, err := os.Stat("relu.yaml"); err == nil { + programPath = "relu.yaml" + } else { + programPath = "relu/relu.yaml" + } } - program := core.LoadProgramFileFromYAML(programPath) - fmt.Println("program:", program) if len(program) == 0 { @@ -58,43 +47,132 @@ func Relu() { } } + // preload input data at tile (3,2): 32 int32 values + inputData := []int32{1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, 14, -15, 16, 17, 18, 19, 20, -21, 22, 23, 24, -25, 26, 27, 28, -29, 30, -31, 32} + for i := 0; i < len(inputData); i++ { + driver.PreloadMemory(3, 2, uint32(inputData[i]), uint32(i)) + } + // fire all the cores in the beginning for x := 0; x < width; x++ { for y := 0; y < height; y++ { tile := device.GetTile(x, y) - // convert 
to tileCore tickingComponent := tile.GetTickingComponent() engine.Schedule(sim.MakeTickEvent(tickingComponent, 0)) } } - // TODO: Add PreloadMemory calls if needed for relu test - // driver.PreloadMemory(x, y, data, baseAddr) - driver.Run() fmt.Println("========================") fmt.Println("========================") fmt.Println("========================") - // get memory values in (1,3) from 0x0-0x31 - for i := 0; i < 32; i++ { - value := driver.ReadMemory(1, 3, uint32(i)) - fmt.Println("memory[", i, "]:", value) + // output tile (1,3), 32 elements + outputTile := [2]int{1, 3} + scanLimit := 32 + fmt.Printf("output memory @ tile (%d,%d):\n", outputTile[0], outputTile[1]) + outputData := make([]uint32, scanLimit) + for addr := 0; addr < scanLimit; addr++ { + val := driver.ReadMemory(outputTile[0], outputTile[1], uint32(addr)) + outputData[addr] = val + fmt.Printf(" addr %d -> %d\n", addr, val) + } + + expected := computeReLU(inputData) + fmt.Println("expected ReLU (CPU):") + reluMismatch := 0 + for i, val := range expected { + fmt.Printf(" addr %d -> %d\n", i, val) + if i < len(outputData) && outputData[i] != val { + reluMismatch++ + } + } + if reluMismatch == 0 { + fmt.Println("✅ output matches expected ReLU") + } else { + fmt.Printf("❌ output mismatches ReLU: %d\n", reluMismatch) + } + return reluMismatch +} + +func computeReLU(input []int32) []uint32 { + out := make([]uint32, len(input)) + for i, v := range input { + if v > 0 { + out[i] = uint32(v) + } else { + out[i] = 0 + } + } + return out +} + +func resolveArchSpecPath() (string, error) { + fromEnv := strings.TrimSpace(os.Getenv("ZEONICA_ARCH_SPEC")) + if fromEnv != "" { + if _, err := os.Stat(fromEnv); err == nil { + return fromEnv, nil + } + return "", fmt.Errorf("ZEONICA_ARCH_SPEC points to a missing file: %s", fromEnv) + } + + candidates := []string{ + "test/arch_spec/arch_spec.yaml", + "../../arch_spec/arch_spec.yaml", } + + if _, thisFile, _, ok := runtime.Caller(0); ok { + candidates = 
append(candidates, + filepath.Clean(filepath.Join(filepath.Dir(thisFile), "..", "..", "arch_spec", "arch_spec.yaml")), + ) + } + + seen := make(map[string]struct{}, len(candidates)) + normalized := make([]string, 0, len(candidates)) + for _, candidate := range candidates { + clean := filepath.Clean(candidate) + if _, exists := seen[clean]; exists { + continue + } + seen[clean] = struct{}{} + normalized = append(normalized, clean) + if _, err := os.Stat(clean); err == nil { + return clean, nil + } + } + + return "", fmt.Errorf("cannot locate arch spec, tried: %s", strings.Join(normalized, ", ")) } func main() { - f, err := os.Create("relu.json.log") + const testName = "relu" + + archSpecPath, err := resolveArchSpecPath() if err != nil { panic(err) } - defer f.Close() - handler := slog.NewJSONHandler(f, &slog.HandlerOptions{ - Level: core.LevelTrace, - }) + rt, err := runtimecfg.LoadRuntime(archSpecPath, testName) + if err != nil { + panic(err) + } + + traceLog, err := rt.InitTraceLogger(core.LevelTrace) + if err != nil { + panic(err) + } + + mismatch := Relu(rt) - slog.SetDefault(slog.New(handler)) - Relu() + if err := runtimecfg.CloseTraceLog(traceLog); err != nil { + panic(err) + } + + passed := mismatch == 0 + reportPath, err := rt.GenerateSaveAndPrintReport(5, &passed, &mismatch) + if err != nil { + panic(err) + } + fmt.Printf("report saved: %s\n", reportPath) } diff --git a/tool/viz/README.md b/tool/viz/README.md index 6f6c54c..7af721d 100644 --- a/tool/viz/README.md +++ b/tool/viz/README.md @@ -1,6 +1,9 @@ # CGRA Log Viewer -This viewer visualizes JSONL traces like `gemm.json.log` with a cycle slider and playback. 
+This viewer has two synchronized views: + +- Timeline replay for JSONL traces (cycle slider + playback) +- **Strict Timing Offset View** (program YAML + trace correlation by op ID) ## Run @@ -13,13 +16,155 @@ python3 -m http.server 8000 Open: ```text -http://localhost:8000/viz/ +http://localhost:8000/tool/viz/ ``` -It will try to load `../gemm.json.log` automatically. You can also load any other trace from the file picker. +The page tries to auto-load files (example): + +- `../gemm.json.log` (trace) +- `../gemm.yaml` (program) + +If not found, use file pickers manually. + +## Inputs + +You need both files to get strict timing comparison: + +- **Trace log**: JSONL with `Inst` events (`X`, `Y`, `ID`, `Time`) +- **Program YAML**: includes `array_config.compiled_ii`, per-core operations with `id` and `time_step` + +Optional aggregate report input: + +- **Report JSON**: generated report (for example `fir.report.json`) with `grid`, global counters, per-tile `utilizationPct`, and `topHotTiles`. +- Report can be loaded independently from trace/yaml for quick utilization review. +- Backpressure metrics are supported when report includes runtime `Backpressure` events: + - `backpressureCount`: total downstream backpressure hits (`SendBufBusy`) + - `backpressureCycles`: cycles containing at least one backpressure hit + - `tiles[].backpressureCount` and `topBackpressureTiles` + +## Strict Timing Offset View + +Layout behavior: + +- Top grid uses hybrid adaptation based on detected grid size: + - If YAML provides `array_config.columns/rows`, mesh size uses YAML array bounds first. + - If YAML is unavailable, bounds are inferred from trace events. + - Prefer fitting into current canvas viewport by scaling tile/gap. + - If tile would become too small, switch to expanded `viewBox` to keep readability. +- Top mesh supports free zoom/pan (wheel zoom + drag pan) for large arrays. 
+- If report is loaded, mesh adds a utilization heat overlay from `tiles[].utilizationPct` (missing tiles treated as 0). +- Active tiles render per-cycle summary text in-tile: + - `OP:` instruction opcode summary + - `MEM:` direct memory behaviors (e.g. `LoadDirect` / `StoreDirect`) + - `RX:` / `TX:` data snippets from `Send` / `Recv` / `FeedIn` / `Collect` +- DataFlow links keep pulse animation and include inline data labels from trace `Data` fields (deduplicated for same path/data in one cycle). +- Bottom timing view uses a timeline axis (`Y=core`, `X=cycle`) + drilldown. + +Report view: + +- Report panel shows summary cards: `totalCycles`, `activeCyclesGlobal`, `idleCyclesGlobal`, `passed`, `mismatchCount`, `activeTileCount`, `totalEvents`. +- Hot-tile table shows ranked `coord`, `utilizationPct`, `activeCycles`, `totalEvents`. +- Backpressure section shows ranked `coord`, `bp-count` from `topBackpressureTiles` (if available). +- If report grid and current mesh grid differ, viewer shows a warning; overlay is clipped to current mesh bounds. + +Timing view layout: + +- One lane per core `(x,y)` with: + - upper sub-row blocks: Expected slots + - lower sub-row blocks: Actual slots (all samples in full trace) +- Timeline blocks are expanded by **all actual samples across full trace length** (not just first occurrence). +- For each actual sample occurrence, expected block is back-computed and aligned by that sample's delta. +- Fractional `Time` values are rounded with `Math.round` before slot/time comparison and rendering. 
+- `baseline-view` supports: + - `strict`: strict baseline only + - `compensated`: compensated baseline only + - `split`: strict + compensated side-by-side rows for comparison +- Mismatch blocks/links are drawn as rectangles (not points) for slot-level readability +- Drilldown panel still shows operation-level details for selected `(core, slot)` +- `window-start` + `window-size` let you pan/zoom through full trace cycles +- Optional IO waveform expansion supports multiple cores: + - `io-wave-all`: expand waveform rows for all visible cores + - `io-wave-core`: multi-select a subset of cores to expand + - double-click Y-axis core label: toggle that core's IO wave quickly +- Expanded IO rows are bus-style waveform segments (trapezoid/diamond transition with parallel top and bottom edges): + - `IN` row: DataFlow values from `FeedIn(to tile)` and `Recv(dst tile)` + - `OUT` row: DataFlow values from `Send(src tile)` and `Collect(from tile)` +- IO waveform values are rendered in signed decimal; when multiple values occur in one cycle, the waveform label shows a compact summary and full values remain in tooltip. + +Default view (hybrid as main): + +- **Default** is `baseline-view=compensated` and `comp-model=hybrid`. The timeline and anomaly filter use **hybrid** status only, so you focus on "mid-trace" offsets after subtracting expected propagation delay; strict remains in summary and drilldown for reference. +- Use `strict` or `split` only when you want to double-check raw schedule vs trace or debug compiler/schedule issues. + +Default interaction: + +- `anomaly-only` is disabled by default; when in compensated view it filters by **hybrid** status (not strict). +- `show-phase-explain` is enabled by default to expose per-core phase offsets +- `boundary-only` can focus edge PEs to verify boundary shift patterns quickly +- **Jump to first hybrid mismatch** button moves the time window to the first cycle where any op is a hybrid mismatch. 
+- `Ctrl + mouse wheel` zooms timeline quickly (X/Y together). Zoom anchor follows mouse position on X-axis to reduce view jump. +- `y-zoom` slider adjusts lane height/readability; `Reset Zoom` restores default zoom and window. +- `comp-model` supports: + - `distance-heuristic`: infer propagation delay from core-to-ingress distance + - `trace-fitted-phase`: use per-core fitted phase (`modeDelta`) + - `hybrid`: prefer fitted when confidence is high, otherwise fall back to distance (default) +- Click a timeline block/link to inspect operation-level details in drilldown +- Drilldown now includes sample source fields so each match can be traced back to `Inst` / `LoadDirect` / `StoreDirect`. +- Core focus supports two synced entry points: click Y-axis core label, or select from `core-focus` dropdown. +- When a core is focused, the main timeline keeps only that core and an inline mini panel shows source distribution plus a compact in-window trace list. +- Y-axis label interaction is split: + - single-click: focus/unfocus core for main timeline + - double-click: toggle IO waveform expansion for that core +- `Export PNG` downloads the current timeline window +- `max-side` controls export scaling upper bound; oversized windows are proportionally downscaled +- For repeated op executions, timeline labels/tooltips include occurrence tag (e.g. `@2` or `[2/5]`). 
+ +Status semantics: + +- **Strict baseline (truth reference, unchanged):** + - `on-time`: `actualSlot == expectedSlot` + - `early`: `actualSlot < expectedSlot` (signed modular delta) + - `late`: `actualSlot > expectedSlot` + - `missing`: operation exists in YAML but no `Inst` with same `(x,y,id)` in trace +- **Compensated baseline (explanation layer):** + - strict delta is rebased by per-core compensation offset + - used to reduce global boundary propagation shift false-positives + - never replaces strict verdict; always shown as secondary comparison + +Phase explanation layer (additive, does not change strict status): + +- `Δcore`: dominant per-core phase offset inferred from mismatch mode (`modeDelta`) +- `conf`: confidence of that offset from mismatch concentration +- `phase(boundary, inner, gap)`: weighted-median phase summary comparing boundary vs inner cores +- `deltaRebased`: per-op delta after subtracting `Δcore` (for separating global shift from local residual anomalies) + +Shift-aware annotations: + +- `first-divergence`: first mismatch point or delta-change point in a core +- `propagated`: same-delta continuation after divergence (faded style) + +Drilldown fields: + +- `opId`, `opcode` +- `expectedSlot`, `actualSlot` +- `deltaStrict` +- `deltaComp()` +- `statusStrict` / `statusComp` +- `deltaPhaseRebased` +- `firstTime` +- `samples` +- `sourceSummary` (for example `Inst*10,LoadDirect*2`) +- `firstDivergence` +- `samplePreview` with source tags (for example `1:210:Inst,2:213:LoadDirect`) + +Recommended read path: + +1. Use default **compensated + hybrid** view to see whether there are any mid-trace offsets (hybrid mismatch). Use "Jump to first hybrid mismatch" to focus the window on the first such cycle. +2. In drilldown, read `statusComp` / `deltaComp(hybrid)` first; treat `statusStrict` / `deltaStrict` as reference only for double-check. +3. 
If you need to verify raw schedule vs trace, switch to `strict` or `split` and compare; strict is the truth reference for pass/fail. -## Supported events +## Supported event families (timeline view) - `DataFlow` (`FeedIn`, `Send`, `Recv`, `Collect`) -- `Inst` (`DATA_MOV`, `MUL_ADD`, `STORE`) -- `Memory` (`StoreDirect`) +- `Inst` (generic instruction events) +- `Memory` (e.g., `StoreDirect`) diff --git a/tool/viz/__pycache__/run_viz.cpython-313.pyc b/tool/viz/__pycache__/run_viz.cpython-313.pyc new file mode 100644 index 0000000..f91c781 Binary files /dev/null and b/tool/viz/__pycache__/run_viz.cpython-313.pyc differ diff --git a/tool/viz/app.js b/tool/viz/app.js index 13d7c56..ba65ad5 100644 --- a/tool/viz/app.js +++ b/tool/viz/app.js @@ -1,6 +1,7 @@ const state = { events: [], byTime: new Map(), + timeKeys: [], minTime: 0, maxTime: 0, currentTime: 0, @@ -12,9 +13,52 @@ const state = { showInst: true, showMemory: true, showLabels: true, + programSpec: null, + yamlGridBounds: null, + reportSpec: null, + reportReady: false, + reportError: "", + reportHeatMetric: "utilizationPct", + timingRows: [], + timingColumns: [], + timingReady: false, + layoutMode: "fit", + timingAnomalyOnly: false, + timingSelectedCell: null, + timingFocusedCoreKey: null, + showPhaseExplain: true, + timingBoundaryOnly: false, + timingBaselineView: "compensated", + timingCompModel: "hybrid", + timingIoWaveExpandAll: false, + timingIoWaveExpandedCoreKeys: new Set(), + timingWindowStart: 0, + timingWindowSize: 120, + timingZoomX: 1, + timingZoomY: 1, + timingViewport: null, + firstHybridMismatchTime: null, + coreIoWaveByTime: new Map(), + stepLock: false, }; const layout = { + baseWidth: 940, + baseHeight: 620, + baseTileSize: 100, + baseGap: 24, + baseDriverOffset: 52, + marginLeft: 170, + marginRight: 92, + marginTop: 90, + marginBottom: 88, + minTileSize: 28, + maxTileSize: 124, + minReadableTile: 36, + minGap: 7, + maxGap: 28, + minDriverOffset: 20, + maxDriverOffset: 66, width: 940, height: 
620, originX: 170, @@ -31,11 +75,15 @@ const colors = { Collect: "#8338ec", Inst: "#f77f00", Memory: "#d62828", + Backpressure: "#b91c1c", }; const svg = d3.select("#canvas"); +let sceneRoot; let staticLayer; let dynamicLayer; +let meshZoomBehavior = null; +let meshZoomTransform = d3.zoomIdentity; const controls = { playBtn: document.getElementById("playBtn"), @@ -49,14 +97,142 @@ const controls = { showMemory: document.getElementById("showMemory"), showLabels: document.getElementById("showLabels"), fileInput: document.getElementById("fileInput"), + yamlInput: document.getElementById("yamlInput"), + reportInput: document.getElementById("reportInput"), statsLine: document.getElementById("statsLine"), eventDump: document.getElementById("eventDump"), + reportSummary: document.getElementById("reportSummary"), + reportHotTiles: document.getElementById("reportHotTiles"), + reportWarning: document.getElementById("reportWarning"), + timingSummary: document.getElementById("timingSummary"), + timingGrid: document.getElementById("timingGrid"), + timingAnomalyOnly: document.getElementById("timingAnomalyOnly"), + timingShowPhaseExplain: document.getElementById("timingShowPhaseExplain"), + timingBoundaryOnly: document.getElementById("timingBoundaryOnly"), + timingCoreFocus: document.getElementById("timingCoreFocus"), + timingIoWaveAll: document.getElementById("timingIoWaveAll"), + timingIoWaveCore: document.getElementById("timingIoWaveCore"), + timingBaselineView: document.getElementById("timingBaselineView"), + timingCompModel: document.getElementById("timingCompModel"), + timingWindowStart: document.getElementById("timingWindowStart"), + timingWindowSize: document.getElementById("timingWindowSize"), + timingWindowStartLabel: document.getElementById("timingWindowStartLabel"), + timingWindowSizeLabel: document.getElementById("timingWindowSizeLabel"), + timingZoomY: document.getElementById("timingZoomY"), + timingZoomYLabel: document.getElementById("timingZoomYLabel"), + 
timingResetZoom: document.getElementById("timingResetZoom"), + timingExportPng: document.getElementById("timingExportPng"), + timingExportMaxSide: document.getElementById("timingExportMaxSide"), + timingJumpFirstMismatch: document.getElementById("timingJumpFirstMismatch"), + timingDrilldown: document.getElementById("timingDrilldown"), + timingCoreMini: document.getElementById("timingCoreMini"), + meshLegend: document.getElementById("meshLegend"), + vizPanel: document.querySelector(".panel.viz"), }; +let timingCoreLabelClickTimer = null; function tileKey(x, y) { return `${x},${y}`; } +function clamp(value, min, max) { + return Math.max(min, Math.min(max, value)); +} + +function normalizeCycleTime(value, fallback = 0) { + const numeric = Math.round(Number(value)); + return Number.isFinite(numeric) ? numeric : fallback; +} + +function nextIndexedTime(current, direction) { + const dir = direction >= 0 ? 1 : -1; + const keys = Array.isArray(state.timeKeys) ? state.timeKeys : []; + const cur = normalizeCycleTime(current, state.minTime); + if (keys.length === 0) { + const target = cur + dir; + return clamp(target, state.minTime, state.maxTime); + } + const exactIdx = keys.indexOf(cur); + if (exactIdx >= 0) { + const nextIdx = clamp(exactIdx + dir, 0, keys.length - 1); + return keys[nextIdx]; + } + if (dir > 0) { + for (const t of keys) { + if (t > cur) return t; + } + return keys[keys.length - 1]; + } + for (let i = keys.length - 1; i >= 0; i -= 1) { + if (keys[i] < cur) return keys[i]; + } + return keys[0]; +} + +function resolveTargetViewport() { + const hostWidth = controls.vizPanel?.clientWidth || layout.baseWidth; + const width = Math.max(720, Math.round(hostWidth) - 8); + const height = Math.max(480, Math.round(width * (layout.baseHeight / layout.baseWidth))); + return { width, height }; +} + +function applyAdaptiveLayout() { + const cols = Math.max(1, state.maxX + 1); + const rows = Math.max(1, state.maxY + 1); + const { width: targetWidth, height: targetHeight } = 
resolveTargetViewport(); + + const contentW = Math.max(1, targetWidth - layout.marginLeft - layout.marginRight); + const contentH = Math.max(1, targetHeight - layout.marginTop - layout.marginBottom); + const baseGridW = cols * layout.baseTileSize + (cols - 1) * layout.baseGap; + const baseGridH = rows * layout.baseTileSize + (rows - 1) * layout.baseGap; + const fitScale = Math.min(contentW / baseGridW, contentH / baseGridH); + const boundedScale = clamp(fitScale, 0.2, 1.45); + + let tileSize = clamp( + Math.round(layout.baseTileSize * boundedScale), + layout.minTileSize, + layout.maxTileSize, + ); + let gap = clamp(Math.round(layout.baseGap * boundedScale), layout.minGap, layout.maxGap); + let driverOffset = clamp( + Math.round(layout.baseDriverOffset * boundedScale), + layout.minDriverOffset, + layout.maxDriverOffset, + ); + let mode = "fit"; + if (tileSize < layout.minReadableTile) { + mode = "expand"; + tileSize = layout.minReadableTile; + const readableScale = tileSize / layout.baseTileSize; + gap = clamp(Math.round(layout.baseGap * readableScale), layout.minGap, layout.maxGap); + driverOffset = clamp( + Math.round(layout.baseDriverOffset * readableScale), + layout.minDriverOffset, + layout.maxDriverOffset, + ); + } + + const gridW = cols * tileSize + (cols - 1) * gap; + const gridH = rows * tileSize + (rows - 1) * gap; + const neededW = layout.marginLeft + gridW + layout.marginRight; + const neededH = layout.marginTop + gridH + layout.marginBottom; + const width = mode === "expand" ? Math.max(targetWidth, neededW) : targetWidth; + const height = mode === "expand" ? 
Math.max(targetHeight, neededH) : targetHeight; + + const freeW = width - layout.marginLeft - layout.marginRight - gridW; + const freeH = height - layout.marginTop - layout.marginBottom - gridH; + layout.width = width; + layout.height = height; + layout.tileSize = tileSize; + layout.gap = gap; + layout.driverOffset = driverOffset; + layout.originX = layout.marginLeft + Math.max(0, Math.floor(freeW / 2)); + layout.originY = layout.marginTop + Math.max(0, Math.floor(freeH / 2)); + state.layoutMode = mode; + + svg.attr("viewBox", `0 0 ${layout.width} ${layout.height}`); +} + function tileRect(x, y) { const step = layout.tileSize + layout.gap; const px = layout.originX + x * step; @@ -81,100 +257,2159 @@ function parseEndpoint(name) { const driverMatch = /^Driver\.Device(North|South|East|West)\[(\d+)\]$/.exec(name); if (driverMatch) { return { - kind: "driver", - side: driverMatch[1], - idx: Number(driverMatch[2]), - raw: name, + kind: "driver", + side: driverMatch[1], + idx: Number(driverMatch[2]), + raw: name, + }; + } + return { kind: "unknown", raw: name }; +} + +function endpointPoint(ep) { + if (!ep) { + return null; + } + if (ep.kind === "tilePort") { + const r = tileRect(ep.x, ep.y); + if (ep.port === "North") return { x: r.x + r.w / 2, y: r.y, tile: tileKey(ep.x, ep.y) }; + if (ep.port === "South") return { x: r.x + r.w / 2, y: r.y + r.h, tile: tileKey(ep.x, ep.y) }; + if (ep.port === "West") return { x: r.x, y: r.y + r.h / 2, tile: tileKey(ep.x, ep.y) }; + if (ep.port === "East") return { x: r.x + r.w, y: r.y + r.h / 2, tile: tileKey(ep.x, ep.y) }; + } + if (ep.kind === "driver") { + const side = ep.side; + const idx = ep.idx; + if (side === "North" && idx <= state.maxX) { + const r = tileRect(idx, state.maxY); + return { x: r.x + r.w / 2, y: r.y - layout.driverOffset }; + } + if (side === "South" && idx <= state.maxX) { + const r = tileRect(idx, 0); + return { x: r.x + r.w / 2, y: r.y + r.h + layout.driverOffset }; + } + if (side === "West" && idx <= 
state.maxY) { + const r = tileRect(0, idx); + return { x: r.x - layout.driverOffset, y: r.y + r.h / 2 }; + } + if (side === "East" && idx <= state.maxY) { + const r = tileRect(state.maxX, idx); + return { x: r.x + r.w + layout.driverOffset, y: r.y + r.h / 2 }; + } + } + return null; +} + +function normalizePortName(value) { + const raw = String(value || "").trim().toLowerCase(); + if (raw === "north" || raw === "n") return "North"; + if (raw === "south" || raw === "s") return "South"; + if (raw === "east" || raw === "e") return "East"; + if (raw === "west" || raw === "w") return "West"; + return null; +} + +function oppositePort(port) { + if (port === "North") return "South"; + if (port === "South") return "North"; + if (port === "East") return "West"; + if (port === "West") return "East"; + return null; +} + +function tilePortEndpoint(x, y, port) { + return { + kind: "tilePort", + x, + y, + port, + raw: `Device.Tile[${y}][${x}].Core.${port}`, + }; +} + +function driverEndpoint(side, idx) { + return { + kind: "driver", + side, + idx, + raw: `Driver.Device${side}[${idx}]`, + }; +} + +function neighborEndpointFromTilePort(x, y, port) { + const opposite = oppositePort(port); + if (!opposite) return null; + if (port === "North") { + const ny = y + 1; + if (ny <= state.maxY) return tilePortEndpoint(x, ny, opposite); + return driverEndpoint("North", x); + } + if (port === "South") { + const ny = y - 1; + if (ny >= 0) return tilePortEndpoint(x, ny, opposite); + return driverEndpoint("South", x); + } + if (port === "East") { + const nx = x + 1; + if (nx <= state.maxX) return tilePortEndpoint(nx, y, opposite); + return driverEndpoint("East", y); + } + if (port === "West") { + const nx = x - 1; + if (nx >= 0) return tilePortEndpoint(nx, y, opposite); + return driverEndpoint("West", y); + } + return null; +} + +function inferBounds(events) { + let maxX = 0; + let maxY = 0; + for (const e of events) { + if (Number.isInteger(e.X)) maxX = Math.max(maxX, e.X); + if 
(Number.isInteger(e.Y)) maxY = Math.max(maxY, e.Y); + for (const f of ["Src", "Dst", "From", "To"]) { + if (!e[f]) continue; + const ep = parseEndpoint(e[f]); + if (ep && ep.kind === "tilePort") { + maxX = Math.max(maxX, ep.x); + maxY = Math.max(maxY, ep.y); + } + } + } + return { maxX, maxY }; +} + +function boundsFromProgramSpec(programSpec) { + if (!programSpec) return null; + const cols = Number(programSpec.arrayColumns); + const rows = Number(programSpec.arrayRows); + if (!Number.isFinite(cols) || !Number.isFinite(rows) || cols <= 0 || rows <= 0) return null; + return { + maxX: Math.max(0, Math.round(cols) - 1), + maxY: Math.max(0, Math.round(rows) - 1), + }; +} + +function boundsFromReportSpec(reportSpec) { + if (!reportSpec?.grid) return null; + const width = Number(reportSpec.grid.width); + const height = Number(reportSpec.grid.height); + if (!Number.isFinite(width) || !Number.isFinite(height) || width <= 0 || height <= 0) return null; + return { + maxX: Math.max(0, Math.round(width) - 1), + maxY: Math.max(0, Math.round(height) - 1), + }; +} + +function resolveMeshBounds(events) { + const yamlBounds = boundsFromProgramSpec(state.programSpec); + if (yamlBounds) return yamlBounds; + const traceBounds = inferBounds(events); + const hasTraceBounds = traceBounds.maxX > 0 || traceBounds.maxY > 0; + if (hasTraceBounds) return traceBounds; + const reportBounds = boundsFromReportSpec(state.reportSpec); + if (reportBounds) return reportBounds; + return traceBounds; +} + +function parseJsonLines(text) { + const lines = text.split(/\r?\n/).map((s) => s.trim()).filter(Boolean); + const rows = []; + let lastTime = null; + for (const line of lines) { + try { + const obj = JSON.parse(line); + if (obj && Number.isFinite(Number(obj.Time))) { + obj.Time = Math.round(Number(obj.Time)); + lastTime = obj.Time; + rows.push(obj); + continue; + } + // Some memory traces (e.g. LoadDirect/StoreDirect) may omit Time. 
+ // Reuse the latest observed cycle to keep them alignable in strict matching. + if (obj && obj.msg === "Memory" && Number.isFinite(lastTime)) { + obj.Time = lastTime; + rows.push(obj); + } + } catch (_) { + // Ignore malformed lines. + } + } + return rows; +} + +function indexByTime(events) { + const byTime = new Map(); + let minTime = Number.POSITIVE_INFINITY; + let maxTime = Number.NEGATIVE_INFINITY; + for (const e of events) { + const tKey = Math.round(Number(e.Time)); + if (!byTime.has(tKey)) byTime.set(tKey, []); + byTime.get(tKey).push(e); + minTime = Math.min(minTime, tKey); + maxTime = Math.max(maxTime, tKey); + } + if (!Number.isFinite(minTime) || !Number.isFinite(maxTime)) { + minTime = 0; + maxTime = 0; + } + const sortedTimes = [...byTime.keys()].sort((a, b) => a - b); + return { byTime, minTime, maxTime, sortedTimes }; +} + +function normalizeSlot(value, ii) { + const v = Math.round(Number(value)); + if (!Number.isFinite(v)) return 0; + if (ii > 0) { + let slot = v % ii; + if (slot < 0) slot += ii; + return slot; + } + return v; +} + +function signedDelta(actualSlot, expectedSlot, ii) { + if (!Number.isFinite(actualSlot) || !Number.isFinite(expectedSlot)) return null; + if (ii <= 0) return actualSlot - expectedSlot; + const raw = normalizeSlot(actualSlot - expectedSlot, ii); + if (raw === 0) return 0; + return raw <= ii / 2 ? raw : raw - ii; +} + +function sortCore(a, b) { + if (b.y !== a.y) return b.y - a.y; + return a.x - b.x; +} + +function escapeHtml(text) { + return String(text) + .replaceAll("&", "&") + .replaceAll("<", "<") + .replaceAll(">", ">") + .replaceAll('"', """); +} + +function formatDelta(delta) { + if (delta == null || !Number.isFinite(Number(delta))) return "N/A"; + const v = Number(delta); + return `${v >= 0 ? 
"+" : ""}${v}`; +} + +function formatDataAsDecimal(value) { + if (value == null) return null; + const numeric = Number(value); + if (Number.isFinite(numeric)) { + if (Number.isInteger(numeric)) return String(numeric); + return String(numeric); + } + const text = String(value).trim(); + return text.length > 0 ? text : null; +} + +function numberOr(value, fallback = 0) { + const v = Number(value); + return Number.isFinite(v) ? v : fallback; +} + +function integerOr(value, fallback = 0) { + return Math.round(numberOr(value, fallback)); +} + +function nullableBool(value) { + if (typeof value === "boolean") return value; + return null; +} + +function parseReportJson(text) { + let raw; + try { + raw = JSON.parse(text); + } catch (err) { + throw new Error(`Invalid JSON: ${err.message}`); + } + if (!raw || typeof raw !== "object") { + throw new Error("Report root must be an object."); + } + + const tiles = (Array.isArray(raw.tiles) ? raw.tiles : []) + .map((item) => { + const x = integerOr(item?.x, NaN); + const y = integerOr(item?.y, NaN); + if (!Number.isFinite(x) || !Number.isFinite(y)) return null; + const util = Math.max(0, Math.min(100, numberOr(item?.utilizationPct, 0))); + return { + x, + y, + coord: String(item?.coord || `(${x},${y})`), + activeCycles: Math.max(0, integerOr(item?.activeCycles, 0)), + utilizationPct: util, + instCount: Math.max(0, integerOr(item?.instCount, 0)), + sendCount: Math.max(0, integerOr(item?.sendCount, 0)), + recvCount: Math.max(0, integerOr(item?.recvCount, 0)), + memoryCount: Math.max(0, integerOr(item?.memoryCount, 0)), + totalEvents: Math.max(0, integerOr(item?.totalEvents, 0)), + backpressureCount: Math.max(0, integerOr(item?.backpressureCount, 0)), + }; + }) + .filter(Boolean); + + const topHotTiles = (Array.isArray(raw.topHotTiles) ? 
raw.topHotTiles : []) + .map((item) => { + const x = integerOr(item?.x, NaN); + const y = integerOr(item?.y, NaN); + if (!Number.isFinite(x) || !Number.isFinite(y)) return null; + return { + x, + y, + coord: String(item?.coord || `(${x},${y})`), + utilizationPct: Math.max(0, Math.min(100, numberOr(item?.utilizationPct, 0))), + activeCycles: Math.max(0, integerOr(item?.activeCycles, 0)), + totalEvents: Math.max(0, integerOr(item?.totalEvents, 0)), + }; + }) + .filter(Boolean); + const topBackpressureTiles = (Array.isArray(raw.topBackpressureTiles) ? raw.topBackpressureTiles : []) + .map((item) => { + const x = integerOr(item?.x, NaN); + const y = integerOr(item?.y, NaN); + if (!Number.isFinite(x) || !Number.isFinite(y)) return null; + return { + x, + y, + coord: String(item?.coord || `(${x},${y})`), + backpressureCount: Math.max(0, integerOr(item?.backpressureCount, 0)), + }; + }) + .filter(Boolean); + + const gridWidth = Math.max(0, integerOr(raw?.grid?.width, 0)); + const gridHeight = Math.max(0, integerOr(raw?.grid?.height, 0)); + const activeTileCount = Math.max(0, integerOr(raw?.activeTileCount, tiles.length)); + const fallbackHot = [...tiles] + .sort((a, b) => { + if (b.utilizationPct !== a.utilizationPct) return b.utilizationPct - a.utilizationPct; + if (b.activeCycles !== a.activeCycles) return b.activeCycles - a.activeCycles; + return b.totalEvents - a.totalEvents; + }) + .slice(0, 8) + .map((t) => ({ + x: t.x, + y: t.y, + coord: t.coord, + utilizationPct: t.utilizationPct, + activeCycles: t.activeCycles, + totalEvents: t.totalEvents, + })); + const fallbackBackpressure = [...tiles] + .filter((t) => t.backpressureCount > 0) + .sort((a, b) => { + if (b.backpressureCount !== a.backpressureCount) return b.backpressureCount - a.backpressureCount; + if (b.totalEvents !== a.totalEvents) return b.totalEvents - a.totalEvents; + return b.activeCycles - a.activeCycles; + }) + .slice(0, 8) + .map((t) => ({ + x: t.x, + y: t.y, + coord: t.coord, + backpressureCount: 
t.backpressureCount, + })); + + return { + testName: String(raw.testName || ""), + logPath: String(raw.logPath || ""), + grid: { + width: gridWidth, + height: gridHeight, + }, + totalCycles: Math.max(0, integerOr(raw.totalCycles, 0)), + activeCyclesGlobal: Math.max(0, integerOr(raw.activeCyclesGlobal, 0)), + idleCyclesGlobal: Math.max(0, integerOr(raw.idleCyclesGlobal, 0)), + passed: nullableBool(raw.passed), + mismatchCount: raw.mismatchCount == null ? null : Math.max(0, integerOr(raw.mismatchCount, 0)), + instCount: Math.max(0, integerOr(raw.instCount, 0)), + sendCount: Math.max(0, integerOr(raw.sendCount, 0)), + recvCount: Math.max(0, integerOr(raw.recvCount, 0)), + memoryCount: Math.max(0, integerOr(raw.memoryCount, 0)), + totalEvents: Math.max(0, integerOr(raw.totalEvents, 0)), + backpressureCount: Math.max(0, integerOr(raw.backpressureCount, 0)), + backpressureCycles: Math.max(0, integerOr(raw.backpressureCycles, 0)), + activeTileCount, + tiles, + topHotTiles: topHotTiles.length > 0 ? topHotTiles : fallbackHot, + topBackpressureTiles: topBackpressureTiles.length > 0 ? topBackpressureTiles : fallbackBackpressure, + }; +} + +function formatPercent(v) { + if (!Number.isFinite(Number(v))) return "N/A"; + return `${Number(v).toFixed(1)}%`; +} + +function renderReportView() { + if (!controls.reportSummary || !controls.reportHotTiles || !controls.reportWarning) return; + + if (state.reportError) { + controls.reportWarning.textContent = state.reportError; + controls.reportWarning.className = "report-warning error"; + controls.reportSummary.innerHTML = "
Report parse failed. Please provide a valid report JSON.
"; + controls.reportHotTiles.innerHTML = ""; + return; + } + + if (!state.reportReady || !state.reportSpec) { + controls.reportWarning.textContent = "Load a report JSON to see aggregate utilization and hot-tile stats."; + controls.reportWarning.className = "report-warning"; + controls.reportSummary.innerHTML = "
No report loaded.
"; + controls.reportHotTiles.innerHTML = ""; + return; + } + + const report = state.reportSpec; + const cards = [ + ["test", report.testName || "N/A"], + ["passed", report.passed == null ? "N/A" : (report.passed ? "yes" : "no")], + ["mismatch", report.mismatchCount == null ? "N/A" : report.mismatchCount], + ["cycles", report.totalCycles], + ["active(global)", report.activeCyclesGlobal], + ["idle(global)", report.idleCyclesGlobal], + ["active-tiles", report.activeTileCount], + ["events", report.totalEvents], + ["bp-count", report.backpressureCount], + ["bp-cycles", report.backpressureCycles], + ]; + controls.reportSummary.innerHTML = cards.map( + ([k, v]) => `
${escapeHtml(k)}
${escapeHtml(v)}
`, + ).join(""); + + const meshW = state.maxX + 1; + const meshH = state.maxY + 1; + const reportW = integerOr(report.grid?.width, 0); + const reportH = integerOr(report.grid?.height, 0); + if (reportW > 0 && reportH > 0 && (reportW !== meshW || reportH !== meshH)) { + controls.reportWarning.textContent = + `grid mismatch: report=${reportW}x${reportH}, mesh=${meshW}x${meshH}. Heat overlay is clipped to current mesh.`; + controls.reportWarning.className = "report-warning warn"; + } else { + controls.reportWarning.textContent = `report loaded: ${reportW || "?"}x${reportH || "?"}, log=${report.logPath || "N/A"}`; + controls.reportWarning.className = "report-warning"; + } + + const hotTiles = (Array.isArray(report.topHotTiles) ? report.topHotTiles : []).slice(0, 12); + const bpTiles = (Array.isArray(report.topBackpressureTiles) ? report.topBackpressureTiles : []).slice(0, 12); + const sections = []; + if (hotTiles.length > 0) { + const rows = hotTiles.map((tile, idx) => + `${idx + 1}${escapeHtml(tile.coord || `(${tile.x},${tile.y})`)}${escapeHtml(formatPercent(tile.utilizationPct))}${escapeHtml(tile.activeCycles)}${escapeHtml(tile.totalEvents)}`).join(""); + sections.push([ + "
Top Hot Tiles
", + "", + "", + `${rows}`, + "
#coordutilizationactiveCyclesevents
", + ].join("")); + } else { + sections.push("
No hot-tile entries.
"); + } + if (bpTiles.length > 0) { + const bpRows = bpTiles.map((tile, idx) => + `${idx + 1}${escapeHtml(tile.coord || `(${tile.x},${tile.y})`)}${escapeHtml(tile.backpressureCount)}`).join(""); + sections.push([ + "
Top Backpressure Tiles
", + "", + "", + `${bpRows}`, + "
#coordbp-count
", + ].join("")); + } + controls.reportHotTiles.innerHTML = sections.join(""); +} + +function applyReportHeatOverlay() { + if (!staticLayer) return; + const heatTiles = staticLayer.selectAll(".tile-report-heat"); + if (!heatTiles || heatTiles.empty()) return; + + heatTiles + .style("display", "none") + .attr("opacity", 0); + + if (!state.reportReady || !state.reportSpec || state.reportHeatMetric !== "utilizationPct") return; + + const byCore = new Map(); + for (const tile of state.reportSpec.tiles || []) { + byCore.set(tileKey(tile.x, tile.y), Math.max(0, Math.min(100, numberOr(tile.utilizationPct, 0)))); + } + heatTiles.each(function (d) { + const k = tileKey(d.x, d.y); + if (!byCore.has(k)) return; + const util = byCore.get(k); + const alpha = 0.08 + (util / 100) * 0.52; + d3.select(this) + .style("display", null) + .attr("opacity", alpha) + .attr("data-util", util.toFixed(1)); + }); +} + +function loadReport(text) { + try { + state.reportSpec = parseReportJson(text); + state.reportReady = true; + state.reportError = ""; + if (!state.programSpec && state.events.length === 0) { + const rb = boundsFromReportSpec(state.reportSpec); + if (rb) { + state.maxX = rb.maxX; + state.maxY = rb.maxY; + applyAdaptiveLayout(); + drawStaticScene(); + } + } + applyReportHeatOverlay(); + renderReportView(); + } catch (err) { + state.reportSpec = null; + state.reportReady = false; + state.reportError = `Report JSON parse error: ${err.message}`; + applyReportHeatOverlay(); + renderReportView(); + } +} + +function abbrevOpLabel(slot, maxLen) { + const len = maxLen ?? 5; + const occTag = slot.occurrenceTotal > 1 ? `@${slot.sampleIndex}` : ""; + if (slot.opcode && String(slot.opcode).trim()) { + const s = String(slot.opcode).trim(); + const head = s.length <= len ? 
s : s.slice(0, len); + return `${head}${occTag}`; + } + return `#${slot.opId}${occTag}`; +} + +function weightedMedian(samples) { + if (!samples || samples.length === 0) return null; + const sorted = [...samples] + .filter((s) => Number.isFinite(s.value) && Number.isFinite(s.weight) && s.weight > 0) + .sort((a, b) => a.value - b.value); + if (sorted.length === 0) return null; + const total = sorted.reduce((acc, s) => acc + s.weight, 0); + let accWeight = 0; + for (const s of sorted) { + accWeight += s.weight; + if (accWeight >= total / 2) return s.value; + } + return sorted[sorted.length - 1].value; +} + +function boundaryLabel(x, y, bounds) { + const tags = []; + if (y === bounds.maxY) tags.push("N"); + if (y === bounds.minY) tags.push("S"); + if (x === bounds.minX) tags.push("W"); + if (x === bounds.maxX) tags.push("E"); + return tags.length > 0 ? tags.join("") : "Inner"; +} + +function computeDeltaRebased(rawDelta, corePhaseOffset, ii) { + if (!Number.isFinite(rawDelta) || !Number.isFinite(corePhaseOffset)) return null; + return signedDelta(rawDelta - corePhaseOffset, 0, ii); +} + +function summarizeTimingCell(items) { + const statusCounts = { onTime: 0, early: 0, late: 0, missing: 0 }; + let hasFirstDivergence = false; + let propagatedCount = 0; + let maxAbsDelta = 0; + for (const item of items) { + if (item.status === "on-time") statusCounts.onTime += 1; + if (item.status === "early") statusCounts.early += 1; + if (item.status === "late") statusCounts.late += 1; + if (item.status === "missing") statusCounts.missing += 1; + if (item.firstDivergence) hasFirstDivergence = true; + if (item.propagated) propagatedCount += 1; + if (Number.isFinite(item.delta)) { + maxAbsDelta = Math.max(maxAbsDelta, Math.abs(Number(item.delta))); + } + } + const anomalyCount = statusCounts.early + statusCounts.late + statusCounts.missing; + const anomalyScore = + statusCounts.missing * 4 + + statusCounts.late * 3 + + statusCounts.early * 2 + + (hasFirstDivergence ? 
2 : 0) + + (propagatedCount > 0 ? 1 : 0); + let dominantStatus = "on-time"; + if (statusCounts.missing > 0) dominantStatus = "missing"; + else if (statusCounts.late > 0) dominantStatus = "late"; + else if (statusCounts.early > 0) dominantStatus = "early"; + return { + statusCounts, + dominantStatus, + anomalyCount, + anomalyScore, + hasAnomaly: anomalyCount > 0, + hasFirstDivergence, + opCount: items.length, + maxAbsDelta, + }; +} + +function buildTimingHeatmap(view) { + const cells = new Map(); + let maxScore = 1; + for (const c of view.columns) { + for (const slot of view.slots) { + const cellKey = `${c.coreKey}|${slot}`; + const items = view.cellMap.get(cellKey) || []; + const summary = summarizeTimingCell(items); + cells.set(cellKey, { + cellKey, + coreKey: c.coreKey, + x: c.x, + y: c.y, + slot, + ...summary, + }); + maxScore = Math.max(maxScore, summary.anomalyScore); + } + } + return { cells, maxScore }; +} + +function buildPhaseExplain(view) { + const xs = view.columns.map((c) => c.x); + const ys = view.columns.map((c) => c.y); + const bounds = { + minX: xs.length > 0 ? Math.min(...xs) : 0, + maxX: xs.length > 0 ? Math.max(...xs) : 0, + minY: ys.length > 0 ? Math.min(...ys) : 0, + maxY: ys.length > 0 ? Math.max(...ys) : 0, + }; + const coreMap = new Map(); + const boundarySamples = []; + const innerSamples = []; + + for (const c of view.columns) { + const label = boundaryLabel(c.x, c.y, bounds); + const isBoundary = label !== "Inner"; + const phaseOffset = Number.isFinite(c.phaseOffset) ? c.phaseOffset : null; + const confidence = Number.isFinite(c.phaseConfidence) ? 
c.phaseConfidence : 0; + const detail = { + isBoundary, + boundaryLabel: label, + phaseOffset, + phaseConfidence: confidence, + modeCount: c.modeCount, + }; + coreMap.set(c.coreKey, detail); + if (phaseOffset == null) continue; + const sample = { value: phaseOffset, weight: Math.max(1, c.modeCount || 0) }; + if (isBoundary) { + boundarySamples.push(sample); + } else { + innerSamples.push(sample); + } + } + + const boundaryPhase = weightedMedian(boundarySamples); + const innerPhase = weightedMedian(innerSamples); + const phaseGap = Number.isFinite(boundaryPhase) && Number.isFinite(innerPhase) + ? signedDelta(boundaryPhase, innerPhase, view.ii) + : null; + + return { + coreMap, + boundaryPhase, + innerPhase, + phaseGap, + }; +} + +function inferIngressSidesFromTrace(events) { + const sides = new Set(); + for (const e of events) { + if (e.msg !== "DataFlow" || e.Behavior !== "FeedIn" || !e.To) continue; + const ep = parseEndpoint(e.To); + if (ep && ep.kind === "tilePort") { + sides.add(ep.port); + } + } + if (sides.size === 0) { + sides.add("North"); + sides.add("West"); + } + return [...sides]; +} + +function distanceToIngress(x, y, bounds, ingressSides) { + const d = []; + for (const side of ingressSides) { + if (side === "North") d.push(bounds.maxY - y); + if (side === "South") d.push(y - bounds.minY); + if (side === "West") d.push(x - bounds.minX); + if (side === "East") d.push(bounds.maxX - x); + } + if (d.length === 0) return 0; + // In GEMM-like wavefronts, readiness is dominated by the slower upstream stream. + return Math.max(...d); +} + +function statusFromDelta(deltaValue, missing) { + if (missing) return "missing"; + if (!Number.isFinite(deltaValue)) return "missing"; + if (deltaValue === 0) return "on-time"; + return deltaValue < 0 ? 
"early" : "late"; +} + +function getModelOffset(modelItem, compModel) { + if (!modelItem) return null; + if (compModel === "distance") return modelItem.distanceOffset; + if (compModel === "fitted") return modelItem.fittedOffset; + return modelItem.hybridOffset; +} + +function getCompDeltaByModel(slot, compModel) { + if (compModel === "distance") return slot.deltaCompDistance; + if (compModel === "fitted") return slot.deltaCompFitted; + return slot.deltaCompHybrid; +} + +function getCompStatusByModel(slot, compModel) { + if (compModel === "distance") return slot.statusCompDistance; + if (compModel === "fitted") return slot.statusCompFitted; + return slot.statusCompHybrid; +} + +function summarizeModelBoundary(ii, phaseExplain, coreOffsets, modelKey) { + const boundarySamples = []; + const innerSamples = []; + for (const [coreKey, offsetInfo] of coreOffsets.entries()) { + const offset = offsetInfo[modelKey]; + if (!Number.isFinite(offset)) continue; + const meta = phaseExplain.coreMap.get(coreKey); + const weight = Math.max(1, Number(meta?.modeCount || 0)); + const sample = { value: Number(offset), weight }; + if (meta?.isBoundary) boundarySamples.push(sample); + else innerSamples.push(sample); + } + const boundary = weightedMedian(boundarySamples); + const inner = weightedMedian(innerSamples); + const gap = Number.isFinite(boundary) && Number.isFinite(inner) + ? signedDelta(boundary, inner, ii) + : null; + return { boundary, inner, gap }; +} + +function buildCompensationModels(view, phaseExplain, events) { + const ingressSides = inferIngressSidesFromTrace(events); + const xs = view.columns.map((c) => c.x); + const ys = view.columns.map((c) => c.y); + const bounds = { + minX: xs.length > 0 ? Math.min(...xs) : 0, + maxX: xs.length > 0 ? Math.max(...xs) : 0, + minY: ys.length > 0 ? Math.min(...ys) : 0, + maxY: ys.length > 0 ? 
Math.max(...ys) : 0, + }; + + const rawDistancePhaseSamples = []; + for (const c of view.columns) { + const dist = distanceToIngress(c.x, c.y, bounds, ingressSides); + const phase = view.ii > 0 ? normalizeSlot(dist, view.ii) : dist; + rawDistancePhaseSamples.push({ value: phase, weight: 1, coreKey: c.coreKey, rawDist: dist }); + } + const center = weightedMedian(rawDistancePhaseSamples); + const coreOffsets = new Map(); + for (const c of view.columns) { + const phaseMeta = phaseExplain.coreMap.get(c.coreKey) || {}; + const row = rawDistancePhaseSamples.find((v) => v.coreKey === c.coreKey); + const rawPhase = row ? row.value : 0; + const distanceOffset = view.ii > 0 + ? signedDelta(rawPhase, Number.isFinite(center) ? center : 0, view.ii) + : rawPhase - (Number.isFinite(center) ? center : 0); + const fittedOffset = Number.isFinite(phaseMeta.phaseOffset) ? Number(phaseMeta.phaseOffset) : null; + const fittedConfidence = Number.isFinite(phaseMeta.phaseConfidence) ? Number(phaseMeta.phaseConfidence) : 0; + const hybridOffset = (Number.isFinite(fittedOffset) && fittedConfidence >= 0.4) + ? fittedOffset + : distanceOffset; + coreOffsets.set(c.coreKey, { + distanceOffset, + fittedOffset, + hybridOffset, + fittedConfidence, + ingressDistance: row ? 
row.rawDist : 0, + }); + } + + return { + ingressSides, + coreOffsets, + models: { + distance: summarizeModelBoundary(view.ii, phaseExplain, coreOffsets, "distanceOffset"), + fitted: summarizeModelBoundary(view.ii, phaseExplain, coreOffsets, "fittedOffset"), + hybrid: summarizeModelBoundary(view.ii, phaseExplain, coreOffsets, "hybridOffset"), + }, + }; +} + +function alignSlotAtOrAfter(startTime, expectedSlot, ii) { + const t0 = Math.round(Number(startTime || 0)); + if (ii <= 0) return expectedSlot; + const slot = normalizeSlot(expectedSlot, ii); + let offset = slot - normalizeSlot(t0, ii); + if (offset < 0) offset += ii; + return t0 + offset; +} + +function buildTimelineLanes(view, visibleColumns, phaseExplain, compensation) { + const lanes = []; + let minT = Number.POSITIVE_INFINITY; + let maxT = Number.NEGATIVE_INFINITY; + let totalSlots = 0; + + for (const c of visibleColumns) { + const coreMeta = phaseExplain.coreMap.get(c.coreKey) || {}; + const expectedSlots = []; + const actualSlots = []; + const compMeta = compensation.coreOffsets.get(c.coreKey) || null; + for (const item of c.items) { + const samples = (Array.isArray(item.allSamples) && item.allSamples.length > 0) + ? item.allSamples + : [null]; + const occurrenceTotal = samples.length; + for (let sampleIdx = 0; sampleIdx < samples.length; sampleIdx += 1) { + const sample = samples[sampleIdx]; + const hasActual = sample && Number.isFinite(sample.time); + const actualTime = hasActual ? Math.round(sample.time) : null; + const deltaStrict = hasActual ? sample.delta : item.delta; + const statusStrict = hasActual ? sample.status : item.status; + const missing = !hasActual; + + let expectedTime = null; + if (hasActual && Number.isFinite(deltaStrict)) { + // Expand expected per actual occurrence to cover full trace length. 
+ expectedTime = Math.round(actualTime - deltaStrict); + } else if (Number.isFinite(item.firstTime) && Number.isFinite(item.delta)) { + expectedTime = Math.round(item.firstTime - item.delta); + } else if (view.ii > 0) { + expectedTime = alignSlotAtOrAfter(state.minTime, item.expectedSlot, view.ii); + } else { + expectedTime = item.expectedSlot; + } + + const deltaRebased = computeDeltaRebased(deltaStrict, coreMeta.phaseOffset, view.ii); + const deltaCompDistance = computeDeltaRebased(deltaStrict, compMeta?.distanceOffset, view.ii); + const deltaCompFitted = computeDeltaRebased(deltaStrict, compMeta?.fittedOffset, view.ii); + const deltaCompHybrid = computeDeltaRebased(deltaStrict, compMeta?.hybridOffset, view.ii); + const statusCompDistance = statusFromDelta(deltaCompDistance, missing); + const statusCompFitted = statusFromDelta(deltaCompFitted, missing); + const statusCompHybrid = statusFromDelta(deltaCompHybrid, missing); + const cellKey = `${c.coreKey}|${item.expectedSlot}`; + const slot = { + coreKey: c.coreKey, + x: c.x, + y: c.y, + expectedSlot: item.expectedSlot, + opId: item.id, + opcode: item.opcode || "", + status: statusStrict, + statusStrict, + expectedTime, + actualTime, + delta: deltaStrict, + deltaStrict, + deltaRebased, + deltaCompDistance, + deltaCompFitted, + deltaCompHybrid, + statusCompDistance, + statusCompFitted, + statusCompHybrid, + compDistanceOffset: compMeta?.distanceOffset ?? null, + compFittedOffset: compMeta?.fittedOffset ?? null, + compHybridOffset: compMeta?.hybridOffset ?? null, + firstDivergence: item.firstDivergence, + propagated: item.propagated, + cellKey, + sampleIdx, + sampleIndex: sampleIdx + 1, + occurrenceTotal, + samplePred: hasActual ? sample.pred : null, + sampleSource: hasActual ? 
String(sample.source || "Unknown") : null, + }; + expectedSlots.push(slot); + if (Number.isFinite(actualTime)) { + actualSlots.push(slot); + } + + if (Number.isFinite(expectedTime)) { + minT = Math.min(minT, expectedTime); + maxT = Math.max(maxT, expectedTime); + totalSlots += 1; + } + if (Number.isFinite(actualTime)) { + minT = Math.min(minT, actualTime); + maxT = Math.max(maxT, actualTime); + totalSlots += 1; + } + } + } + lanes.push({ + coreKey: c.coreKey, + x: c.x, + y: c.y, + modeDelta: c.modeDelta, + modeCount: c.modeCount, + statusCounts: c.statusCounts, + phaseOffset: coreMeta.phaseOffset ?? null, + phaseConfidence: coreMeta.phaseConfidence ?? 0, + compDistanceOffset: compMeta?.distanceOffset ?? null, + compFittedOffset: compMeta?.fittedOffset ?? null, + compHybridOffset: compMeta?.hybridOffset ?? null, + compFittedConfidence: compMeta?.fittedConfidence ?? 0, + boundaryLabel: coreMeta.boundaryLabel || "Inner", + isBoundary: Boolean(coreMeta.isBoundary), + expectedSlots, + actualSlots, + }); + } + + if (!Number.isFinite(minT) || !Number.isFinite(maxT)) { + minT = state.minTime; + maxT = state.maxTime; + } + minT = Math.min(minT, state.minTime); + maxT = Math.max(maxT, state.maxTime); + if (minT === maxT) maxT = minT + 1; + + return { + lanes, + timeMin: minT, + timeMax: maxT, + totalSlots, + }; +} + +function tickStepByRange(span) { + if (span <= 40) return 2; + if (span <= 120) return 5; + if (span <= 360) return 10; + if (span <= 900) return 25; + return 50; +} + +function renderTimelineSvg(_view, timeline) { + const wrap = controls.timingGrid; + if (!wrap) return; + wrap.innerHTML = ""; + + const baselineView = ["strict", "compensated", "split"].includes(state.timingBaselineView) + ? state.timingBaselineView + : "strict"; + const compModel = ["distance", "fitted", "hybrid"].includes(state.timingCompModel) + ? 
state.timingCompModel + : "hybrid"; + + const fullMin = timeline.timeMin; + const fullMax = timeline.timeMax; + const fullSpan = Math.max(1, fullMax - fullMin + 1); + const minWindow = 1; + const windowSize = clamp( + Math.round(Number(state.timingWindowSize || Math.min(120, fullSpan))), + minWindow, + fullSpan, + ); + const startMax = Math.max(fullMin, fullMax - windowSize + 1); + const windowStart = clamp( + Math.round(Number(state.timingWindowStart || fullMin)), + fullMin, + startMax, + ); + const windowEnd = windowStart + windowSize - 1; + state.timingWindowStart = windowStart; + state.timingWindowSize = windowSize; + + if (controls.timingWindowStart) { + controls.timingWindowStart.min = String(fullMin); + controls.timingWindowStart.max = String(startMax); + controls.timingWindowStart.value = String(windowStart); + controls.timingWindowStart.disabled = fullSpan <= 1; + } + if (controls.timingWindowSize) { + controls.timingWindowSize.min = String(minWindow); + controls.timingWindowSize.max = String(fullSpan); + controls.timingWindowSize.value = String(windowSize); + } + if (controls.timingWindowStartLabel) { + controls.timingWindowStartLabel.textContent = `T${windowStart}-T${windowEnd}`; + } + if (controls.timingWindowSizeLabel) { + controls.timingWindowSizeLabel.textContent = `${windowSize} cycles`; + } + + const zoomY = clamp(Number(state.timingZoomY || 1), 0.6, 4); + state.timingZoomX = 1; + state.timingZoomY = zoomY; + if (controls.timingZoomY) { + controls.timingZoomY.value = String(Math.round(zoomY * 100)); + } + if (controls.timingZoomYLabel) { + controls.timingZoomYLabel.textContent = `${zoomY.toFixed(2)}x`; + } + + const leftPad = 242; + const rightPad = 30; + const topPad = 30; + const bottomPad = 38; + const slotHeight = clamp(Math.round(8 * zoomY), 6, 34); + const subLaneGap = clamp(Math.round(5 * zoomY), 3, 22); + const laneGap = clamp(Math.round(10 * zoomY), 6, 46); + const splitView = baselineView === "split"; + const baseLaneRows = splitView ? 
3 : 2; + const availableCoreKeys = new Set(timeline.lanes.map((lane) => lane.coreKey)); + const selectedIoKeys = new Set([...(state.timingIoWaveExpandedCoreKeys || [])]); + const ioWaveExpandedKeys = state.timingIoWaveExpandAll + ? new Set([...availableCoreKeys]) + : new Set([...selectedIoKeys].filter((key) => availableCoreKeys.has(key))); + const rowStep = slotHeight + subLaneGap; + const laneData = []; + let yCursor = topPad; + for (let idx = 0; idx < timeline.lanes.length; idx += 1) { + const lane = timeline.lanes[idx]; + const hasIoRows = ioWaveExpandedKeys.has(lane.coreKey); + const laneRows = baseLaneRows + (hasIoRows ? 2 : 0); + const laneHeight = laneRows * slotHeight + (laneRows - 1) * subLaneGap; + laneData.push({ + ...lane, + idx, + hasIoRows, + laneRows, + yBase: yCursor, + yExpected: yCursor, + yStrict: yCursor + rowStep, + yComp: yCursor + rowStep * 2, + yIoIn: hasIoRows ? yCursor + rowStep * baseLaneRows : null, + yIoOut: hasIoRows ? yCursor + rowStep * (baseLaneRows + 1) : null, + }); + yCursor += laneHeight + laneGap; + } + const laneCount = Math.max(1, laneData.length); + const plotH = Math.max(1, yCursor - topPad - laneGap); + const wrapWidth = Math.max(860, Math.round(wrap.clientWidth || 0) - 2); + const plotW = Math.max(620, wrapWidth - leftPad - rightPad); + const width = leftPad + plotW + rightPad; + const height = topPad + plotH + bottomPad; + const labelFontSize = clamp(Math.round(slotHeight * 0.72), 7, 13); + const labelMinWidth = Math.max(14, labelFontSize * 2 + 2); + + const svgEl = d3.create("svg") + .attr("id", "timingTimelineSvg") + .attr("class", "timing-timeline-svg") + .attr("viewBox", `0 0 ${width} ${height}`) + .attr("width", width) + .attr("height", height); + + const xScale = d3.scaleLinear() + .domain([windowStart, windowEnd + 1]) + .range([leftPad, leftPad + plotW]); + state.timingViewport = { + fullMin, + fullMax, + fullSpan, + windowStart, + windowSize, + windowEnd, + leftPad, + plotW, + }; + + const ticks = []; + for (let 
t = windowStart; t <= windowEnd; t += 1) ticks.push(t); + + // Cycle boundaries (X): dashed vertical lines only; no other grid + const grid = svgEl.append("g").attr("class", "timeline-grid"); + for (const t of ticks) { + const x = xScale(t); + grid.append("line") + .attr("x1", x).attr("x2", x) + .attr("y1", topPad).attr("y2", topPad + plotH) + .attr("class", "timeline-cycle-sep"); + grid.append("text") + .attr("x", x + 2) + .attr("y", topPad + plotH + 16) + .attr("class", "timeline-tick") + .text(`T${t}`); + } + svgEl.append("line") + .attr("x1", leftPad).attr("x2", leftPad + plotW) + .attr("y1", topPad + plotH).attr("y2", topPad + plotH) + .attr("class", "timeline-axis"); + + // Core boundaries (Y): one dashed/dark line between each core for easier row matching + const coreSep = svgEl.append("g").attr("class", "timeline-core-seps"); + for (let idx = 1; idx < laneCount; idx += 1) { + const y = laneData[idx].yBase; + coreSep.append("line") + .attr("x1", leftPad) + .attr("x2", leftPad + plotW) + .attr("y1", y) + .attr("y2", y) + .attr("class", "timeline-core-sep"); + } + + // Lane labels (no inner sub-row grid lines) + const lanesG = svgEl.append("g").attr("class", "timeline-lanes"); + for (const lane of laneData) { + const modelOffset = compModel === "distance" + ? lane.compDistanceOffset + : (compModel === "fitted" ? lane.compFittedOffset : lane.compHybridOffset); + const phaseText = state.showPhaseExplain + ? ` SΔ=${lane.phaseOffset == null ? "N/A" : formatDelta(lane.phaseOffset)} CΔ=${formatDelta(modelOffset)}` + : ""; + lanesG.append("text") + .attr("x", 8) + .attr("y", lane.yExpected + slotHeight + 1) + .attr("class", [ + "timeline-core-label", + lane.isBoundary ? "boundary" : "", + lane.hasIoRows ? "io-expanded" : "", + state.timingFocusedCoreKey === lane.coreKey ? 
"focused" : "", + ].filter(Boolean).join(" ")) + .attr("data-core-key", lane.coreKey) + .attr("title", `Click: focus core (${lane.x},${lane.y}) | Double-click: toggle IO wave`) + .text(`(${lane.x},${lane.y}) ${lane.boundaryLabel}${phaseText}`); + lanesG.append("text") + .attr("x", leftPad - 64) + .attr("y", lane.yExpected + slotHeight - 1) + .attr("class", "timeline-lane-tag") + .text("E"); + lanesG.append("text") + .attr("x", leftPad - 64) + .attr("y", lane.yStrict + slotHeight - 1) + .attr("class", "timeline-lane-tag") + .text(splitView ? "S" : (baselineView === "strict" ? "A" : "C")); + if (splitView) { + lanesG.append("text") + .attr("x", leftPad - 64) + .attr("y", lane.yComp + slotHeight - 1) + .attr("class", "timeline-lane-tag") + .text("C"); + } + if (lane.hasIoRows) { + lanesG.append("text") + .attr("x", leftPad - 64) + .attr("y", lane.yIoIn + slotHeight - 1) + .attr("class", "timeline-lane-tag timeline-io-tag-in") + .text("IN"); + lanesG.append("text") + .attr("x", leftPad - 64) + .attr("y", lane.yIoOut + slotHeight - 1) + .attr("class", "timeline-lane-tag timeline-io-tag-out") + .text("OUT"); + } + } + + const slotG = svgEl.append("g").attr("class", "timeline-rects"); + const keepSlot = (slot) => { + if (!state.timingAnomalyOnly) return true; + const strictAnomaly = slot.statusStrict !== "on-time"; + const compAnomaly = getCompStatusByModel(slot, compModel) !== "on-time"; + if (baselineView === "strict") return strictAnomaly; + if (baselineView === "compensated") return compAnomaly; + return strictAnomaly || compAnomaly; + }; + const applyStackLayout = (items, keyOf, tieBreakOf) => { + const buckets = new Map(); + for (const item of items) { + const key = keyOf(item); + const arr = buckets.get(key) || []; + arr.push(item); + buckets.set(key, arr); + } + for (const group of buckets.values()) { + group.sort((a, b) => tieBreakOf(a) - tieBreakOf(b)); + const total = group.length; + if (total <= 1) { + group[0].stackIndex = 0; + group[0].stackTotal = 1; + 
continue; + } + for (let i = 0; i < group.length; i += 1) { + group[i].stackIndex = i; + group[i].stackTotal = total; + } + } + }; + const resolveStackGeometry = (baseY, baseH, stackIndex, stackTotal) => { + if (!Number.isFinite(stackTotal) || stackTotal <= 1) { + return { y: baseY, h: baseH }; + } + // Keep all stacked blocks visible within one cycle slot row. + const gap = 1; + const innerH = Math.max(2, Math.floor((baseH - gap * (stackTotal - 1)) / stackTotal)); + const y = baseY + stackIndex * (innerH + gap); + return { y, h: innerH }; + }; + const summarizeWaveValues = (values, maxItems = 2) => { + const arr = Array.isArray(values) ? values : []; + if (arr.length === 0) return ""; + const shown = arr.slice(0, maxItems).map((v) => shortText(v, 7)); + const remain = arr.length - shown.length; + if (remain > 0) shown.push(`+${remain}`); + return shown.join(","); + }; + const ioBusPath = (xLeft, xRight, yTop, yBottom) => { + const yMid = (yTop + yBottom) / 2; + const w = Math.max(1, xRight - xLeft); + const edge = Math.min(5, Math.max(1, Math.round(w * 0.22))); + return [ + `M${xLeft + edge},${yTop}`, + `L${xRight - edge},${yTop}`, + `L${xRight},${yMid}`, + `L${xRight - edge},${yBottom}`, + `L${xLeft + edge},${yBottom}`, + `L${xLeft},${yMid}`, + "Z", + ].join(" "); + }; + const drawIoWaveRow = (lane, yPos, direction) => { + if (!Number.isFinite(yPos)) return; + const byTime = state.coreIoWaveByTime.get(lane.coreKey); + if (!byTime) return; + for (let t = windowStart; t <= windowEnd; t += 1) { + const entry = byTime.get(t); + const values = direction === "in" ? entry?.inVals : entry?.outVals; + if (!Array.isArray(values) || values.length === 0) continue; + const xLeft = xScale(t); + const xRight = xScale(t + 1); + const w = Math.max(1, xRight - xLeft); + slotG.append("path") + .attr("d", ioBusPath(xLeft, xRight, yPos, yPos + slotHeight)) + .attr("class", `timeline-io-bus ${direction === "in" ? 
"timeline-io-bus-in" : "timeline-io-bus-out"}`) + .attr( + "title", + `${direction === "in" ? "Input" : "Output"} core=(${lane.x},${lane.y}) t=${t} values=${values.join(",")}`, + ); + if (w >= labelMinWidth + 6) { + slotG.append("text") + .attr("x", xLeft + w / 2) + .attr("y", yPos + slotHeight / 2) + .attr("text-anchor", "middle") + .attr("dominant-baseline", "middle") + .attr("class", `timeline-io-bus-label ${direction === "in" ? "timeline-io-bus-label-in" : "timeline-io-bus-label-out"}`) + .attr("font-size", labelFontSize) + .text(summarizeWaveValues(values)); + } + } + }; + const drawActualRow = (lane, yPos, rowMode) => { + const drawables = []; + for (const slot of lane.expectedSlots) { + if (!keepSlot(slot)) continue; + const rowStatus = rowMode === "strict" ? slot.statusStrict : getCompStatusByModel(slot, compModel); + let drawTime = null; + let cls = "actual-ok"; + if (rowStatus === "missing") { + drawTime = slot.expectedTime; + cls = "missing"; + } else if (Number.isFinite(slot.actualTime)) { + drawTime = slot.actualTime; + if (rowMode === "strict") { + cls = rowStatus === "on-time" ? "actual-ok" : "actual-bad"; + } else { + cls = rowStatus === "on-time" ? "actual-comp-ok" : "actual-comp-bad"; + } + } + if (!Number.isFinite(drawTime)) continue; + if (drawTime < windowStart || drawTime > windowEnd) continue; + drawables.push({ slot, rowStatus, drawTime, cls }); + } + applyStackLayout( + drawables, + (d) => `${lane.coreKey}|${rowMode}|${d.drawTime}`, + (d) => (d.slot.opId * 10000) + (d.slot.sampleIndex || 0), + ); + for (const d of drawables) { + const { slot, rowStatus, drawTime, cls, stackIndex = 0, stackTotal = 1 } = d; + const x0 = xScale(drawTime); + const x1 = xScale(drawTime + 1); + const w = Math.max(1, Math.floor(x1 - x0 - 1)); + const selected = state.timingSelectedCell === slot.cellKey ? 
"selected" : ""; + const geom = resolveStackGeometry(yPos, slotHeight, stackIndex, stackTotal); + slotG.append("rect") + .attr("x", x0 + 0.5) + .attr("y", geom.y) + .attr("width", w) + .attr("height", geom.h) + .attr("class", `timeline-rect ${cls} ${selected}`) + .attr("data-timing-cell", slot.cellKey) + .attr( + "title", + `${rowMode === "strict" ? "Strict" : `Comp(${compModel})`} #${slot.opId}[${slot.sampleIndex}/${slot.occurrenceTotal}] status=${rowStatus} t=${Number.isFinite(slot.actualTime) ? slot.actualTime : "N/A"} deltaS=${formatDelta(slot.deltaStrict)} deltaC=${formatDelta(getCompDeltaByModel(slot, compModel))}`, + ); + if (w >= labelMinWidth && geom.h >= 8) { + slotG.append("text") + .attr("x", x0 + 0.5 + w / 2) + .attr("y", geom.y + geom.h / 2) + .attr("text-anchor", "middle") + .attr("dominant-baseline", "middle") + .attr("class", "timeline-rect-label timeline-rect-label-actual") + .attr("font-size", labelFontSize) + .text(abbrevOpLabel(slot)); + } + + if (state.timingSelectedCell === slot.cellKey && Number.isFinite(slot.actualTime) && Number.isFinite(slot.expectedTime)) { + const xe = xScale(slot.expectedTime) + Math.max(1, Math.floor((xScale(slot.expectedTime + 1) - xScale(slot.expectedTime)) / 2)); + const xa = xScale(slot.actualTime) + Math.max(1, Math.floor((xScale(slot.actualTime + 1) - xScale(slot.actualTime)) / 2)); + const linkClass = rowMode === "strict" + ? (rowStatus === "on-time" ? "ok" : "bad") + : (rowStatus === "on-time" ? 
"comp-ok" : "comp-bad"); + slotG.append("line") + .attr("x1", xe).attr("y1", lane.yExpected + slotHeight) + .attr("x2", xa).attr("y2", geom.y) + .attr("class", `timeline-link ${linkClass}`); + } + } + }; + + for (const lane of laneData) { + const expectedDrawables = []; + for (const slot of lane.expectedSlots) { + if (!keepSlot(slot)) continue; + if (!Number.isFinite(slot.expectedTime)) continue; + if (slot.expectedTime < windowStart || slot.expectedTime > windowEnd) continue; + expectedDrawables.push({ slot, drawTime: slot.expectedTime }); + } + applyStackLayout( + expectedDrawables, + (d) => `${lane.coreKey}|expected|${d.drawTime}`, + (d) => (d.slot.opId * 10000) + (d.slot.sampleIndex || 0), + ); + for (const d of expectedDrawables) { + const { slot, stackIndex = 0, stackTotal = 1 } = d; + const x0 = xScale(slot.expectedTime); + const x1 = xScale(slot.expectedTime + 1); + const w = Math.max(1, Math.floor(x1 - x0 - 1)); + const selected = state.timingSelectedCell === slot.cellKey ? "selected" : ""; + const geom = resolveStackGeometry(lane.yExpected, slotHeight, stackIndex, stackTotal); + slotG.append("rect") + .attr("x", x0 + 0.5) + .attr("y", geom.y) + .attr("width", w) + .attr("height", geom.h) + .attr("class", `timeline-rect expected ${selected}`) + .attr("data-timing-cell", slot.cellKey) + .attr( + "title", + `Expected #${slot.opId}[${slot.sampleIndex}/${slot.occurrenceTotal}] (${slot.opcode || "N/A"}) t=${slot.expectedTime} deltaS=${formatDelta(slot.deltaStrict)} deltaC=${formatDelta(getCompDeltaByModel(slot, compModel))}`, + ); + if (w >= labelMinWidth && geom.h >= 8) { + slotG.append("text") + .attr("x", x0 + 0.5 + w / 2) + .attr("y", geom.y + geom.h / 2) + .attr("text-anchor", "middle") + .attr("dominant-baseline", "middle") + .attr("class", "timeline-rect-label timeline-rect-label-expected") + .attr("font-size", labelFontSize) + .text(abbrevOpLabel(slot)); + } + } + if (baselineView === "strict") { + drawActualRow(lane, lane.yStrict, "strict"); + } else 
if (baselineView === "compensated") { + drawActualRow(lane, lane.yStrict, "comp"); + } else { + drawActualRow(lane, lane.yStrict, "strict"); + drawActualRow(lane, lane.yComp, "comp"); + } + if (lane.hasIoRows) { + drawIoWaveRow(lane, lane.yIoIn, "in"); + drawIoWaveRow(lane, lane.yIoOut, "out"); + } + } + + // Legend + const legend = svgEl.append("g").attr("class", "timeline-legend").attr("transform", `translate(${leftPad},14)`); + const legendItems = baselineView === "strict" + ? [ + ["Expected slot", "timeline-legend-exp"], + ["Strict on-time", "timeline-legend-act-ok"], + ["Strict mismatch", "timeline-legend-act-bad"], + ["Missing", "timeline-legend-missing"], + ] + : (baselineView === "compensated" + ? [ + ["Expected slot", "timeline-legend-exp"], + [compModel === "hybrid" ? "Hybrid on-time" : `Comp(${compModel}) on-time`, "timeline-legend-comp-ok"], + [compModel === "hybrid" ? "Hybrid mismatch" : `Comp(${compModel}) mismatch`, "timeline-legend-comp-bad"], + ["Missing", "timeline-legend-missing"], + ] + : [ + ["Expected slot", "timeline-legend-exp"], + ["Strict on-time", "timeline-legend-act-ok"], + ["Strict mismatch", "timeline-legend-act-bad"], + [`Comp(${compModel}) on-time`, "timeline-legend-comp-ok"], + [`Comp(${compModel}) mismatch`, "timeline-legend-comp-bad"], + ["Missing", "timeline-legend-missing"], + ]); + if (laneData.some((lane) => lane.hasIoRows)) { + legendItems.push(["IN bus", "timeline-legend-io-in"]); + legendItems.push(["OUT bus", "timeline-legend-io-out"]); + } + const legendGap = 132; + legendItems.forEach((it, i) => { + const gx = i * legendGap; + legend.append("rect").attr("x", gx).attr("y", -4).attr("width", 10).attr("height", 8).attr("class", it[1]); + legend.append("text").attr("x", gx + 14).attr("y", 4).attr("class", "timeline-legend-text").text(it[0]); + }); + + wrap.appendChild(svgEl.node()); +} + +function timelineZoomAnchorTimeFromWheel(event) { + const vp = state.timingViewport; + if (!vp) { + return Number(state.timingWindowStart 
|| 0) + Number(state.timingWindowSize || 1) / 2; + } + const svgElement = document.getElementById("timingTimelineSvg"); + if (!svgElement) { + return vp.windowStart + vp.windowSize / 2; + } + const rect = svgElement.getBoundingClientRect(); + const localX = event.clientX - rect.left; + const ratio = clamp((localX - vp.leftPad) / Math.max(1, vp.plotW), 0, 0.999); + return vp.windowStart + ratio * vp.windowSize; +} + +function handleTimelineCtrlWheelZoom(event) { + if (!event.ctrlKey) return; + if (state.events.length === 0 || !state.programSpec) return; + event.preventDefault(); + + const vp = state.timingViewport || { + fullMin: state.minTime, + fullMax: state.maxTime, + fullSpan: Math.max(1, state.maxTime - state.minTime + 1), + }; + const fullMin = vp.fullMin; + const fullMax = vp.fullMax; + const fullSpan = Math.max(1, vp.fullSpan || (fullMax - fullMin + 1)); + const minWindow = 1; + const oldWindow = Math.max(1, Number(state.timingWindowSize || Math.min(120, fullSpan))); + const oldStart = Number(state.timingWindowStart || fullMin); + const zoomIn = event.deltaY < 0; + const anchorTime = timelineZoomAnchorTimeFromWheel(event); + const nextWindow = clamp( + Math.round(oldWindow * (zoomIn ? 0.88 : 1.14)), + minWindow, + fullSpan, + ); + const anchorRatio = clamp((anchorTime - oldStart) / oldWindow, 0, 1); + const startMax = Math.max(fullMin, fullMax - nextWindow + 1); + const nextStart = clamp( + Math.round(anchorTime - anchorRatio * nextWindow), + fullMin, + startMax, + ); + const factor = zoomIn ? 
1.08 : 1 / 1.08; + state.timingWindowStart = nextStart; + state.timingWindowSize = nextWindow; + state.timingZoomX = 1; + state.timingZoomY = clamp(Number(state.timingZoomY || 1) * factor, 0.6, 4); + renderTimingView(); +} + +function getTimelineSvgSize(svgElement) { + const vb = svgElement.getAttribute("viewBox"); + if (vb) { + const parts = vb.trim().split(/\s+/).map(Number); + if (parts.length === 4 && Number.isFinite(parts[2]) && Number.isFinite(parts[3])) { + return { width: parts[2], height: parts[3] }; + } + } + const width = Number(svgElement.getAttribute("width")) || svgElement.clientWidth || 1200; + const height = Number(svgElement.getAttribute("height")) || svgElement.clientHeight || 800; + return { width, height }; +} + +function parseMaxSide() { + const fallback = 4096; + if (!controls.timingExportMaxSide) return fallback; + const v = Math.round(Number(controls.timingExportMaxSide.value)); + if (!Number.isFinite(v)) return fallback; + return clamp(v, 512, 16000); +} + +function timelineExportCss() { + return ` +.timing-timeline-svg { background: #fffaf0; } +.timeline-axis { stroke: #8f846d; stroke-width: 1; } +.timeline-cycle-sep { stroke: #c4b89a; stroke-width: 1; stroke-dasharray: 3 2; } +.timeline-core-sep { stroke: #7a6f58; stroke-width: 1.2; stroke-dasharray: 4 3; } +.timeline-tick { fill: #7a6f58; font-size: 10px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; } +.timeline-core-label { fill: #5a5347; font-size: 11px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; } +.timeline-core-label.boundary { font-weight: 700; } +.timeline-core-label.focused { fill: #1f4eb5; font-weight: 700; text-decoration: underline; } +.timeline-core-label.io-expanded { fill: #6a2b96; text-decoration: underline; text-decoration-style: dashed; } +.timeline-lane-tag { fill: #7f7460; font-size: 10px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; } +.timeline-io-tag-in { fill: #2d6cdf; font-weight: 700; } +.timeline-io-tag-out { 
fill: #8f2ac7; font-weight: 700; } +.timeline-rect.expected { fill: #f4f4f4; stroke: #8f8f8f; stroke-width: 0.8; } +.timeline-rect.actual-ok { fill: #2a7f62; stroke: #1f604a; stroke-width: 0.7; } +.timeline-rect.actual-bad { fill: #d62828; stroke: #8f1717; stroke-width: 0.7; } +.timeline-rect.actual-comp-ok { fill: #2d6cdf; stroke: #1d4a97; stroke-width: 0.8; opacity: 0.84; } +.timeline-rect.actual-comp-bad { fill: #9b2ce0; stroke: #5d178a; stroke-width: 0.85; opacity: 0.9; } +.timeline-rect.missing { fill: #f4f4f4; stroke: #7a7a7a; stroke-width: 1.1; stroke-dasharray: 2 1; } +.timeline-rect.selected { stroke-width: 1.8; } +.timeline-io-bus { stroke-width: 0.9; } +.timeline-io-bus-in { fill: #deebff; stroke: #7da3ea; } +.timeline-io-bus-out { fill: #f1e1ff; stroke: #b589dd; } +.timeline-io-bus-label { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; pointer-events: none; font-size: 7px; } +.timeline-io-bus-label-in { fill: #214a9c; } +.timeline-io-bus-label-out { fill: #6d2094; } +.timeline-missing { stroke: #7a7a7a; stroke-width: 1.2; } +.timeline-link.ok { stroke: rgba(54, 132, 103, 0.45); stroke-width: 0.9; } +.timeline-link.bad { stroke: rgba(214, 40, 40, 0.72); stroke-width: 1.2; } +.timeline-link.comp-ok { stroke: rgba(45, 108, 223, 0.5); stroke-width: 0.9; stroke-dasharray: 2 1; } +.timeline-link.comp-bad { stroke: rgba(155, 44, 224, 0.78); stroke-width: 1.2; stroke-dasharray: 2 1; } +.timeline-legend-text { fill: #615a4f; font-size: 10px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; } +.timeline-legend-exp { fill: #fff; stroke: #8c8c8c; } +.timeline-legend-act-ok { fill: #2a7f62; } +.timeline-legend-act-bad { fill: #d62828; } +.timeline-legend-missing { fill: #f4f4f4; stroke: #7a7a7a; stroke-dasharray: 2 1; } +.timeline-legend-comp-ok { fill: #2d6cdf; } +.timeline-legend-comp-bad { fill: #9b2ce0; } +.timeline-legend-io-in { fill: #deebff; stroke: #7da3ea; } +.timeline-legend-io-out { fill: #f1e1ff; stroke: #b589dd; } 
+.timeline-rect-label { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size: 7px; } +.timeline-rect-label-expected { fill: #444; } +.timeline-rect-label-actual { fill: #fff; }`; +} + +function exportTimelinePng() { + const svgElement = document.getElementById("timingTimelineSvg"); + if (!svgElement) return; + const size = getTimelineSvgSize(svgElement); + const maxSide = parseMaxSide(); + const scale = Math.min(1, maxSide / Math.max(size.width, size.height)); + const outW = Math.max(1, Math.round(size.width * scale)); + const outH = Math.max(1, Math.round(size.height * scale)); + + const serializer = new XMLSerializer(); + const clone = svgElement.cloneNode(true); + const styleEl = document.createElementNS("http://www.w3.org/2000/svg", "style"); + styleEl.textContent = timelineExportCss(); + clone.insertBefore(styleEl, clone.firstChild); + let source = serializer.serializeToString(clone); + if (!source.includes("xmlns=\"http://www.w3.org/2000/svg\"")) { + source = source.replace(" { + const canvas = document.createElement("canvas"); + canvas.width = outW; + canvas.height = outH; + const ctx = canvas.getContext("2d"); + if (!ctx) { + URL.revokeObjectURL(url); + return; + } + ctx.fillStyle = "#fffdf7"; + ctx.fillRect(0, 0, outW, outH); + ctx.drawImage(img, 0, 0, outW, outH); + canvas.toBlob((blob) => { + if (!blob) { + URL.revokeObjectURL(url); + return; + } + const dlUrl = URL.createObjectURL(blob); + const a = document.createElement("a"); + a.href = dlUrl; + a.download = "timeline.png"; + a.click(); + URL.revokeObjectURL(dlUrl); + URL.revokeObjectURL(url); + }, "image/png"); + }; + img.onerror = () => { + URL.revokeObjectURL(url); + }; + img.src = url; +} + +function splitCellKey(cellKey) { + const pivot = cellKey.lastIndexOf("|"); + if (pivot <= 0) return { coreKey: "", slot: 0 }; + return { + coreKey: cellKey.slice(0, pivot), + slot: Number(cellKey.slice(pivot + 1)), + }; +} + +function buildCoreIoWaveByTime(events) { + const byCore = new 
Map(); + const ensure = (coreKey, time) => { + if (!byCore.has(coreKey)) byCore.set(coreKey, new Map()); + const byTime = byCore.get(coreKey); + if (!byTime.has(time)) byTime.set(time, { inVals: [], outVals: [] }); + return byTime.get(time); + }; + for (const e of events) { + if (e.msg !== "DataFlow") continue; + const time = Math.round(Number(e.Time)); + if (!Number.isFinite(time)) continue; + const value = formatDataAsDecimal(e.Data); + if (e.Behavior === "FeedIn" || e.Behavior === "Recv") { + const dst = parseEndpoint(e.Behavior === "FeedIn" ? e.To : e.Dst); + if (dst?.kind !== "tilePort") continue; + const cell = ensure(tileKey(dst.x, dst.y), time); + if (value != null) cell.inVals.push(value); + continue; + } + if (e.Behavior === "Send" || e.Behavior === "Collect") { + const src = parseEndpoint(e.Behavior === "Collect" ? e.From : e.Src); + if (src?.kind !== "tilePort") continue; + const cell = ensure(tileKey(src.x, src.y), time); + if (value != null) cell.outVals.push(value); + } + } + return byCore; +} + +function refreshCoreFocusControl(columns) { + if (!controls.timingCoreFocus) return; + const options = [ + { value: "", label: "All cores" }, + ...columns.map((c) => ({ value: c.coreKey, label: `(${c.x},${c.y})` })), + ]; + controls.timingCoreFocus.innerHTML = options + .map((opt) => ``) + .join(""); + const hasFocused = columns.some((c) => c.coreKey === state.timingFocusedCoreKey); + if (!hasFocused) state.timingFocusedCoreKey = null; + controls.timingCoreFocus.value = state.timingFocusedCoreKey || ""; +} + +function refreshIoWaveCoreControl(columns) { + if (!controls.timingIoWaveCore || !controls.timingIoWaveAll) return; + const validKeys = new Set(columns.map((c) => c.coreKey)); + const expanded = new Set( + [...(state.timingIoWaveExpandedCoreKeys || [])].filter((key) => validKeys.has(key)), + ); + state.timingIoWaveExpandedCoreKeys = expanded; + + const options = columns.map((c) => ({ value: c.coreKey, label: `(${c.x},${c.y})` })); + 
controls.timingIoWaveCore.innerHTML = options + .map((opt) => ``) + .join(""); + const shouldSelectAll = Boolean(state.timingIoWaveExpandAll && columns.length > 0); + if (state.timingIoWaveExpandAll && columns.length === 0) { + state.timingIoWaveExpandAll = false; + } + const selectedKeys = shouldSelectAll + ? new Set(columns.map((c) => c.coreKey)) + : expanded; + for (const opt of controls.timingIoWaveCore.options) { + opt.selected = selectedKeys.has(opt.value); + } + controls.timingIoWaveAll.checked = shouldSelectAll; +} + +function renderTimingDrilldown(view, heatmap, phaseExplain, compensation) { + if (!controls.timingDrilldown) return; + if (!state.timingSelectedCell || !heatmap.cells.has(state.timingSelectedCell)) { + controls.timingDrilldown.innerHTML = + "
Click a timeline mark to inspect operation-level details.
"; + return; + } + const selected = heatmap.cells.get(state.timingSelectedCell); + const { slot } = splitCellKey(state.timingSelectedCell); + const items = view.cellMap.get(state.timingSelectedCell) || []; + const corePhase = phaseExplain.coreMap.get(selected.coreKey); + const coreComp = compensation.coreOffsets.get(selected.coreKey) || null; + const compOffset = getModelOffset(coreComp, state.timingCompModel); + const corePhaseText = corePhase?.phaseOffset == null ? "N/A" : formatDelta(corePhase.phaseOffset); + const coreCompText = compOffset == null ? "N/A" : formatDelta(compOffset); + const confPct = `${Math.round((corePhase?.phaseConfidence || 0) * 100)}%`; + const summary = [ + `core=(${selected.x},${selected.y})`, + `edge=${corePhase?.boundaryLabel || "N/A"}`, + `slot=s${Number.isFinite(slot) ? slot : "N/A"}`, + `ops=${selected.opCount}`, + `anomaly=${selected.anomalyCount}`, + `strictPhase=${corePhaseText}`, + `comp(${state.timingCompModel})=${coreCompText}`, + `conf=${confPct}`, + ].join(" | "); + + if (items.length === 0) { + controls.timingDrilldown.innerHTML = + `
${escapeHtml(summary)}
No expected operations in this cell.
`; + return; + } + + let html = `
${escapeHtml(summary)}
`; + html += "
"; + for (const item of items) { + const rowCls = [ + "timing-drill-row", + item.status, + item.firstDivergence ? "first-divergence" : "", + item.propagated ? "propagated" : "", + ].filter(Boolean).join(" "); + const opLabel = `#${item.id} ${item.opcode || "N/A"}`; + const deltaComp = computeDeltaRebased(item.delta, compOffset, view.ii); + const statusComp = statusFromDelta(deltaComp, item.status === "missing"); + const compLabel = state.timingCompModel === "hybrid" ? "hybrid" : `comp(${state.timingCompModel})`; + const allSamples = Array.isArray(item.allSamples) ? item.allSamples : []; + const sourceCounts = new Map(); + for (const s of allSamples) { + const src = String(s?.source || "Unknown"); + sourceCounts.set(src, (sourceCounts.get(src) || 0) + 1); + } + const sourceSummary = sourceCounts.size > 0 + ? [...sourceCounts.entries()].map(([k, v]) => `${k}*${v}`).join(",") + : "N/A"; + const sampleRange = allSamples.length > 0 + ? `${allSamples[0].time}..${allSamples[allSamples.length - 1].time}` + : "N/A"; + const samplePreview = allSamples.length > 0 + ? allSamples.slice(0, 4).map((s, idx) => `${idx + 1}:${s.time}:${s.source || "Unknown"}`).join(",") + : "N/A"; + const fields = [ + `statusComp=${statusComp}`, + `deltaComp(${compLabel})=${formatDelta(deltaComp)}`, + `exp=s${item.expectedSlot}`, + `act=${item.actualSlot == null ? "N/A" : `s${item.actualSlot}`}`, + `statusStrict=${item.status} (reference)`, + `deltaStrict=${formatDelta(item.delta)} (reference)`, + `deltaPhaseRebased=${formatDelta(computeDeltaRebased(item.delta, corePhase?.phaseOffset, view.ii))}`, + `time=${item.firstTime == null ? "N/A" : item.firstTime}`, + `samples=${item.sampleCount}`, + `sourceSummary=${sourceSummary}`, + `sampleRange=${sampleRange}`, + `samplePreview=${samplePreview}`, + `div=${item.firstDivergence ? "yes" : "no"}`, + ].join(" | "); + html += `
${escapeHtml(opLabel)}${escapeHtml(fields)}
`; + } + html += "
"; + controls.timingDrilldown.innerHTML = html; +} + +function renderFocusedCoreMini(view, timeline) { + if (!controls.timingCoreMini) return; + const focusedKey = state.timingFocusedCoreKey; + if (!focusedKey) { + controls.timingCoreMini.innerHTML = + "
Click a Y-axis core label or use the core selector to focus one core.
"; + return; + } + const core = view.columns.find((c) => c.coreKey === focusedKey); + const lane = timeline.lanes.find((l) => l.coreKey === focusedKey); + if (!core || !lane) { + controls.timingCoreMini.innerHTML = + "
Focused core is not visible under current filters.
"; + return; + } + const sourceCounts = new Map(); + for (const item of core.items) { + const samples = Array.isArray(item.allSamples) ? item.allSamples : []; + for (const s of samples) { + const src = String(s?.source || "Unknown"); + sourceCounts.set(src, (sourceCounts.get(src) || 0) + 1); + } + } + const sourceText = sourceCounts.size > 0 + ? [...sourceCounts.entries()].map(([k, v]) => `${k}*${v}`).join(" | ") + : "N/A"; + const windowStart = Number(state.timingWindowStart || timeline.timeMin); + const windowEnd = windowStart + Number(state.timingWindowSize || 1) - 1; + const compModel = ["distance", "fitted", "hybrid"].includes(state.timingCompModel) + ? state.timingCompModel + : "hybrid"; + const rows = lane.expectedSlots + .filter((slot) => { + const inExp = Number.isFinite(slot.expectedTime) && slot.expectedTime >= windowStart && slot.expectedTime <= windowEnd; + const inAct = Number.isFinite(slot.actualTime) && slot.actualTime >= windowStart && slot.actualTime <= windowEnd; + return inExp || inAct; + }) + .sort((a, b) => { + const ta = Number.isFinite(a.actualTime) ? a.actualTime : a.expectedTime; + const tb = Number.isFinite(b.actualTime) ? b.actualTime : b.expectedTime; + if (ta !== tb) return ta - tb; + if (a.opId !== b.opId) return a.opId - b.opId; + return (a.sampleIndex || 0) - (b.sampleIndex || 0); + }) + .slice(0, 28); + const listHtml = rows.length > 0 + ? rows.map((slot) => { + const occ = `[${slot.sampleIndex}/${slot.occurrenceTotal}]`; + const src = slot.sampleSource || "N/A"; + const strict = slot.statusStrict; + const comp = getCompStatusByModel(slot, compModel); + const line = `#${slot.opId}${occ} ${slot.opcode || "N/A"} expT=${slot.expectedTime ?? "N/A"} actT=${slot.actualTime ?? "N/A"} strict=${strict} comp=${comp} src=${src}`; + return `
${escapeHtml(line)}
`; + }).join("") + : "
No blocks from this core in current window.
"; + + controls.timingCoreMini.innerHTML = [ + `
focused-core=(${core.x},${core.y}) | window=T${windowStart}..T${windowEnd} | sources=${escapeHtml(sourceText)}
`, + `
${listHtml}
`, + ].join(""); +} + +function parseProgramYaml(text) { + if (!window.jsyaml) { + throw new Error("js-yaml is unavailable in current page."); + } + const parsed = window.jsyaml.load(text); + const cfg = parsed?.array_config; + if (!cfg || !Array.isArray(cfg.cores)) { + throw new Error("Program YAML must contain array_config.cores."); + } + + const ii = Math.max(0, Math.round(Number(cfg.compiled_ii || 0))); + const arrayColumns = Math.round(Number(cfg.columns)); + const arrayRows = Math.round(Number(cfg.rows)); + const hasArraySize = Number.isFinite(arrayColumns) && Number.isFinite(arrayRows) && arrayColumns > 0 && arrayRows > 0; + const expectedOps = []; + const coreSet = new Map(); + + for (const core of cfg.cores) { + const x = Number(core.column); + const y = Number(core.row); + if (!Number.isFinite(x) || !Number.isFinite(y)) continue; + const coreKey = tileKey(x, y); + if (!coreSet.has(coreKey)) coreSet.set(coreKey, { coreKey, x, y }); + + const entries = Array.isArray(core.entries) ? core.entries : []; + for (const entry of entries) { + const groups = Array.isArray(entry.instructions) ? entry.instructions : []; + for (const ig of groups) { + const fallbackSlot = normalizeSlot(ig.index_per_ii || 0, ii); + const ops = Array.isArray(ig.operations) ? ig.operations : []; + for (const op of ops) { + const id = Number(op.id); + if (!Number.isFinite(id)) continue; + const rawTimeStep = Number(op.time_step); + const hasTimeStep = Number.isFinite(rawTimeStep); + const expectedSlot = normalizeSlot(hasTimeStep ? rawTimeStep : fallbackSlot, ii); + expectedOps.push({ + coreKey, + x, + y, + id: Math.round(id), + opcode: String(op.opcode || ""), + expectedSlot, + rawTimeStep: hasTimeStep ? Math.round(rawTimeStep) : null, + }); + } + } + } + } + + const columns = [...coreSet.values()].sort(sortCore); + const maxSlot = expectedOps.reduce((acc, op) => Math.max(acc, op.expectedSlot), 0); + const slots = ii > 0 + ? 
Array.from({ length: ii }, (_v, idx) => idx) + : Array.from({ length: maxSlot + 1 }, (_v, idx) => idx); + + return { + ii, + expectedOps, + columns, + slots, + arrayColumns: hasArraySize ? arrayColumns : null, + arrayRows: hasArraySize ? arrayRows : null, + }; +} + +function buildActualByCoreAndId(events, ii) { + const actualByCore = new Map(); + for (const e of events) { + const isInst = e.msg === "Inst"; + const isMemoryDirect = e.msg === "Memory" + && (String(e.Behavior || "") === "LoadDirect" || String(e.Behavior || "") === "StoreDirect"); + if (!isInst && !isMemoryDirect) continue; + if (!Number.isFinite(Number(e.Time)) + || !Number.isFinite(Number(e.ID)) + || !Number.isFinite(Number(e.X)) + || !Number.isFinite(Number(e.Y))) { + continue; + } + const coreKey = tileKey(Number(e.X), Number(e.Y)); + if (!actualByCore.has(coreKey)) actualByCore.set(coreKey, new Map()); + const byId = actualByCore.get(coreKey); + const id = Math.round(Number(e.ID)); + if (!byId.has(id)) byId.set(id, []); + byId.get(id).push({ + time: Math.round(Number(e.Time)), + slot: normalizeSlot(e.Time, ii), + pred: e.Pred, + source: isInst ? "Inst" : String(e.Behavior || "MemoryDirect"), + }); + } + for (const byId of actualByCore.values()) { + for (const samples of byId.values()) { + samples.sort((a, b) => a.time - b.time); + } + } + return actualByCore; +} + +function buildStrictTimingView(programSpec, events) { + const actualByCoreAndId = buildActualByCoreAndId(events, programSpec.ii); + const cellMap = new Map(); + const byCore = new Map(); + + for (const op of programSpec.expectedOps) { + const actuals = actualByCoreAndId.get(op.coreKey)?.get(op.id) || []; + const first = actuals.length > 0 ? actuals[0] : null; + const delta = first ? signedDelta(first.slot, op.expectedSlot, programSpec.ii) : null; + const status = !first ? "missing" : (delta === 0 ? "on-time" : (delta < 0 ? 
"early" : "late")); + const allSamples = actuals.map((sample, sampleIdx) => { + const sampleDelta = signedDelta(sample.slot, op.expectedSlot, programSpec.ii); + const sampleStatus = sampleDelta === 0 ? "on-time" : (sampleDelta < 0 ? "early" : "late"); + return { + ...sample, + sampleIdx, + delta: sampleDelta, + status: sampleStatus, + }; + }); + const compareItem = { + ...op, + actualSlot: first ? first.slot : null, + firstTime: first ? first.time : null, + sampleCount: actuals.length, + delta, + status, + allSamples, + firstDivergence: false, + propagated: false, + }; + + if (!byCore.has(op.coreKey)) byCore.set(op.coreKey, []); + byCore.get(op.coreKey).push(compareItem); + + const cellKey = `${op.coreKey}|${op.expectedSlot}`; + if (!cellMap.has(cellKey)) cellMap.set(cellKey, []); + cellMap.get(cellKey).push(compareItem); + } + + const columns = programSpec.columns.map((c) => { + const items = byCore.get(c.coreKey) || []; + items.sort((a, b) => { + const ta = Number.isFinite(a.firstTime) ? a.firstTime : Number.POSITIVE_INFINITY; + const tb = Number.isFinite(b.firstTime) ? 
b.firstTime : Number.POSITIVE_INFINITY; + if (ta !== tb) return ta - tb; + if (a.expectedSlot !== b.expectedSlot) return a.expectedSlot - b.expectedSlot; + return a.id - b.id; + }); + + let hasDivergence = false; + let lastDelta = null; + for (const item of items) { + if (item.status === "on-time") continue; + if (item.status === "missing") { + if (!hasDivergence) { + item.firstDivergence = true; + hasDivergence = true; + } else { + item.propagated = true; + } + continue; + } + if (!hasDivergence || item.delta !== lastDelta) { + item.firstDivergence = true; + hasDivergence = true; + } else { + item.propagated = true; + } + lastDelta = item.delta; + } + + const deltaCounts = new Map(); + const statusCounts = { onTime: 0, early: 0, late: 0, missing: 0 }; + for (const item of items) { + if (item.status === "on-time") statusCounts.onTime += 1; + if (item.status === "early") statusCounts.early += 1; + if (item.status === "late") statusCounts.late += 1; + if (item.status === "missing") statusCounts.missing += 1; + if (item.status === "early" || item.status === "late") { + const k = String(item.delta); + deltaCounts.set(k, (deltaCounts.get(k) || 0) + 1); + } + } + let modeDelta = 0; + let modeCount = 0; + for (const [k, v] of deltaCounts.entries()) { + if (v > modeCount) { + modeCount = v; + modeDelta = Number(k); + } + } + + return { + ...c, + items, + modeDelta, + modeCount, + statusCounts, + earlyLateCount: statusCounts.early + statusCounts.late, + phaseOffset: modeCount > 0 ? modeDelta : null, + phaseConfidence: (statusCounts.early + statusCounts.late) > 0 + ? 
modeCount / (statusCounts.early + statusCounts.late) + : 0, }; + }); + + for (const items of cellMap.values()) { + items.sort((a, b) => a.id - b.id); } - return { kind: "unknown", raw: name }; + + return { + ii: programSpec.ii, + slots: programSpec.slots, + columns, + cellMap, + }; } -function endpointPoint(ep) { - if (!ep) { - return null; - } - if (ep.kind === "tilePort") { - const r = tileRect(ep.x, ep.y); - if (ep.port === "North") return { x: r.x + r.w / 2, y: r.y, tile: tileKey(ep.x, ep.y) }; - if (ep.port === "South") return { x: r.x + r.w / 2, y: r.y + r.h, tile: tileKey(ep.x, ep.y) }; - if (ep.port === "West") return { x: r.x, y: r.y + r.h / 2, tile: tileKey(ep.x, ep.y) }; - if (ep.port === "East") return { x: r.x + r.w, y: r.y + r.h / 2, tile: tileKey(ep.x, ep.y) }; - } - if (ep.kind === "driver") { - const side = ep.side; - const idx = ep.idx; - if (side === "North" && idx <= state.maxX) { - const r = tileRect(idx, state.maxY); - return { x: r.x + r.w / 2, y: r.y - layout.driverOffset }; +function renderTimingView() { + if (!controls.timingGrid || !controls.timingSummary) return; + if (!state.programSpec) { + controls.timingSummary.textContent = "Load program YAML to enable strict timing comparison."; + controls.timingGrid.innerHTML = ""; + if (controls.timingCoreFocus) controls.timingCoreFocus.innerHTML = ""; + if (controls.timingIoWaveCore) controls.timingIoWaveCore.innerHTML = ""; + if (controls.timingIoWaveAll) controls.timingIoWaveAll.checked = false; + if (controls.timingDrilldown) { + controls.timingDrilldown.innerHTML = + "
Load YAML and trace, then click a timeline mark for details.
"; } - if (side === "South" && idx <= state.maxX) { - const r = tileRect(idx, 0); - return { x: r.x + r.w / 2, y: r.y + r.h + layout.driverOffset }; + if (controls.timingCoreMini) { + controls.timingCoreMini.innerHTML = + "
Focus one core to inspect local trace details.
"; } - if (side === "West" && idx <= state.maxY) { - const r = tileRect(0, idx); - return { x: r.x - layout.driverOffset, y: r.y + r.h / 2 }; + return; + } + if (state.events.length === 0) { + controls.timingSummary.textContent = "Load trace log to populate timing comparison."; + controls.timingGrid.innerHTML = ""; + if (controls.timingCoreFocus) controls.timingCoreFocus.innerHTML = ""; + if (controls.timingIoWaveCore) controls.timingIoWaveCore.innerHTML = ""; + if (controls.timingIoWaveAll) controls.timingIoWaveAll.checked = false; + if (controls.timingDrilldown) { + controls.timingDrilldown.innerHTML = + "
Load YAML and trace, then click a timeline mark for details.
"; } - if (side === "East" && idx <= state.maxY) { - const r = tileRect(state.maxX, idx); - return { x: r.x + r.w + layout.driverOffset, y: r.y + r.h / 2 }; + if (controls.timingCoreMini) { + controls.timingCoreMini.innerHTML = + "
Focus one core to inspect local trace details.
"; } + return; } - return null; -} -function inferBounds(events) { - let maxX = 0; - let maxY = 0; - for (const e of events) { - if (Number.isInteger(e.X)) maxX = Math.max(maxX, e.X); - if (Number.isInteger(e.Y)) maxY = Math.max(maxY, e.Y); - for (const f of ["Src", "Dst", "From", "To"]) { - if (!e[f]) continue; - const ep = parseEndpoint(e[f]); - if (ep && ep.kind === "tilePort") { - maxX = Math.max(maxX, ep.x); - maxY = Math.max(maxY, ep.y); - } + const view = buildStrictTimingView(state.programSpec, state.events); + state.timingRows = view.columns; + state.timingColumns = view.slots; + state.timingReady = true; + refreshCoreFocusControl(view.columns); + refreshIoWaveCoreControl(view.columns); + const heatmap = buildTimingHeatmap(view); + const phaseExplain = buildPhaseExplain(view); + const compensation = buildCompensationModels(view, phaseExplain, state.events); + + const totals = { onTime: 0, early: 0, late: 0, missing: 0 }; + for (const c of view.columns) { + totals.onTime += c.statusCounts.onTime; + totals.early += c.statusCounts.early; + totals.late += c.statusCounts.late; + totals.missing += c.statusCounts.missing; + } + const filterText = state.timingAnomalyOnly ? "filter=anomaly-only" : "filter=all"; + const boundaryText = state.timingBoundaryOnly ? "scope=boundary-only" : "scope=all-cores"; + const focusedCoreText = state.timingFocusedCoreKey ? `focus=${state.timingFocusedCoreKey}` : "focus=all-cores"; + const phaseText = state.showPhaseExplain + ? `phase(boundary=${formatDelta(phaseExplain.boundaryPhase)} inner=${formatDelta(phaseExplain.innerPhase)} gap=${formatDelta(phaseExplain.phaseGap)})` + : "phase(hidden)"; + const compModel = ["distance", "fitted", "hybrid"].includes(state.timingCompModel) + ? 
state.timingCompModel + : "hybrid"; + const modelSummary = compensation.models[compModel]; + const compTotals = { onTime: 0, early: 0, late: 0, missing: 0 }; + for (const c of view.columns) { + const compMeta = compensation.coreOffsets.get(c.coreKey) || null; + const compOffset = getModelOffset(compMeta, compModel); + for (const item of c.items) { + const deltaComp = computeDeltaRebased(item.delta, compOffset, view.ii); + const statusComp = statusFromDelta(deltaComp, item.status === "missing"); + if (statusComp === "on-time") compTotals.onTime += 1; + if (statusComp === "early") compTotals.early += 1; + if (statusComp === "late") compTotals.late += 1; + if (statusComp === "missing") compTotals.missing += 1; } } - return { maxX, maxY }; -} + controls.timingSummary.textContent = + `strict baseline | ii=${view.ii || "N/A"} | on-time=${totals.onTime} early=${totals.early} late=${totals.late} missing=${totals.missing} | comp(${compModel}) on-time=${compTotals.onTime} early=${compTotals.early} late=${compTotals.late} missing=${compTotals.missing} gap=${formatDelta(modelSummary?.gap)} | view=${state.timingBaselineView} | ${filterText} | ${boundaryText} | ${focusedCoreText} | ingress=${compensation.ingressSides.join("+")} | ${phaseText}`; -function parseJsonLines(text) { - const lines = text.split(/\r?\n/).map((s) => s.trim()).filter(Boolean); - const rows = []; - for (const line of lines) { - try { - const obj = JSON.parse(line); - if (obj && typeof obj.Time === "number" && Number.isFinite(obj.Time)) { - obj.Time = Math.round(obj.Time); - rows.push(obj); + let visibleColumns = view.columns; + if (state.timingBoundaryOnly) { + visibleColumns = visibleColumns.filter((c) => phaseExplain.coreMap.get(c.coreKey)?.isBoundary); + } + if (state.timingFocusedCoreKey) { + visibleColumns = visibleColumns.filter((c) => c.coreKey === state.timingFocusedCoreKey); + } + const visibleCoreSet = new Set(visibleColumns.map((c) => c.coreKey)); + + if (state.timingSelectedCell && 
!heatmap.cells.has(state.timingSelectedCell)) { + state.timingSelectedCell = null; + } + if (state.timingSelectedCell) { + const selectedCoreKey = splitCellKey(state.timingSelectedCell).coreKey; + if (!visibleCoreSet.has(selectedCoreKey)) { + state.timingSelectedCell = null; + } + } + if (!state.timingSelectedCell) { + for (const c of visibleColumns) { + for (const slot of view.slots) { + const cell = heatmap.cells.get(`${c.coreKey}|${slot}`); + if (cell && (!state.timingAnomalyOnly || cell.hasAnomaly)) { + state.timingSelectedCell = cell.cellKey; + break; + } } - } catch (_) { - // Ignore malformed lines. + if (state.timingSelectedCell) break; } } - return rows; -} -function indexByTime(events) { - const byTime = new Map(); - let minTime = Number.POSITIVE_INFINITY; - let maxTime = Number.NEGATIVE_INFINITY; - for (const e of events) { - const tKey = Math.round(Number(e.Time)); - if (!byTime.has(tKey)) byTime.set(tKey, []); - byTime.get(tKey).push(e); - minTime = Math.min(minTime, tKey); - maxTime = Math.max(maxTime, tKey); - } - if (!Number.isFinite(minTime) || !Number.isFinite(maxTime)) { - minTime = 0; - maxTime = 0; + const timeline = buildTimelineLanes(view, visibleColumns, phaseExplain, compensation); + const compModelForMismatch = ["distance", "fitted", "hybrid"].includes(state.timingCompModel) + ? state.timingCompModel + : "hybrid"; + let firstHybridMismatchTime = null; + for (const lane of timeline.lanes) { + for (const slot of lane.expectedSlots) { + if (getCompStatusByModel(slot, compModelForMismatch) !== "on-time") { + const t = Number.isFinite(slot.actualTime) ? 
slot.actualTime : slot.expectedTime; + if (Number.isFinite(t) && (firstHybridMismatchTime == null || t < firstHybridMismatchTime)) { + firstHybridMismatchTime = t; + } + } + } } - return { byTime, minTime, maxTime }; + state.firstHybridMismatchTime = firstHybridMismatchTime; + + renderTimelineSvg(view, timeline); + renderTimingDrilldown(view, heatmap, phaseExplain, compensation); + renderFocusedCoreMini(view, timeline); } function summarizeEvent(e) { @@ -193,13 +2428,58 @@ function summarizeEvent(e) { if (e.msg === "Memory") { return `Memory ${e.Behavior} tile=(${e.X},${e.Y}) value=${e.Value} addr=${e.Addr}`; } + if (e.msg === "Backpressure") { + return `Backpressure tile=(${e.X},${e.Y}) dir=${e.DstDir ?? "N/A"} reason=${e.Reason ?? "N/A"} op=${e.OpCode ?? "N/A"} id=${e.ID ?? "N/A"}`; + } return JSON.stringify(e); } +function applyMeshZoomTransform(transform) { + meshZoomTransform = transform || d3.zoomIdentity; + if (sceneRoot) sceneRoot.attr("transform", meshZoomTransform.toString()); +} + +function bindMeshZoom() { + if (!meshZoomBehavior) { + meshZoomBehavior = d3.zoom() + .scaleExtent([0.4, 8]) + .on("zoom", (event) => { + applyMeshZoomTransform(event.transform); + }); + } + meshZoomBehavior + .extent([[0, 0], [layout.width, layout.height]]) + .translateExtent([ + [-layout.width * 1.5, -layout.height * 1.5], + [layout.width * 2.5, layout.height * 2.5], + ]); + svg.call(meshZoomBehavior); + svg.call(meshZoomBehavior.transform, meshZoomTransform); +} + +function renderMeshLegend() { + if (!controls.meshLegend) return; + const legendItems = [ + ["Send", colors.Send], + ["Recv", colors.Recv], + ["FeedIn", colors.FeedIn], + ["Collect", colors.Collect], + ["Backpressure path", colors.Backpressure], + ["Inst", colors.Inst], + ["Memory", colors.Memory], + ]; + controls.meshLegend.innerHTML = legendItems.map( + ([name, color]) => + `${name}`, + ).join(""); +} + function drawStaticScene() { svg.selectAll("*").remove(); - staticLayer = svg.append("g"); - dynamicLayer = 
svg.append("g"); + sceneRoot = svg.append("g").attr("class", "mesh-scene-root"); + applyMeshZoomTransform(meshZoomTransform); + staticLayer = sceneRoot.append("g"); + dynamicLayer = sceneRoot.append("g"); const bg = staticLayer.append("rect"); bg @@ -230,6 +2510,19 @@ function drawStaticScene() { .attr("height", layout.tileSize) .attr("rx", 10); + tileGroup + .selectAll(".tile-report-heat") + .data(tiles) + .join("rect") + .attr("class", (d) => `tile-report-heat tile-report-heat-${d.x}-${d.y}`) + .attr("x", (d) => tileRect(d.x, d.y).x) + .attr("y", (d) => tileRect(d.x, d.y).y) + .attr("width", layout.tileSize) + .attr("height", layout.tileSize) + .attr("rx", 10) + .attr("opacity", 0) + .style("display", "none"); + tileGroup .selectAll("text") .data(tiles) @@ -269,35 +2562,12 @@ function drawStaticScene() { .attr("x", (d) => endpointPoint({ kind: "driver", side: d.side, idx: d.idx }).x + 12) .attr("y", (d) => endpointPoint({ kind: "driver", side: d.side, idx: d.idx }).y + 4) .text((d) => `${d.side[0]}${d.idx}`); - - const legend = staticLayer.append("g").attr("transform", "translate(28, 34)"); - const legendItems = [ - ["Send", colors.Send], - ["Recv", colors.Recv], - ["FeedIn", colors.FeedIn], - ["Collect", colors.Collect], - ["Inst", colors.Inst], - ["Memory", colors.Memory], - ]; - legend - .selectAll("circle") - .data(legendItems) - .join("circle") - .attr("cx", (_d, i) => i * 112) - .attr("cy", 0) - .attr("r", 5) - .attr("fill", (d) => d[1]); - legend - .selectAll("text") - .data(legendItems) - .join("text") - .attr("class", "legend-text") - .attr("x", (_d, i) => i * 112 + 9) - .attr("y", 4) - .text((d) => d[0]); + applyReportHeatOverlay(); + renderMeshLegend(); + bindMeshZoom(); } -function drawLink(type, srcPoint, dstPoint) { +function buildLinkPath(srcPoint, dstPoint) { const path = d3.path(); path.moveTo(srcPoint.x, srcPoint.y); const dx = dstPoint.x - srcPoint.x; @@ -311,14 +2581,198 @@ function drawLink(type, srcPoint, dstPoint) { dstPoint.x, dstPoint.y, 
); + return path.toString(); +} + +function resolveDataFlowEndpoints(event) { + if (!event || event.msg !== "DataFlow") return null; + let src = null; + let dst = null; + if (event.Behavior === "FeedIn") { + src = parseEndpoint(event.From); + dst = parseEndpoint(event.To); + } else if (event.Behavior === "Collect") { + src = parseEndpoint(event.From); + dst = parseEndpoint(event.To || event.Dst); + } else { + src = parseEndpoint(event.Src); + dst = parseEndpoint(event.Dst); + } + const srcPoint = endpointPoint(src); + const dstPoint = endpointPoint(dst); + return { + type: event.Behavior, + src, + dst, + srcPoint, + dstPoint, + }; +} + +function collectBackpressureOverlay(events) { + const blockedTileKeys = new Set(); + const blockedLinks = []; + const incomingByDstTile = new Map(); + + const addIncomingEdge = (edge) => { + if (!edge?.dstTileKey) return; + if (!incomingByDstTile.has(edge.dstTileKey)) incomingByDstTile.set(edge.dstTileKey, []); + incomingByDstTile.get(edge.dstTileKey).push(edge); + }; + + for (const e of events) { + if (e.msg === "Backpressure" && Number.isFinite(Number(e.X)) && Number.isFinite(Number(e.Y))) { + const x = Math.round(Number(e.X)); + const y = Math.round(Number(e.Y)); + const tile = tileKey(x, y); + blockedTileKeys.add(tile); + + const outPort = normalizePortName(e.DstDir || e.Dir || e.Port || ""); + if (outPort) { + const src = tilePortEndpoint(x, y, outPort); + const dst = neighborEndpointFromTilePort(x, y, outPort); + const srcPoint = endpointPoint(src); + const dstPoint = endpointPoint(dst); + if (srcPoint && dstPoint) { + blockedLinks.push({ + key: `${src.raw}->${dst.raw}`, + srcPoint, + dstPoint, + title: `blocked wire: ${src.raw} -> ${dst.raw}`, + }); + } + } + continue; + } + + const flow = resolveDataFlowEndpoints(e); + if (!flow?.srcPoint || !flow?.dstPoint) continue; + if (!flow.srcPoint.tile || !flow.dstPoint.tile) continue; + addIncomingEdge({ + key: `${flow.src?.raw || flow.srcPoint.tile}->${flow.dst?.raw || 
flow.dstPoint.tile}`, + srcTileKey: flow.srcPoint.tile, + dstTileKey: flow.dstPoint.tile, + srcPoint: flow.srcPoint, + dstPoint: flow.dstPoint, + behavior: flow.type, + }); + } + + const propagatedTileKeys = new Set(blockedTileKeys); + const propagatedLinks = []; + const visitedEdges = new Set(); + const queue = [...blockedTileKeys]; + while (queue.length > 0) { + const tile = queue.shift(); + const incoming = incomingByDstTile.get(tile) || []; + for (const edge of incoming) { + if (visitedEdges.has(edge.key)) continue; + visitedEdges.add(edge.key); + propagatedLinks.push(edge); + if (edge.srcTileKey && !propagatedTileKeys.has(edge.srcTileKey)) { + propagatedTileKeys.add(edge.srcTileKey); + queue.push(edge.srcTileKey); + } + } + } + + return { + tileKeys: propagatedTileKeys, + blockedLinks, + propagatedLinks, + }; +} + +function drawBackpressureOverlay(overlay) { + if (!overlay) return; + const drawnPathKeys = new Set(); + for (const link of overlay.propagatedLinks || []) { + if (!link?.srcPoint || !link?.dstPoint) continue; + if (drawnPathKeys.has(link.key)) continue; + drawnPathKeys.add(link.key); + dynamicLayer.append("path") + .attr("class", "bp-path-link") + .attr("d", buildLinkPath(link.srcPoint, link.dstPoint)) + .append("title") + .text(`backpressure path (${link.behavior || "DataFlow"})`); + } + for (const link of overlay.blockedLinks || []) { + if (!link?.srcPoint || !link?.dstPoint) continue; + if (drawnPathKeys.has(link.key)) continue; + drawnPathKeys.add(link.key); + dynamicLayer.append("path") + .attr("class", "bp-path-link") + .attr("d", buildLinkPath(link.srcPoint, link.dstPoint)) + .append("title") + .text(link.title || "blocked wire"); + } + for (const key of overlay.tileKeys || []) { + const [x, y] = String(key).split(",").map(Number); + if (!Number.isFinite(x) || !Number.isFinite(y)) continue; + const r = tileRect(x, y); + dynamicLayer.append("rect") + .attr("class", "bp-tile-outline") + .attr("x", r.x + 1.5) + .attr("y", r.y + 1.5) + 
.attr("width", Math.max(2, r.w - 3)) + .attr("height", Math.max(2, r.h - 3)) + .attr("rx", 9) + .append("title") + .text(`backpressure tile (${x},${y})`); + } +} - dynamicLayer +function drawLink(type, srcPoint, dstPoint, payload = null) { + const pathData = buildLinkPath(srcPoint, dstPoint); + const dx = dstPoint.x - srcPoint.x; + const dy = dstPoint.y - srcPoint.y; + + const link = dynamicLayer .append("path") .attr("class", "event-link") - .attr("d", path.toString()) + .attr("d", pathData) .attr("stroke", colors[type] || "#555") .attr("stroke-opacity", 0.78); + const dataText = payload?.dataText == null ? "" : String(payload.dataText); + if (payload?.drawDataLabel && dataText) { + const shortData = shortText(dataText, 12); + let anchorX = srcPoint.x + dx * 0.58; + let anchorY = srcPoint.y + dy * 0.58; + try { + const node = link.node(); + if (node) { + const total = node.getTotalLength(); + if (Number.isFinite(total) && total > 0) { + const p = node.getPointAtLength(total * 0.58); + anchorX = p.x; + anchorY = p.y; + } + } + } catch (_) { + // Fall back to linear interpolation point when path metrics unavailable. + } + const tag = dynamicLayer.append("g") + .attr("class", "flow-data-tag") + .attr("transform", `translate(${anchorX},${anchorY})`); + const text = tag.append("text") + .attr("class", "flow-data-text") + .attr("text-anchor", "middle") + .attr("dominant-baseline", "middle") + .text(shortData); + const box = text.node()?.getBBox(); + if (box) { + tag.insert("rect", "text") + .attr("class", "flow-data-bg") + .attr("x", box.x - 3) + .attr("y", box.y - 1) + .attr("width", box.width + 6) + .attr("height", box.height + 2) + .attr("rx", 4); + } + tag.append("title").text(`data=${dataText}`); + } + const pulse = dynamicLayer .append("circle") .attr("class", "pulse") @@ -346,61 +2800,165 @@ function applyTileActivity(activeTiles) { staticLayer.selectAll(".tile-label").style("display", state.showLabels ? 
null : "none"); } +function shortText(value, maxLen = 16) { + const s = String(value ?? "").trim(); + if (!s) return ""; + return s.length <= maxLen ? s : `${s.slice(0, maxLen - 1)}~`; +} + +function summarizeTokens(tokens, prefix, maxItems = 3) { + if (!Array.isArray(tokens) || tokens.length === 0) return null; + const counts = new Map(); + for (const token of tokens) { + const key = String(token || "N/A"); + counts.set(key, (counts.get(key) || 0) + 1); + } + const sorted = [...counts.entries()] + .sort((a, b) => { + if (b[1] !== a[1]) return b[1] - a[1]; + return a[0].localeCompare(b[0]); + }); + const picked = sorted.slice(0, maxItems).map(([k, v]) => (v > 1 ? `${k}*${v}` : k)); + const remain = sorted.length - maxItems; + const suffix = remain > 0 ? `,+${remain}` : ""; + return `${prefix}:${picked.join(",")}${suffix}`; +} + +function summarizeData(values, prefix, maxItems = 4) { + if (!Array.isArray(values) || values.length === 0) return null; + const picked = values.slice(0, maxItems).map((v) => shortText(v, 8)); + const suffix = values.length > maxItems ? ",..." : ""; + return `${prefix}:${picked.join(",")}${suffix}`; +} + function drawTileBadges(timeEvents) { - const instCounts = new Map(); - const memCounts = new Map(); + const byTile = new Map(); + const ensure = (key) => { + if (!byTile.has(key)) { + byTile.set(key, { + op: [], + mem: [], + txData: [], + rxData: [], + details: [], + }); + } + return byTile.get(key); + }; for (const e of timeEvents) { - if (e.msg === "Inst" && state.showInst) { - const k = tileKey(e.X, e.Y); - instCounts.set(k, (instCounts.get(k) || 0) + 1); + if (e.msg === "Inst" && state.showInst && Number.isFinite(Number(e.X)) && Number.isFinite(Number(e.Y))) { + const k = tileKey(Number(e.X), Number(e.Y)); + const rec = ensure(k); + const op = shortText(e.OpCode || "Inst", 10); + rec.op.push(op || "Inst"); + rec.details.push(`Inst#${e.ID ?? "?"} ${e.OpCode ?? "N/A"} pred=${e.Pred ?? 
"N/A"}`); + continue; } - if (e.msg === "Memory" && state.showMemory) { - const k = tileKey(e.X, e.Y); - memCounts.set(k, (memCounts.get(k) || 0) + 1); + + if (e.msg === "Memory" && state.showMemory && Number.isFinite(Number(e.X)) && Number.isFinite(Number(e.Y))) { + const k = tileKey(Number(e.X), Number(e.Y)); + const rec = ensure(k); + const behavior = String(e.Behavior || "Memory"); + const memTag = behavior === "LoadDirect" + ? `LD(${shortText(e.Value, 6)})` + : (behavior === "StoreDirect" + ? `ST(${shortText(e.Value, 6)})` + : shortText(behavior, 12)); + rec.mem.push(memTag); + rec.details.push(`Memory ${behavior} value=${e.Value ?? "N/A"} addr=${e.Addr ?? "N/A"}`); + continue; + } + + if (e.msg === "DataFlow" && state.showDataFlow) { + const dataValue = e.Data; + if (e.Behavior === "Send") { + const src = parseEndpoint(e.Src); + if (src?.kind === "tilePort") { + const rec = ensure(tileKey(src.x, src.y)); + rec.txData.push(dataValue); + rec.details.push(`TX ${dataValue} ${e.Src} -> ${e.Dst}`); + } + } else if (e.Behavior === "Recv") { + const dst = parseEndpoint(e.Dst); + if (dst?.kind === "tilePort") { + const rec = ensure(tileKey(dst.x, dst.y)); + rec.rxData.push(dataValue); + rec.details.push(`RX ${dataValue} ${e.Src} -> ${e.Dst}`); + } + } else if (e.Behavior === "FeedIn") { + const dst = parseEndpoint(e.To); + if (dst?.kind === "tilePort") { + const rec = ensure(tileKey(dst.x, dst.y)); + rec.rxData.push(dataValue); + rec.details.push(`FeedIn ${dataValue} ${e.From} -> ${e.To}`); + } + } else if (e.Behavior === "Collect") { + const src = parseEndpoint(e.From); + if (src?.kind === "tilePort") { + const rec = ensure(tileKey(src.x, src.y)); + rec.txData.push(dataValue); + rec.details.push(`Collect ${dataValue} ${e.From} -> ${e.To || e.Dst || "Driver"}`); + } + } } } - for (const [k, count] of instCounts.entries()) { - const [x, y] = k.split(",").map(Number); - const r = tileRect(x, y); - dynamicLayer - .append("circle") - .attr("class", "inst-badge") - 
.attr("cx", r.x + 16) - .attr("cy", r.y + 16) - .attr("r", 10) - .attr("fill", colors.Inst) - .attr("opacity", 0.9); - dynamicLayer - .append("text") - .attr("x", r.x + 12) - .attr("y", r.y + 20) - .attr("fill", "#fff") - .attr("font-size", 11) - .text(`${count}`); - } - - for (const [k, count] of memCounts.entries()) { + for (const [k, rec] of byTile.entries()) { const [x, y] = k.split(",").map(Number); const r = tileRect(x, y); - dynamicLayer - .append("rect") - .attr("class", "memory-badge") - .attr("x", r.x + r.w - 23) - .attr("y", r.y + r.h - 23) - .attr("width", 16) - .attr("height", 16) - .attr("rx", 3) - .attr("fill", colors.Memory) - .attr("opacity", 0.9); - dynamicLayer - .append("text") - .attr("x", r.x + r.w - 20) - .attr("y", r.y + r.h - 11) - .attr("fill", "#fff") - .attr("font-size", 11) - .text(`${count}`); + const lineHeight = clamp(Math.round(layout.tileSize * 0.12), 9, 13); + const fontSize = clamp(Math.round(layout.tileSize * 0.1), 7, 11); + const innerWidth = Math.max(20, r.w - 8); + const approxCharWidth = fontSize * 0.6; + const maxCharsPerLine = Math.max(4, Math.floor(innerWidth / approxCharWidth)); + const textTop = r.y + 24; + const maxTextHeight = Math.max(8, r.h - 30); + const maxLines = Math.max(1, Math.floor(maxTextHeight / lineHeight)); + + const lines = []; + const lineOps = [summarizeTokens(rec.op, "OP", 2), summarizeTokens(rec.mem, "MEM", 1)] + .filter(Boolean) + .join(" | "); + const lineFlow = [summarizeData(rec.rxData, "RX", 2), summarizeData(rec.txData, "TX", 2)] + .filter(Boolean) + .join(" | "); + // Prioritize flow values so data is still visible when space is tight. 
+ if (lineFlow) lines.push(lineFlow); + if (lineOps) lines.push(lineOps); + if (lines.length === 0) continue; + + const shown = lines.slice(0, maxLines).map((line) => shortText(line, maxCharsPerLine)); + if (lines.length > maxLines) { + shown[maxLines - 1] = `${shortText(shown[maxLines - 1], Math.max(4, maxCharsPerLine - 3))}...`; + } + const bgHeight = shown.length * lineHeight + 8; + const g = dynamicLayer.append("g") + .attr("class", "tile-overlay") + .attr("transform", `translate(${r.x + 4},${textTop})`); + g.append("rect") + .attr("class", "tile-overlay-card") + .attr("width", innerWidth) + .attr("height", bgHeight) + .attr("rx", 4); + const text = g.append("text") + .attr("class", "tile-overlay-text") + .style("font-size", `${fontSize}px`) + .attr("x", 4) + .attr("y", lineHeight - 1); + shown.forEach((line, idx) => { + text.append("tspan") + .attr("x", 4) + .attr("dy", idx === 0 ? 0 : lineHeight) + .text(line); + }); + g.append("title").text( + [ + `tile=(${x},${y})`, + ...lines, + ...rec.details.slice(0, 12), + ].join("\n"), + ); } } @@ -421,32 +2979,34 @@ function renderCycleDetails(events, t) { } function renderTime(t) { - state.currentTime = t; - controls.timeLabel.textContent = `T=${t}`; - controls.timeSlider.value = String(t); + const cycle = clamp(normalizeCycleTime(t, state.currentTime), state.minTime, state.maxTime); + state.currentTime = cycle; + controls.timeLabel.textContent = `T=${cycle}`; + controls.timeSlider.value = String(cycle); dynamicLayer.selectAll("*").remove(); - const events = state.byTime.get(t) || []; + const events = state.byTime.get(cycle) || []; const activeTiles = new Set(); + const linkLabelSeen = new Set(); + const bpOverlay = collectBackpressureOverlay(events); + for (const tile of bpOverlay.tileKeys || []) { + activeTiles.add(tile); + } for (const e of events) { if (e.msg === "DataFlow" && state.showDataFlow) { - let src = null; - let dst = null; - let type = e.Behavior; - if (e.Behavior === "FeedIn") { - src = 
parseEndpoint(e.From); - dst = parseEndpoint(e.To); - } else if (e.Behavior === "Collect") { - src = parseEndpoint(e.From); - } else { - src = parseEndpoint(e.Src); - dst = parseEndpoint(e.Dst); - } - const srcPoint = endpointPoint(src); - const dstPoint = endpointPoint(dst); + const flow = resolveDataFlowEndpoints(e); + const src = flow?.src; + const dst = flow?.dst; + const type = flow?.type || e.Behavior; + const srcPoint = flow?.srcPoint; + const dstPoint = flow?.dstPoint; if (srcPoint && dstPoint) { - drawLink(type, srcPoint, dstPoint); + const dataText = e.Data == null ? "" : String(e.Data); + const labelKey = `${src?.raw || srcPoint.tile || "?"}|${dst?.raw || dstPoint.tile || "?"}|${dataText}`; + const drawDataLabel = dataText && !linkLabelSeen.has(labelKey); + if (drawDataLabel) linkLabelSeen.add(labelKey); + drawLink(type, srcPoint, dstPoint, { dataText, drawDataLabel }); } else if (srcPoint) { dynamicLayer .append("circle") @@ -474,45 +3034,76 @@ function renderTime(t) { applyTileActivity(activeTiles); drawTileBadges(events); - renderCycleDetails(events, t); + drawBackpressureOverlay(bpOverlay); + // Keep link arrows above tile overlay cards. + dynamicLayer.selectAll(".event-link").raise(); + dynamicLayer.selectAll(".bp-path-link").raise(); + dynamicLayer.selectAll(".bp-tile-outline").raise(); + // Keep transfer data labels/pulses above tile cards for readability. 
+ dynamicLayer.selectAll(".flow-data-tag").raise(); + dynamicLayer.selectAll(".pulse").raise(); + renderCycleDetails(events, cycle); } function stopPlayback() { if (state.timer) { - clearInterval(state.timer); + clearTimeout(state.timer); state.timer = null; } controls.playBtn.textContent = "Play"; } +function playbackTick() { + if (!state.timer) return; + const next = nextIndexedTime(state.currentTime, +1); + if (next <= state.currentTime) { + stopPlayback(); + return; + } + renderTime(next); + state.timer = setTimeout(playbackTick, state.speedMs); +} + function playOrPause() { if (state.timer) { stopPlayback(); return; } + if (state.currentTime >= state.maxTime) renderTime(state.maxTime); controls.playBtn.textContent = "Pause"; - state.timer = setInterval(() => { - if (state.currentTime >= state.maxTime) { - stopPlayback(); - return; - } - renderTime(state.currentTime + 1); - }, state.speedMs); + state.timer = setTimeout(playbackTick, state.speedMs); } function initControls() { controls.playBtn.addEventListener("click", playOrPause); controls.stepBackBtn.addEventListener("click", () => { + if (state.stepLock) return; + state.stepLock = true; stopPlayback(); - renderTime(Math.max(state.minTime, state.currentTime - 1)); + try { + renderTime(nextIndexedTime(state.currentTime, -1)); + } finally { + state.stepLock = false; + } }); controls.stepFwdBtn.addEventListener("click", () => { + if (state.stepLock) return; + state.stepLock = true; stopPlayback(); - renderTime(Math.min(state.maxTime, state.currentTime + 1)); + try { + renderTime(nextIndexedTime(state.currentTime, +1)); + } finally { + state.stepLock = false; + } }); controls.timeSlider.addEventListener("input", (e) => { + const wasPlaying = Boolean(state.timer); stopPlayback(); - renderTime(Number(e.target.value)); + const nextTime = Number(e.target.value); + renderTime(nextTime); + if (wasPlaying) { + playOrPause(); + } }); controls.speedSelect.addEventListener("change", (e) => { state.speedMs = 
Number(e.target.value); @@ -537,23 +3128,198 @@ function initControls() { state.showLabels = Boolean(e.target.checked); renderTime(state.currentTime); }); + if (controls.timingAnomalyOnly) { + controls.timingAnomalyOnly.checked = state.timingAnomalyOnly; + controls.timingAnomalyOnly.addEventListener("change", (e) => { + state.timingAnomalyOnly = Boolean(e.target.checked); + renderTimingView(); + }); + } + if (controls.timingShowPhaseExplain) { + controls.timingShowPhaseExplain.checked = state.showPhaseExplain; + controls.timingShowPhaseExplain.addEventListener("change", (e) => { + state.showPhaseExplain = Boolean(e.target.checked); + renderTimingView(); + }); + } + if (controls.timingBoundaryOnly) { + controls.timingBoundaryOnly.checked = state.timingBoundaryOnly; + controls.timingBoundaryOnly.addEventListener("change", (e) => { + state.timingBoundaryOnly = Boolean(e.target.checked); + state.timingSelectedCell = null; + renderTimingView(); + }); + } + if (controls.timingCoreFocus) { + controls.timingCoreFocus.addEventListener("change", (e) => { + const value = String(e.target.value || ""); + state.timingFocusedCoreKey = value || null; + state.timingSelectedCell = null; + renderTimingView(); + }); + } + if (controls.timingIoWaveAll) { + controls.timingIoWaveAll.addEventListener("change", (e) => { + const checked = Boolean(e.target.checked); + state.timingIoWaveExpandAll = checked; + if (checked) { + state.timingIoWaveExpandedCoreKeys = new Set((state.timingRows || []).map((c) => c.coreKey)); + } + renderTimingView(); + }); + } + if (controls.timingIoWaveCore) { + controls.timingIoWaveCore.addEventListener("change", (e) => { + const selectedKeys = new Set( + [...e.target.selectedOptions] + .map((opt) => String(opt.value || "")) + .filter(Boolean), + ); + state.timingIoWaveExpandedCoreKeys = selectedKeys; + const total = (state.timingRows || []).length; + state.timingIoWaveExpandAll = total > 0 && selectedKeys.size >= total; + renderTimingView(); + }); + } + if 
(controls.timingBaselineView) { + controls.timingBaselineView.value = state.timingBaselineView; + controls.timingBaselineView.addEventListener("change", (e) => { + state.timingBaselineView = String(e.target.value || "strict"); + renderTimingView(); + }); + } + if (controls.timingCompModel) { + controls.timingCompModel.value = state.timingCompModel; + controls.timingCompModel.addEventListener("change", (e) => { + state.timingCompModel = String(e.target.value || "hybrid"); + renderTimingView(); + }); + } + if (controls.timingExportPng) { + controls.timingExportPng.addEventListener("click", exportTimelinePng); + } + if (controls.timingJumpFirstMismatch) { + controls.timingJumpFirstMismatch.addEventListener("click", () => { + if (state.firstHybridMismatchTime == null || !Number.isFinite(state.firstHybridMismatchTime)) return; + const half = Math.floor((Number(state.timingWindowSize) || 60) / 2); + state.timingWindowStart = Math.max(0, state.firstHybridMismatchTime - half); + renderTimingView(); + }); + } + if (controls.timingWindowStart) { + controls.timingWindowStart.addEventListener("input", (e) => { + state.timingWindowStart = Number(e.target.value); + renderTimingView(); + }); + } + if (controls.timingWindowSize) { + controls.timingWindowSize.addEventListener("input", (e) => { + state.timingWindowSize = Number(e.target.value); + renderTimingView(); + }); + } + if (controls.timingZoomY) { + controls.timingZoomY.addEventListener("input", (e) => { + state.timingZoomY = clamp(Number(e.target.value) / 100, 0.6, 4); + renderTimingView(); + }); + } + if (controls.timingResetZoom) { + controls.timingResetZoom.addEventListener("click", () => { + state.timingZoomX = 1; + state.timingZoomY = 1; + const fullMin = state.timingViewport?.fullMin ?? state.minTime; + const fullMax = state.timingViewport?.fullMax ?? 
state.maxTime; + const fullSpan = Math.max(1, fullMax - fullMin + 1); + state.timingWindowSize = Math.min(120, fullSpan); + state.timingWindowStart = fullMin; + renderTimingView(); + }); + } + if (controls.timingGrid) { + controls.timingGrid.addEventListener("wheel", handleTimelineCtrlWheelZoom, { passive: false }); + controls.timingGrid.addEventListener("click", (e) => { + const label = e.target.closest("[data-core-key]"); + if (label) { + if (timingCoreLabelClickTimer) clearTimeout(timingCoreLabelClickTimer); + const key = label.getAttribute("data-core-key"); + timingCoreLabelClickTimer = setTimeout(() => { + timingCoreLabelClickTimer = null; + state.timingFocusedCoreKey = state.timingFocusedCoreKey === key ? null : key; + state.timingSelectedCell = null; + renderTimingView(); + }, 220); + return; + } + const btn = e.target.closest("[data-timing-cell]"); + if (!btn) return; + state.timingSelectedCell = btn.getAttribute("data-timing-cell"); + renderTimingView(); + }); + controls.timingGrid.addEventListener("dblclick", (e) => { + const label = e.target.closest("[data-core-key]"); + if (!label) return; + if (timingCoreLabelClickTimer) { + clearTimeout(timingCoreLabelClickTimer); + timingCoreLabelClickTimer = null; + } + const key = label.getAttribute("data-core-key"); + if (!key) return; + const expanded = new Set(state.timingIoWaveExpandedCoreKeys || []); + if (expanded.has(key)) { + expanded.delete(key); + } else { + expanded.add(key); + } + state.timingIoWaveExpandedCoreKeys = expanded; + const total = (state.timingRows || []).length; + state.timingIoWaveExpandAll = total > 0 && expanded.size >= total; + renderTimingView(); + }); + } controls.fileInput.addEventListener("change", async (e) => { const file = e.target.files?.[0]; if (!file) return; const text = await file.text(); loadTrace(text); }); + controls.yamlInput.addEventListener("change", async (e) => { + const file = e.target.files?.[0]; + if (!file) return; + const text = await file.text(); + 
loadProgramYaml(text); + }); + if (controls.reportInput) { + controls.reportInput.addEventListener("change", async (e) => { + const file = e.target.files?.[0]; + if (!file) return; + const text = await file.text(); + loadReport(text); + }); + } } function loadTrace(text) { stopPlayback(); const events = parseJsonLines(text); state.events = events; - const bounds = inferBounds(events); + state.coreIoWaveByTime = buildCoreIoWaveByTime(events); + state.timingSelectedCell = null; + state.timingFocusedCoreKey = null; + state.timingIoWaveExpandAll = false; + state.timingIoWaveExpandedCoreKeys = new Set(); + state.timingWindowStart = 0; + state.timingWindowSize = 120; + state.timingZoomX = 1; + state.timingZoomY = 1; + state.timingViewport = null; + meshZoomTransform = d3.zoomIdentity; + const bounds = resolveMeshBounds(events); state.maxX = bounds.maxX; state.maxY = bounds.maxY; const index = indexByTime(events); state.byTime = index.byTime; + state.timeKeys = index.sortedTimes; state.minTime = index.minTime; state.maxTime = index.maxTime; @@ -561,12 +3327,84 @@ function loadTrace(text) { controls.timeSlider.max = String(state.maxTime); controls.timeSlider.value = String(state.minTime); + applyAdaptiveLayout(); drawStaticScene(); renderTime(state.minTime); + renderReportView(); + renderTimingView(); +} + +function loadProgramYaml(text) { + try { + state.programSpec = parseProgramYaml(text); + state.yamlGridBounds = boundsFromProgramSpec(state.programSpec); + state.timingSelectedCell = null; + state.timingFocusedCoreKey = null; + state.timingIoWaveExpandAll = false; + state.timingIoWaveExpandedCoreKeys = new Set(); + state.timingWindowStart = 0; + state.timingZoomX = 1; + state.timingZoomY = 1; + state.timingViewport = null; + meshZoomTransform = d3.zoomIdentity; + const bounds = state.yamlGridBounds || inferBounds(state.events); + state.maxX = bounds.maxX; + state.maxY = bounds.maxY; + applyAdaptiveLayout(); + drawStaticScene(); + if (state.events.length > 0) { + 
renderTime(clamp(state.currentTime, state.minTime, state.maxTime)); + } + renderReportView(); + renderTimingView(); + } catch (err) { + state.programSpec = null; + state.yamlGridBounds = null; + state.timingReady = false; + state.timingFocusedCoreKey = null; + state.timingIoWaveExpandAll = false; + state.timingIoWaveExpandedCoreKeys = new Set(); + controls.timingSummary.textContent = `Program YAML parse error: ${err.message}`; + controls.timingGrid.innerHTML = ""; + if (controls.timingCoreFocus) { + controls.timingCoreFocus.innerHTML = ""; + controls.timingCoreFocus.value = ""; + } + if (controls.timingIoWaveCore) { + controls.timingIoWaveCore.innerHTML = ""; + } + if (controls.timingIoWaveAll) { + controls.timingIoWaveAll.checked = false; + } + if (controls.timingDrilldown) { + controls.timingDrilldown.innerHTML = + "
Program YAML parse failed. Fix YAML and reload.
"; + } + if (controls.timingCoreMini) { + controls.timingCoreMini.innerHTML = + "
Focus one core to inspect local trace details.
"; + } + renderReportView(); + } +} + +let resizeTimer = null; + +function handleResize() { + if (state.events.length === 0 && !state.programSpec) return; + if (resizeTimer) clearTimeout(resizeTimer); + resizeTimer = setTimeout(() => { + applyAdaptiveLayout(); + drawStaticScene(); + if (state.events.length > 0) renderTime(state.currentTime); + }, 120); } async function boot() { initControls(); + applyAdaptiveLayout(); + renderReportView(); + window.addEventListener("resize", handleResize); // Default behavior: load ../gemm.json.log when served from repo root. try { @@ -578,6 +3416,15 @@ async function boot() { controls.statsLine.textContent = "Default log not loaded. Use the file picker."; controls.eventDump.textContent = ""; } + + try { + const yamlResp = await fetch("../gemm.yaml"); + if (!yamlResp.ok) throw new Error(`HTTP ${yamlResp.status}`); + const yamlText = await yamlResp.text(); + loadProgramYaml(yamlText); + } catch (_) { + renderTimingView(); + } } boot(); diff --git a/tool/viz/index.html b/tool/viz/index.html index cd8528b..2a8caa9 100644 --- a/tool/viz/index.html +++ b/tool/viz/index.html @@ -3,20 +3,31 @@ - CGRA GEMM Log Viewer + CGRA Log Viewer +
-

CGRA GEMM Log Viewer

+

CGRA Log Viewer

Timeline visualization for JSONL execution traces

- - +
+ + +
+
+ + +
+
+ + +
@@ -45,7 +56,8 @@

CGRA GEMM Log Viewer

- +
+
@@ -53,6 +65,99 @@

Cycle Details


       
+ +
+

Report Overview

+

+
+
+
+ +
+

Strict Timing Offset View

+

+
+ + + + + + + + + + + +
+
+ + + +
+
+ on-time + early + late + missing + propagated +
+
+
+
+ +
+
+
diff --git a/tool/viz/styles.css b/tool/viz/styles.css index 0f2adf1..246ef65 100644 --- a/tool/viz/styles.css +++ b/tool/viz/styles.css @@ -30,6 +30,7 @@ body { radial-gradient(circle at 10% 90%, #8ecae644, transparent 38%), var(--bg); font-family: "Avenir Next", "Segoe UI", "Helvetica Neue", sans-serif; + scrollbar-gutter: stable both-edges; } .layout { @@ -47,6 +48,7 @@ body { border-radius: 14px; padding: 0.9rem 1rem; box-shadow: var(--shadow); + min-width: 0; } .topbar { @@ -75,10 +77,17 @@ h2 { .file-load { display: flex; align-items: center; - gap: 0.5rem; + gap: 0.8rem; + flex-wrap: wrap; font-size: 0.92rem; } +.file-load-item { + display: flex; + align-items: center; + gap: 0.4rem; +} + .controls .row { display: flex; gap: 0.6rem; @@ -118,8 +127,38 @@ button { padding: 0.2rem; } +.mesh-legend-panel { + display: flex; + flex-wrap: wrap; + gap: 0.45rem 0.8rem; + align-items: center; + margin: 0.1rem 0.15rem 0.5rem; + padding: 0.35rem 0.45rem; + border: 1px solid #d7caaa; + border-radius: 10px; + background: #fbf5e7; +} + +.mesh-legend-item { + display: inline-flex; + align-items: center; + gap: 0.35rem; + font-size: 0.82rem; + color: #544b3f; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.mesh-legend-dot { + width: 10px; + height: 10px; + border-radius: 999px; + border: 1px solid rgba(48, 48, 48, 0.35); + display: inline-block; +} + #canvas { width: 100%; + height: auto; min-height: 480px; display: block; } @@ -131,7 +170,7 @@ button { } #eventDump { - max-height: 260px; + height: 260px; overflow: auto; background: #fbf7ea; border: 1px solid var(--line); @@ -140,6 +179,100 @@ button { margin-top: 0.6rem; font-size: 0.82rem; line-height: 1.45; + white-space: pre-wrap; + word-break: break-word; + overflow-wrap: anywhere; +} + +.report-warning { + margin: 0 0 0.45rem; + color: #756f63; + font-size: 0.86rem; +} + +.report-warning.warn { + color: #905b13; +} + +.report-warning.error { + color: #9d1f1f; +} + +.report-summary { + display: 
grid; + grid-template-columns: repeat(auto-fit, minmax(130px, 1fr)); + gap: 0.45rem; + margin-bottom: 0.55rem; +} + +.report-card { + border: 1px solid #ddcfb2; + border-radius: 8px; + background: #fbf7ea; + padding: 0.42rem 0.5rem; +} + +.report-card-k { + font-size: 0.73rem; + color: #7a6f58; + text-transform: uppercase; + letter-spacing: 0.02em; +} + +.report-card-v { + margin-top: 0.1rem; + font-size: 1rem; + color: #3b352d; + font-weight: 600; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.report-hot-tiles { + border: 1px solid #ddcfb2; + border-radius: 10px; + background: #fffaf0; + overflow: auto; + max-height: 280px; +} + +.report-hot-title { + position: sticky; + top: 0; + z-index: 1; + padding: 0.42rem 0.55rem; + border-bottom: 1px solid #ddcfb2; + background: #f4ecd9; + color: #5c5348; + font-size: 0.82rem; + font-weight: 600; +} + +.report-hot-table { + width: 100%; + border-collapse: collapse; +} + +.report-hot-table th, +.report-hot-table td { + border-bottom: 1px solid #eadfc7; + padding: 0.28rem 0.4rem; + text-align: left; + font-size: 0.79rem; + color: #4f483c; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.report-hot-table thead th { + position: sticky; + top: 29px; + background: #f8f0de; + z-index: 1; +} + +.report-empty { + padding: 0.55rem; + color: #817662; + font-size: 0.84rem; } .tile { @@ -148,6 +281,11 @@ button { stroke-width: 1.4px; } +.tile-report-heat { + fill: #d62828; + pointer-events: none; +} + .tile.active { fill: var(--tile-active); } @@ -158,6 +296,21 @@ button { pointer-events: none; } +.tile-overlay-card, +.tile-overlay-bg { + fill: rgba(255, 253, 247, 0.9); + stroke: rgba(122, 109, 84, 0.5); + stroke-width: 0.75px; +} + +.tile-overlay-text { + fill: #3a352d; + font-size: 9.5px; + font-weight: 600; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + pointer-events: none; +} + .legend-text, .driver-label { fill: #4d4d4d; @@ -170,6 +323,41 @@ button { stroke-linecap: 
round; } +.bp-path-link { + fill: none; + stroke: #b91c1c; + stroke-width: 4.2px; + stroke-linecap: round; + stroke-linejoin: round; + stroke-opacity: 0.95; + filter: drop-shadow(0 0 1px rgba(185, 28, 28, 0.35)); + pointer-events: none; +} + +.bp-tile-outline { + fill: none; + stroke: #b91c1c; + stroke-width: 3px; + pointer-events: none; +} + +.flow-data-tag { + pointer-events: none; +} + +.flow-data-bg { + fill: rgba(255, 251, 240, 0.92); + stroke: rgba(122, 109, 84, 0.48); + stroke-width: 0.7px; +} + +.flow-data-text { + fill: #3b3427; + font-size: 9px; + font-weight: 700; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + .pulse { r: 5; } @@ -180,8 +368,678 @@ button { stroke-width: 0.5px; } +.timing-summary { + margin: 0 0 0.45rem; + color: var(--muted); + font-size: 0.9rem; +} + +.timing-toolbar { + display: flex; + align-items: center; + justify-content: flex-start; + flex-wrap: wrap; + gap: 0.7rem; + margin: 0 0 0.45rem; +} + +.timing-toggle { + display: inline-flex; + align-items: center; + gap: 0.35rem; + font-size: 0.84rem; + color: #5b5b5b; +} + +.timing-select select { + min-width: 130px; + padding: 0.18rem 0.3rem; + border: 1px solid #cbbda2; + border-radius: 6px; + background: #fffdf7; + color: #514a3f; +} + +#timingIoWaveCore[multiple] { + min-width: 140px; + min-height: 92px; +} + +.timing-export-setting input[type="number"] { + width: 86px; + padding: 0.2rem 0.35rem; +} + +.timing-legend { + display: flex; + gap: 0.45rem; + align-items: center; + flex-wrap: wrap; + margin-bottom: 0.55rem; +} + +.legend-chip { + border: 1px solid #bfb8a6; + border-radius: 999px; + padding: 0.12rem 0.5rem; + font-size: 0.77rem; +} + +.legend-chip.on-time { + background: #d9f5df; +} + +.legend-chip.early { + background: #ffd9d9; +} + +.legend-chip.late { + background: #ffc8c8; +} + +.legend-chip.missing { + background: #ececec; +} + +.legend-chip.propagated { + background: #f5e4e4; + opacity: 0.75; +} + +.timing-grid-wrap { + overflow: auto; + border: 
1px solid var(--line); + border-radius: 10px; + background: #fbf7ea; +} + +.timing-grid, +.timing-heatmap { + width: max-content; + min-width: 100%; + border-collapse: collapse; +} + +.timing-grid th, +.timing-grid td, +.timing-heatmap th, +.timing-heatmap td { + border-bottom: 1px solid #d8ccb0; + border-right: 1px solid #e4d8be; + vertical-align: top; + padding: 0.35rem 0.4rem; +} + +.timing-grid thead th, +.timing-heatmap thead th { + position: sticky; + top: 0; + background: #f4ecd9; + z-index: 1; + font-size: 0.78rem; + white-space: nowrap; +} + +.core-col { + min-width: 176px; + color: #544f43; + font-size: 0.8rem; + background: #f8f1df; + text-align: left; +} + +.slot-head { + min-width: 84px; + text-align: left; +} + +.timing-grid thead .core-col, +.timing-heatmap thead .core-col { + left: 0; + z-index: 3; +} + +.timing-grid tbody .core-col, +.timing-heatmap tbody .core-col { + position: sticky; + left: 0; + z-index: 2; +} + +.core-col .core-meta { + color: #7a6b55; + font-size: 0.72rem; + margin-top: 0.15rem; + font-weight: 500; +} + +.timing-heat-cell { + padding: 0.2rem 0.25rem; +} + +.timing-heat-btn { + width: 100%; + min-width: 64px; + border-radius: 8px; + border: 1px solid #c8baa0; + padding: 0.2rem 0.26rem; + display: flex; + flex-direction: column; + align-items: flex-start; + gap: 0.08rem; + cursor: pointer; + background: #fff; + transition: transform 120ms ease, box-shadow 120ms ease; +} + +.timing-heat-btn:hover { + transform: translateY(-1px); + box-shadow: 0 1px 0 rgba(0, 0, 0, 0.06), 0 2px 8px rgba(0, 0, 0, 0.08); +} + +.timing-heat-btn.selected { + box-shadow: 0 0 0 2px rgba(73, 83, 102, 0.35) inset; +} + +.timing-heat-btn.first-divergence { + border-width: 2px; +} + +.timing-heat-btn.boundary-core { + box-shadow: 0 0 0 1px rgba(66, 66, 66, 0.18); +} + +.timing-heat-btn.muted { + opacity: 0.25; +} + +.timing-heat-btn .heat-main { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 0.76rem; + font-weight: 700; + 
line-height: 1; +} + +.timing-heat-btn .heat-sub { + font-size: 0.67rem; + color: #5f5f5f; + line-height: 1.1; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.timing-heat-btn.status-on-time { + background: rgba(217, 245, 223, var(--heat-alpha, 0.2)); + border-color: #8bbd95; +} + +.timing-heat-btn.status-early { + background: rgba(255, 217, 217, var(--heat-alpha, 0.5)); + border-color: #d88b8b; +} + +.timing-heat-btn.status-late { + background: rgba(255, 200, 200, var(--heat-alpha, 0.56)); + border-color: #d77a7a; +} + +.timing-heat-btn.status-missing { + background: rgba(236, 236, 236, var(--heat-alpha, 0.62)); + border-color: #9a9696; +} + +.timing-drilldown { + margin-top: 0.6rem; + border: 1px solid #dacfb5; + border-radius: 10px; + background: #fffaf0; + max-height: 280px; + overflow: auto; +} + +.timing-drill-head { + position: sticky; + top: 0; + z-index: 1; + background: #f4ecd9; + border-bottom: 1px solid #dacfb5; + color: #5f5548; + font-size: 0.78rem; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + padding: 0.45rem 0.55rem; + white-space: nowrap; + overflow: auto; +} + +.timing-drill-list { + display: flex; + flex-direction: column; +} + +.timing-drill-row { + display: grid; + grid-template-columns: minmax(120px, 200px) 1fr; + gap: 0.5rem; + align-items: start; + padding: 0.33rem 0.55rem; + border-bottom: 1px solid #eee2c8; + font-size: 0.74rem; +} + +.timing-drill-row .drill-op { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-weight: 700; + color: #3f3f3f; +} + +.timing-drill-row .drill-meta { + color: #676255; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.timing-drill-row.on-time { + background: #eefaf0; +} + +.timing-drill-row.early { + background: #fff1f1; +} + +.timing-drill-row.late { + background: #ffeaea; +} + +.timing-drill-row.missing { + background: #f3f3f3; +} + +.timing-drill-row.propagated { + opacity: 0.72; +} + +.timing-drill-empty { + color: #7d7668; + 
font-size: 0.82rem; + padding: 0.6rem; +} + +.timing-core-mini { + margin-top: 0.55rem; + border: 1px solid #dacfb5; + border-radius: 10px; + background: #fffaf0; + max-height: 210px; + overflow: auto; +} + +.timing-core-mini-head { + position: sticky; + top: 0; + z-index: 1; + background: #f4ecd9; + border-bottom: 1px solid #dacfb5; + color: #5f5548; + font-size: 0.78rem; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + padding: 0.4rem 0.55rem; + white-space: nowrap; + overflow: auto; +} + +.timing-core-mini-list { + display: flex; + flex-direction: column; +} + +.timing-core-mini-row { + padding: 0.28rem 0.55rem; + border-bottom: 1px solid #eee2c8; + font-size: 0.76rem; + color: #5a5448; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.timing-core-mini-empty { + color: #7d7668; + font-size: 0.82rem; + padding: 0.6rem; +} + +.timing-window { + display: flex; + flex-wrap: wrap; + gap: 0.8rem; + margin: 0 0 0.55rem; +} + +.timing-window-below { + margin: 0.45rem 0 0.55rem; +} + +.timing-window-item { + gap: 0.45rem; +} + +.timing-window-item input[type="range"] { + width: min(360px, 42vw); +} + +#timingResetZoom { + height: 28px; + align-self: center; +} + +.timing-timeline-svg { + display: block; + background: #fffaf0; +} + +.timeline-axis { + stroke: #8f846d; + stroke-width: 1; +} + +.timeline-cycle-sep { + stroke: #c4b89a; + stroke-width: 1; + stroke-dasharray: 3 2; +} + +.timeline-core-sep { + stroke: #7a6f58; + stroke-width: 1.2; + stroke-dasharray: 4 3; +} + +.timeline-tick { + fill: #7a6f58; + font-size: 10px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.timeline-core-label { + fill: #5a5347; + font-size: 11px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + cursor: pointer; + user-select: none; +} + +.timeline-core-label.boundary { + font-weight: 700; +} + +.timeline-core-label:hover { + fill: #2a4f9a; +} + +.timeline-core-label.focused { + fill: #1f4eb5; + font-weight: 700; + 
text-decoration: underline; +} + +.timeline-core-label.io-expanded { + fill: #6a2b96; + text-decoration: underline; + text-decoration-style: dashed; +} + +.timeline-lane-tag { + fill: #7f7460; + font-size: 10px; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; +} + +.timeline-io-tag-in { + fill: #2d6cdf; + font-weight: 700; +} + +.timeline-io-tag-out { + fill: #8f2ac7; + font-weight: 700; +} + +.timeline-rect { + rx: 1.6; + ry: 1.6; + shape-rendering: crispEdges; +} + +.timeline-rect.expected { + fill: #f4f4f4; + stroke: #8f8f8f; + stroke-width: 0.8; +} + +.timeline-rect.actual-ok { + fill: #2a7f62; + stroke: #1f604a; + stroke-width: 0.7; +} + +.timeline-rect.actual-bad { + fill: #d62828; + stroke: #8f1717; + stroke-width: 0.7; +} + +.timeline-rect.actual-comp-ok { + fill: #2d6cdf; + stroke: #1d4a97; + stroke-width: 0.8; + opacity: 0.84; +} + +.timeline-rect.actual-comp-bad { + fill: #9b2ce0; + stroke: #5d178a; + stroke-width: 0.85; + opacity: 0.9; +} + +.timeline-rect.missing { + fill: #f4f4f4; + stroke: #7a7a7a; + stroke-width: 1.1; + stroke-dasharray: 2 1; +} + +.timeline-rect.selected { + stroke-width: 1.8; + filter: drop-shadow(0 0 1px rgba(0, 0, 0, 0.28)); +} + +.timeline-io-bus { + stroke-width: 0.9; +} + +.timeline-io-bus-in { + fill: #deebff; + stroke: #7da3ea; +} + +.timeline-io-bus-out { + fill: #f1e1ff; + stroke: #b589dd; +} + +.timeline-io-bus-label { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + pointer-events: none; + font-size: 7px; +} + +.timeline-io-bus-label-in { + fill: #214a9c; +} + +.timeline-io-bus-label-out { + fill: #6d2094; +} + +.timeline-rect-label { + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + pointer-events: none; + font-size: 7px; +} + +.timeline-rect-label-expected { + fill: #444; +} + +.timeline-rect-label-actual { + fill: #fff; +} + +.timeline-missing { + stroke: #7a7a7a; + stroke-width: 1.2; +} + +.timeline-link.ok { + stroke: rgba(54, 132, 103, 0.45); + stroke-width: 0.9; +} + 
+.timeline-link.bad { + stroke: rgba(214, 40, 40, 0.72); + stroke-width: 1.2; +} + +.timeline-link.comp-ok { + stroke: rgba(45, 108, 223, 0.5); + stroke-width: 0.9; + stroke-dasharray: 2 1; +} + +.timeline-link.comp-bad { + stroke: rgba(155, 44, 224, 0.78); + stroke-width: 1.2; + stroke-dasharray: 2 1; +} + +.timeline-legend-text { + fill: #615a4f; + font-size: 10px; +} + +.timeline-legend-exp { + fill: #fff; + stroke: #8c8c8c; +} + +.timeline-legend-act-ok { + fill: #2a7f62; +} + +.timeline-legend-act-bad { + fill: #d62828; +} + +.timeline-legend-comp-ok { + fill: #2d6cdf; +} + +.timeline-legend-comp-bad { + fill: #9b2ce0; +} + +.timeline-legend-io-in { + fill: #deebff; + stroke: #7da3ea; +} + +.timeline-legend-io-out { + fill: #f1e1ff; + stroke: #b589dd; +} + +.timeline-legend-missing { + fill: #f4f4f4; + stroke: #7a7a7a; + stroke-dasharray: 2 1; +} + +.timing-cell { + min-height: 42px; + display: flex; + flex-wrap: wrap; + gap: 0.25rem; +} + +.timing-op { + border: 1px solid #c8baa0; + border-radius: 6px; + padding: 0.1rem 0.3rem; + font-family: ui-monospace, SFMono-Regular, Menlo, monospace; + font-size: 0.73rem; + cursor: default; + background: #fff; +} + +.timing-op.on-time { + background: #d9f5df; + border-color: #8bbd95; +} + +.timing-op.early { + background: #ffd9d9; + border-color: #d88b8b; +} + +.timing-op.late { + background: #ffc8c8; + border-color: #d77a7a; +} + +.timing-op.missing { + background: #ececec; + border-color: #bcbcbc; +} + +.timing-op.first-divergence { + box-shadow: 0 0 0 2px rgba(96, 41, 41, 0.25) inset; + font-weight: 600; +} + +.timing-op.propagated { + opacity: 0.55; +} + +.timing-empty { + color: #8b8577; + font-size: 0.73rem; +} + @media (max-width: 860px) { #canvas { min-height: 360px; } + + .core-col { + min-width: 150px; + } + + .slot-head { + min-width: 74px; + } + + .timing-heat-btn { + min-width: 54px; + padding: 0.18rem 0.22rem; + } + + .timing-drill-row { + grid-template-columns: 1fr; + gap: 0.2rem; + } + + 
.timing-window-item input[type="range"] { + width: min(280px, 60vw); + } + + .timeline-core-label { + font-size: 10px; + } } diff --git a/verify/VERIFY_OVERVIEW.md b/verify/VERIFY_OVERVIEW.md new file mode 100644 index 0000000..177632c --- /dev/null +++ b/verify/VERIFY_OVERVIEW.md @@ -0,0 +1,113 @@ +# Verify 包说明 + +本文档说明 Zeonica 中 `verify` 文件夹的职责、组成和用法。 + +--- + +## 一句话定位 + +**verify** 是 Zeonica 的**快速验证层**:在跑 cycle-accurate 仿真之前,用**静态检查 + 无时序的功能仿真**先检查 kernel 的**结构、时序约束和计算语义**,并生成可读的验证报告。不建模周期、网络延迟和 backpressure,只关心“对不对”,不关心“多快”。 + +--- + +## 三块功能 + +### 1. Lint(静态检查)— `lint.go` + +**作用**:只看 kernel YAML 和架构信息,不做执行,快速发现映射/调度错误。 + +- **STRUCT** + - 坐标格式是否合法(如 `"(x,y)"`) + - PE 坐标是否在 CGRA 范围内(如 4×4 下不能有 (5,5)) + - 同一 (PE, timestep) 内是否有**端口写冲突**(同一端口被多条指令写) +- **TIMING** + - 跨 PE 数据依赖的**时序是否满足** + - 规则:生产者写 → 消费者读,中间至少要经过 `距离 × HopLatency` 个周期 + - 支持 modulo scheduling(II > 0):用 D∈{0,1} 的迭代距离模型,减少误报 + +**输入**:`programs map[string]core.Program`(从 kernel YAML 加载)+ `arch *ArchInfo`(行列、mesh、HopLatency 等)。 +**输出**:`[]Issue`,每个 issue 带类型(STRUCT/TIMING)、PE、timestep、op 索引、消息和 Details。 + +--- + +### 2. Functional Simulator(功能仿真)— `funcsim.go` + +**作用**:按**数据依赖**执行 kernel,不建模周期、网络延迟和 backpressure,只验证**计算语义**是否正确。 + +- 执行顺序:按 timestep 拓扑执行,某条 op 的**所有源操作数就绪**才执行 +- 每个 PE 维护:寄存器、本地 memory、端口上的“数据是否到位” +- 数据带 **predicate**(valid/invalid),运算会传播 predicate(如二元 op 的 pred = pred0 AND pred1) +- 支持 35+ opcode(与 `core/emu.go` 语义对齐的 subset:算术、逻辑、内存、比较、PHI、控制等) +- **不做**:周期推进、网络延迟、SendBuf 满时的阻塞 + +**输入**:同样 `programs` + `arch`,还可 `PreloadMemory(x, y, value, addr)` 预填内存。 +**输出**:执行完后用 `GetRegisterValue(x, y, regIndex)`、`GetMemoryValue(x, y, addr)` 等查结果;若执行卡死或出错,`Run(maxSteps)` 返回 error。 + +**典型用法**:在跑 Akita cycle-accurate 仿真前,先用 funcsim 跑一遍,看结果是否和预期一致,用来区分是**编译器/映射问题**还是**仿真器/时序问题**。 + +--- + +### 3. 
Report(报告)— `report.go` + +**作用**:把 Lint + FuncSim 的结果整理成一份**可读的验证报告**(控制台或文件)。 + +- 先跑 Lint,再跑 FuncSim +- 报告内容包含: + - 加载了多少个 PE program + - Lint:多少 STRUCT / TIMING issue,每条的具体信息 + - FuncSim:是否成功完成、若有错则报错信息 + - 总结:PE 数量、issue 统计、仿真成功/失败 + - **Recommendation**:若有 TIMING 违例,会提示调整 timestep、调度或缓冲策略;若都通过,提示 kernel 可以进仿真 + +**入口**:`GenerateReport(programs, arch, maxSteps)` 返回 `*VerificationReport`,再调用 `WriteReport(w)` 或 `SaveReportToFile(filename)`。 + +--- + +## 和 cycle-accurate simulator 的关系 + +| 对比项 | verify(Lint + FuncSim) | core + runtimecfg + config(真实仿真) | +|--------------|------------------------------|----------------------------------------| +| 目的 | 快速验证“对不对” | 周期精确的“怎么执行、多快” | +| 时序 | 无周期、无网络延迟 | 有周期、有 backpressure、有延迟 | +| 执行驱动 | 数据依赖就绪即执行 | 引擎 tick、端口收发、调度策略 | +| 速度 | 很快(毫秒级) | 慢(秒级或更长) | +| 典型用法 | 改完 kernel/映射先跑一遍 | 确认无误后再跑完整仿真 | + +**总结**:verify 是仿真前的“守门员”,先保证结构和语义没问题,再上真仿真。 + +--- + +## 输入输出小结 + +- **输入** + - Kernel YAML(per-PE program,和 `core.LoadProgramFileFromYAML` 一致) + - 架构参数:`ArchInfo{Rows, Columns, Topology, HopLatency, MemCapacity, CtrlMemItems}`(verify 里自己定义,和 `runtimecfg` 的 arch spec 是分开的) + - 可选:funcsim 的 `PreloadMemory`、`maxSteps` +- **输出** + - Lint:`[]Issue` + - FuncSim:各 PE 的寄存器/内存查询接口 + `Run()` 的 error + - Report:文本报告(stdout 或文件),包含 Lint 汇总、FuncSim 结果和 Recommendation + +--- + +## 文件结构 + +| 文件 | 作用 | +|------|------| +| `verify.go` | 类型定义(Issue、ArchInfo、PEState、FunctionalSimulator)、NewFunctionalSimulator、PreloadMemory、GetRegisterValue/GetMemoryValue、GenerateReport 等对外 API | +| `lint.go` | `RunLint`、坐标校验、端口冲突、TIMING 约束检查(含 modulo 支持) | +| `funcsim.go` | `Run`、`canExecuteOp`、`executeOp`、各 opcode 的语义实现(与 core 对齐) | +| `report.go` | `VerificationReport`、`GenerateReport`、`WriteReport`、`SaveReportToFile` | +| `verify_test.go` | 单元测试 | +| `histogram_integration_test*.go` | 用真实 histogram kernel 做集成测试 | +| `cmd/verify-axpy/main.go` 等 | 按 kernel(axpy、histogram、fir、gemv)封装的 CLI,读 YAML + 调 `GenerateReport(...).SaveReportToFile(...)` | + +--- + +## 总结 + 
+- **Lint**:静态看“坐标、端口冲突、跨 PE 时序”是否合法。 +- **FuncSim**:不跑周期,只按数据流执行,看“算出来的数”对不对。 +- **Report**:把前两步结果打成一份报告,并给出是否适合进仿真的建议。 + +整体上,**verify 包就是在做“不跑完整仿真也能检查 kernel 对不对”的快速验证流水线**。 diff --git a/verify/cmd/verify-axpy/main.go b/verify/cmd/verify-axpy/main.go index 38f6f58..a00b6a2 100644 --- a/verify/cmd/verify-axpy/main.go +++ b/verify/cmd/verify-axpy/main.go @@ -32,8 +32,15 @@ func main() { report := verify.GenerateReport(programs, arch, 1000) report.WriteReport(os.Stdout) - if len(report.LintIssues) > 0 { - log.Fatalf("AXPY verification failed with %d lint issues", len(report.LintIssues)) + if report.BlockingLintIssueCount() > 0 { + log.Fatalf( + "AXPY verification failed with %d blocking lint issues (%d warnings)", + report.BlockingLintIssueCount(), + report.WarningLintIssueCount(), + ) + } + if report.WarningLintIssueCount() > 0 { + log.Printf("AXPY verification has %d non-blocking warnings", report.WarningLintIssueCount()) } if !report.SimulationOK { log.Fatalf("AXPY simulation failed: %v", report.SimulationErr) diff --git a/verify/cmd/verify-fir/main.go b/verify/cmd/verify-fir/main.go index 342b010..7689ae9 100644 --- a/verify/cmd/verify-fir/main.go +++ b/verify/cmd/verify-fir/main.go @@ -30,8 +30,15 @@ func main() { report := verify.GenerateReport(programs, arch, 1000) report.WriteReport(os.Stdout) - if len(report.LintIssues) > 0 { - log.Fatalf("FIR verification failed with %d lint issues", len(report.LintIssues)) + if report.BlockingLintIssueCount() > 0 { + log.Fatalf( + "FIR verification failed with %d blocking lint issues (%d warnings)", + report.BlockingLintIssueCount(), + report.WarningLintIssueCount(), + ) + } + if report.WarningLintIssueCount() > 0 { + log.Printf("FIR verification has %d non-blocking warnings", report.WarningLintIssueCount()) } if !report.SimulationOK { log.Fatalf("FIR simulation failed: %v", report.SimulationErr) diff --git a/verify/cmd/verify-gemv/main.go b/verify/cmd/verify-gemv/main.go new file mode 100644 index 0000000..a4fc5de --- 
/dev/null +++ b/verify/cmd/verify-gemv/main.go @@ -0,0 +1,84 @@ +package main + +import ( + "log" + "os" + "strconv" + + "github.com/sarchlab/zeonica/core" + "github.com/sarchlab/zeonica/verify" +) + +// main runs lint and functional simulation on gemv kernel +func main() { + programPath := os.Getenv("ZEONICA_PROGRAM_YAML") + if programPath == "" { + programPath = "test/testbench/gemv/tmp-generated-instructions.yaml" + } + programs := core.LoadProgramFileFromYAML(programPath) + if len(programs) == 0 { + log.Fatalf("Failed to load gemv program from %s", programPath) + } + lintOpts := verify.DefaultLintOptions() + lintOpts.EnablePrologueAwarePredicate = getEnvBool( + "VERIFY_PRED_PROLOGUE_AWARE", + lintOpts.EnablePrologueAwarePredicate, + ) + lintOpts.PredicateWarmupPassCap = getEnvInt( + "VERIFY_PRED_WARMUP_CAP", + lintOpts.PredicateWarmupPassCap, + ) + lintOpts.PredicateSteadyStatePasses = getEnvInt( + "VERIFY_PRED_STEADY_PASSES", + lintOpts.PredicateSteadyStatePasses, + ) + + arch := &verify.ArchInfo{ + Rows: 4, + Columns: 4, + Topology: "mesh", + HopLatency: 1, + MemCapacity: 2048, + CtrlMemItems: 20, + } + + report := verify.GenerateReport(programs, arch, 1000, lintOpts) + report.WriteReport(os.Stdout) + if report.BlockingLintIssueCount() > 0 { + log.Fatalf( + "GEMV verification failed with %d blocking lint issues (%d warnings)", + report.BlockingLintIssueCount(), + report.WarningLintIssueCount(), + ) + } + if report.WarningLintIssueCount() > 0 { + log.Printf("GEMV verification has %d non-blocking warnings", report.WarningLintIssueCount()) + } + if !report.SimulationOK { + log.Fatalf("GEMV simulation failed: %v", report.SimulationErr) + } +} + +func getEnvInt(name string, fallback int) int { + raw := os.Getenv(name) + if raw == "" { + return fallback + } + v, err := strconv.Atoi(raw) + if err != nil { + return fallback + } + return v +} + +func getEnvBool(name string, fallback bool) bool { + raw := os.Getenv(name) + if raw == "" { + return fallback + } + v, err 
:= strconv.ParseBool(raw) + if err != nil { + return fallback + } + return v +} diff --git a/verify/cmd/verify-histogram/main.go b/verify/cmd/verify-histogram/main.go index 829867f..0e4d322 100644 --- a/verify/cmd/verify-histogram/main.go +++ b/verify/cmd/verify-histogram/main.go @@ -31,8 +31,18 @@ func main() { report := verify.GenerateReport(programs, arch, 100) report.WriteReport(os.Stdout) - if len(report.LintIssues) > 0 { - log.Fatalf("Histogram verification failed with %d lint issues", len(report.LintIssues)) + if report.BlockingLintIssueCount() > 0 { + log.Fatalf( + "Histogram verification failed with %d blocking lint issues (%d warnings)", + report.BlockingLintIssueCount(), + report.WarningLintIssueCount(), + ) + } + if report.WarningLintIssueCount() > 0 { + log.Printf( + "Histogram verification has %d non-blocking warnings", + report.WarningLintIssueCount(), + ) } if !report.SimulationOK { log.Fatalf("Histogram simulation failed: %v", report.SimulationErr) diff --git a/verify/cmd/verify-kernelfusion/main.go b/verify/cmd/verify-kernelfusion/main.go new file mode 100644 index 0000000..e1e0a3c --- /dev/null +++ b/verify/cmd/verify-kernelfusion/main.go @@ -0,0 +1,150 @@ +package main + +import ( + "fmt" + "os" + "sort" + "strconv" + + "github.com/sarchlab/zeonica/core" + "github.com/sarchlab/zeonica/verify" +) + +//nolint:gocyclo,funlen +func main() { + programPath := os.Getenv("ZEONICA_PROGRAM_YAML") + if programPath == "" { + programPath = "test/testbench/kernelfusion/tmp-generated-instructions.yaml" + } + + rows := getEnvInt("VERIFY_ROWS", 8) + cols := getEnvInt("VERIFY_COLS", 8) + maxPrint := getEnvInt("VERIFY_MAX_PRINT", 50) + lintOpts := verify.DefaultLintOptions() + lintOpts.EnablePrologueAwarePredicate = getEnvBool( + "VERIFY_PRED_PROLOGUE_AWARE", + lintOpts.EnablePrologueAwarePredicate, + ) + lintOpts.PredicateWarmupPassCap = getEnvInt( + "VERIFY_PRED_WARMUP_CAP", + lintOpts.PredicateWarmupPassCap, + ) + lintOpts.PredicateSteadyStatePasses = 
getEnvInt( + "VERIFY_PRED_STEADY_PASSES", + lintOpts.PredicateSteadyStatePasses, + ) + + programs := core.LoadProgramFileFromYAML(programPath) + if len(programs) == 0 { + fmt.Printf("failed to load program: %s\n", programPath) + os.Exit(2) + } + + arch := &verify.ArchInfo{ + Rows: rows, + Columns: cols, + Topology: "mesh", + HopLatency: 1, + MemCapacity: 2048, + CtrlMemItems: 20, + } + + issues := verify.RunLint(programs, arch, lintOpts) + structCnt := 0 + timingCnt := 0 + predicateCnt := 0 + for _, it := range issues { + switch it.Type { + case verify.IssueStruct: + structCnt++ + case verify.IssueTiming: + timingCnt++ + case verify.IssuePredicate: + predicateCnt++ + } + } + + fmt.Printf("program: %s\n", programPath) + fmt.Printf("arch: %dx%d\n", cols, rows) + fmt.Printf( + "predicate_lint: prologueAware=%t warmupCap=%d steadyPasses=%d\n", + lintOpts.EnablePrologueAwarePredicate, + lintOpts.PredicateWarmupPassCap, + lintOpts.PredicateSteadyStatePasses, + ) + fmt.Printf( + "total issues: %d (STRUCT=%d, TIMING=%d, PREDICATE=%d)\n", + len(issues), + structCnt, + timingCnt, + predicateCnt, + ) + report := &verify.VerificationReport{LintIssues: issues} + fmt.Printf( + "severity: blocking=%d, warning=%d\n", + report.BlockingLintIssueCount(), + report.WarningLintIssueCount(), + ) + + if len(issues) == 0 { + fmt.Println("verify lint passed: no dependency/timing issues found") + return + } + + sort.Slice(issues, func(i, j int) bool { + if issues[i].Type != issues[j].Type { + return issues[i].Type < issues[j].Type + } + if issues[i].PEY != issues[j].PEY { + return issues[i].PEY < issues[j].PEY + } + if issues[i].PEX != issues[j].PEX { + return issues[i].PEX < issues[j].PEX + } + if issues[i].Time != issues[j].Time { + return issues[i].Time < issues[j].Time + } + return issues[i].OpID < issues[j].OpID + }) + + limit := len(issues) + if maxPrint > 0 && limit > maxPrint { + limit = maxPrint + } + fmt.Printf("\nshowing first %d issue(s):\n", limit) + for i := 0; i < limit; i++ { 
+ it := issues[i] + fmt.Printf( + "[%03d] %-6s PE(%d,%d) t=%d op=%d | %s\n", + i+1, it.Type, it.PEX, it.PEY, it.Time, it.OpID, it.Message, + ) + } + + if limit < len(issues) { + fmt.Printf("... %d more issue(s) not shown\n", len(issues)-limit) + } +} + +func getEnvInt(name string, fallback int) int { + raw := os.Getenv(name) + if raw == "" { + return fallback + } + v, err := strconv.Atoi(raw) + if err != nil { + return fallback + } + return v +} + +func getEnvBool(name string, fallback bool) bool { + raw := os.Getenv(name) + if raw == "" { + return fallback + } + v, err := strconv.ParseBool(raw) + if err != nil { + return fallback + } + return v +} diff --git a/verify/lint.go b/verify/lint.go index 09cc504..281b1a2 100644 --- a/verify/lint.go +++ b/verify/lint.go @@ -3,6 +3,8 @@ package verify import ( "fmt" + "sort" + "strconv" "strings" "github.com/sarchlab/zeonica/core" @@ -12,11 +14,16 @@ import ( // It validates structure (STRUCT) and simple timing constraints (TIMING). // For kernels with modulo scheduling (ii > 0), it uses a D∈{0,1} iteration // distance model to reduce false positives on loop-carried dependencies. +// Optional lint options can be provided to tune predicate analysis behavior. // Returns a list of issues found, or empty list if no issues. // //nolint:gocyclo -func RunLint(programs map[string]core.Program, arch *ArchInfo) []Issue { +func RunLint(programs map[string]core.Program, arch *ArchInfo, opts ...LintOptions) []Issue { var issues []Issue + lintOpts := DefaultLintOptions() + if len(opts) > 0 { + lintOpts = normalizeLintOptions(opts[0]) + } // Extract CompiledII from the first program that has it // (All programs should have the same II since they're from the same kernel) @@ -107,6 +114,8 @@ func RunLint(programs map[string]core.Program, arch *ArchInfo) []Issue { // TIMING: Build dependency graph and check latencies with modulo scheduling support issues = append(issues, checkTimingConstraints(programs, arch, ii)...) 
+ // PREDICATE: Check PHI/PHI_START/GRANT predicate consistency risks. + issues = append(issues, checkPredicateConstraints(programs, arch, ii, lintOpts)...) return issues } @@ -266,6 +275,428 @@ func checkTimingConstraints(programs map[string]core.Program, arch *ArchInfo, ii return issues } +type predMask uint8 + +const ( + predCanTrue predMask = 1 << iota + predCanFalse +) + +const ( + predTrueMask predMask = predCanTrue + predFalseMask predMask = predCanFalse + predUnknownMask predMask = predCanTrue | predCanFalse +) + +func predHasTrue(v predMask) bool { + return v&predCanTrue != 0 +} + +func predHasFalse(v predMask) bool { + return v&predCanFalse != 0 +} + +func predAnd(values ...predMask) predMask { + canTrue := true + canFalse := false + for _, v := range values { + canTrue = canTrue && predHasTrue(v) + canFalse = canFalse || predHasFalse(v) + } + + var out predMask + if canTrue { + out |= predCanTrue + } + if canFalse { + out |= predCanFalse + } + if out == 0 { + return predUnknownMask + } + return out +} + +func predOr(a, b predMask) predMask { + out := predMask(0) + if predHasTrue(a) || predHasTrue(b) { + out |= predCanTrue + } + if predHasFalse(a) || predHasFalse(b) { + out |= predCanFalse + } + if out == 0 { + return predUnknownMask + } + return out +} + +func parseRegisterIndex(impl string) (int, bool) { + if !strings.HasPrefix(impl, "$") { + return 0, false + } + idx, err := strconv.Atoi(strings.TrimPrefix(impl, "$")) + if err != nil { + return 0, false + } + return idx, true +} + +func parseImmediateInt(impl string) (int64, bool) { + if !strings.HasPrefix(impl, "#") { + return 0, false + } + v := strings.TrimPrefix(impl, "#") + num, err := strconv.ParseInt(v, 0, 64) + if err == nil { + return num, true + } + u, err := strconv.ParseUint(v, 0, 64) + if err != nil { + return 0, false + } + return int64(u), true +} + +func operandPredMask(operand core.Operand, regPred map[int]predMask) predMask { + if idx, ok := parseRegisterIndex(operand.Impl); ok { + if 
p, exists := regPred[idx]; exists { + return p + } + return predUnknownMask + } + if strings.HasPrefix(operand.Impl, "#") { + return predTrueMask + } + if isPortOperand(operand.Impl) { + return predUnknownMask + } + return predUnknownMask +} + +func predicateGateMask(operand core.Operand) predMask { + v, ok := parseImmediateInt(operand.Impl) + if !ok { + return predUnknownMask + } + if v == 0 { + return predFalseMask + } + return predTrueMask +} + +func writeRegisterDsts(op core.Operation, regPred map[int]predMask, pred predMask) { + for _, dst := range op.DstOperands.Operands { + if idx, ok := parseRegisterIndex(dst.Impl); ok { + regPred[idx] = pred + } + } +} + +func andFromSrcOperands(op core.Operation, regPred map[int]predMask) predMask { + if len(op.SrcOperands.Operands) == 0 { + return predUnknownMask + } + preds := make([]predMask, 0, len(op.SrcOperands.Operands)) + for _, src := range op.SrcOperands.Operands { + preds = append(preds, operandPredMask(src, regPred)) + } + return predAnd(preds...) 
+} + +type predicateStage string + +const ( + predicateStageWarmup predicateStage = "warmup" + predicateStageSteady predicateStage = "steady" +) + +type predicateRiskStat struct { + x int + y int + t int + opID int + message string + opcode string + + totalHits int + definiteHits int + stageHits map[predicateStage]int +} + +func newPredicateRiskStat(x, y, t, opID int, message, opcode string) *predicateRiskStat { + return &predicateRiskStat{ + x: x, + y: y, + t: t, + opID: opID, + message: message, + opcode: opcode, + stageHits: map[predicateStage]int{ + predicateStageWarmup: 0, + predicateStageSteady: 0, + }, + } +} + +func (p *predicateRiskStat) mark(stage predicateStage, definite bool) { + p.totalHits++ + p.stageHits[stage]++ + if definite { + p.definiteHits++ + } +} + +func (p *predicateRiskStat) certainty() string { + if p.totalHits > 0 && p.definiteHits == p.totalHits { + return "definite" + } + return "possible" +} + +func computePredicatePassWindows(maxInvalid, ii int, opts LintOptions) (int, int) { + if !opts.EnablePrologueAwarePredicate { + passCount := maxInvalid + 1 + if passCount < 1 { + passCount = 1 + } + if passCount > 4 { + // Keep lint bounded in legacy mode. 
+ passCount = 4 + } + return passCount, 0 + } + + warmupPasses := maxInvalid + 1 + if ii > 0 && ii+1 > warmupPasses { + warmupPasses = ii + 1 + } + if warmupPasses < 1 { + warmupPasses = 1 + } + if warmupPasses > opts.PredicateWarmupPassCap { + warmupPasses = opts.PredicateWarmupPassCap + } + + steadyPasses := opts.PredicateSteadyStatePasses + if steadyPasses < 0 { + steadyPasses = 0 + } + return warmupPasses, steadyPasses +} + +func recordPredicateRisk( + stats map[string]*predicateRiskStat, + key string, + x, y, t, opID int, + message, opcode string, + stage predicateStage, + definite bool, +) { + s, exists := stats[key] + if !exists { + s = newPredicateRiskStat(x, y, t, opID, message, opcode) + stats[key] = s + } + s.mark(stage, definite) +} + +//nolint:gocyclo +func checkPredicateConstraints( + programs map[string]core.Program, + arch *ArchInfo, + ii int, + opts LintOptions, +) []Issue { + var issues []Issue + + type opCursor struct { + timeIdx int + op core.Operation + invalidRem int + } + + for coordStr, prog := range programs { + x, y, err := parseCoordinate(coordStr) + if err != nil || x < 0 || x >= arch.Columns || y < 0 || y >= arch.Rows { + continue + } + + regPred := make(map[int]predMask) + phiStartSeen := make(map[int]bool) + grantOnceSeen := make(map[int]bool) + riskStats := make(map[string]*predicateRiskStat) + + ops := make([]*opCursor, 0) + maxInvalid := 0 + for _, entry := range prog.EntryBlocks { + for t, ig := range entry.InstructionGroups { + for _, op := range ig.Operations { + if op.InvalidIterations > maxInvalid { + maxInvalid = op.InvalidIterations + } + ops = append(ops, &opCursor{ + timeIdx: t, + op: op, + invalidRem: op.InvalidIterations, + }) + } + } + } + + warmupPasses, steadyPasses := computePredicatePassWindows(maxInvalid, ii, opts) + totalPasses := warmupPasses + steadyPasses + if totalPasses < 1 { + totalPasses = 1 + } + + for pass := 0; pass < totalPasses; pass++ { + stage := predicateStageWarmup + if pass >= warmupPasses { + 
stage = predicateStageSteady + } + for _, item := range ops { + if item.invalidRem > 0 { + item.invalidRem-- + continue + } + + op := item.op + opName := strings.ToUpper(op.OpCode) + + switch opName { + case "PHI_START": + if len(op.SrcOperands.Operands) < 2 { + writeRegisterDsts(op, regPred, predUnknownMask) + continue + } + src1 := operandPredMask(op.SrcOperands.Operands[0], regPred) + src2 := operandPredMask(op.SrcOperands.Operands[1], regPred) + + if !phiStartSeen[op.ID] { + if predHasFalse(src1) { + recordPredicateRisk( + riskStats, + fmt.Sprintf("phi_start_first:%d", op.ID), + x, y, item.timeIdx, op.ID, + fmt.Sprintf("PHI_START id=%d first source may have pred=false on first execution", op.ID), + opName, + stage, + src1 == predFalseMask, + ) + } + phiStartSeen[op.ID] = true + writeRegisterDsts(op, regPred, src1) + } else { + if predHasTrue(src1) && predHasTrue(src2) { + recordPredicateRisk( + riskStats, + fmt.Sprintf("phi_start_both_true:%d", op.ID), + x, y, item.timeIdx, op.ID, + fmt.Sprintf("PHI_START id=%d may see both source predicates true", op.ID), + opName, + stage, + src1 == predTrueMask && src2 == predTrueMask, + ) + } + writeRegisterDsts(op, regPred, predOr(src1, src2)) + } + case "PHI": + if len(op.SrcOperands.Operands) < 2 { + writeRegisterDsts(op, regPred, predUnknownMask) + continue + } + src1 := operandPredMask(op.SrcOperands.Operands[0], regPred) + src2 := operandPredMask(op.SrcOperands.Operands[1], regPred) + if predHasTrue(src1) && predHasTrue(src2) { + recordPredicateRisk( + riskStats, + fmt.Sprintf("phi_both_true:%d", op.ID), + x, y, item.timeIdx, op.ID, + fmt.Sprintf("PHI id=%d may have both source predicates true", op.ID), + opName, + stage, + src1 == predTrueMask && src2 == predTrueMask, + ) + } + writeRegisterDsts(op, regPred, predOr(src1, src2)) + case "GRANT_PREDICATE", "GPRED": + if len(op.SrcOperands.Operands) < 2 { + writeRegisterDsts(op, regPred, predUnknownMask) + continue + } + srcPred := 
operandPredMask(op.SrcOperands.Operands[0], regPred) + predPred := operandPredMask(op.SrcOperands.Operands[1], regPred) + gate := predicateGateMask(op.SrcOperands.Operands[1]) + writeRegisterDsts(op, regPred, predAnd(srcPred, predPred, gate)) + case "GRANT_ONCE": + if len(op.SrcOperands.Operands) == 0 { + writeRegisterDsts(op, regPred, predUnknownMask) + continue + } + srcPred := operandPredMask(op.SrcOperands.Operands[0], regPred) + if !grantOnceSeen[op.ID] { + grantOnceSeen[op.ID] = true + writeRegisterDsts(op, regPred, srcPred) + } else { + writeRegisterDsts(op, regPred, predFalseMask) + } + case "MOV", "DATA_MOV", "CTRL_MOV", "SEXT", "ZEXT", "CAST_FPTOSI", "NOT", "LOAD": + if len(op.SrcOperands.Operands) == 0 { + writeRegisterDsts(op, regPred, predUnknownMask) + continue + } + writeRegisterDsts(op, regPred, operandPredMask(op.SrcOperands.Operands[0], regPred)) + case "CONSTANT": + writeRegisterDsts(op, regPred, predTrueMask) + case "PHI_CONST": + if len(op.SrcOperands.Operands) < 2 { + writeRegisterDsts(op, regPred, predUnknownMask) + continue + } + src1 := operandPredMask(op.SrcOperands.Operands[0], regPred) + src2 := operandPredMask(op.SrcOperands.Operands[1], regPred) + writeRegisterDsts(op, regPred, predOr(src1, src2)) + case "ADD", "SUB", "MUL", "DIV", "FADD", "FSUB", "FMUL", "FDIV", + "OR", "XOR", "AND", "SHL", "LLS", "LRS", "GEP", "MUL_ADD", + "ICMP_EQ", "ICMP_SLT", "ICMP_SGT", "ICMP_SGE", "ICMP_SLE", "ICMP_SNE", "LT_EX": + writeRegisterDsts(op, regPred, andFromSrcOperands(op, regPred)) + default: + // Unknown opcode to lint: keep analysis conservative. 
+ writeRegisterDsts(op, regPred, predUnknownMask) + } + } + } + + keys := make([]string, 0, len(riskStats)) + for key := range riskStats { + keys = append(keys, key) + } + sort.Strings(keys) + for _, key := range keys { + stat := riskStats[key] + issues = append(issues, Issue{ + Type: IssuePredicate, + PEX: stat.x, + PEY: stat.y, + Time: stat.t, + OpID: stat.opID, + Message: stat.message, + Details: map[string]interface{}{ + "certainty": stat.certainty(), + "opcode": stat.opcode, + "warmup_hits": stat.stageHits[predicateStageWarmup], + "steady_hits": stat.stageHits[predicateStageSteady], + "total_hits": stat.totalHits, + "definite_hits": stat.definiteHits, + }, + }) + } + } + + return issues +} + // isPortOperand checks if an operand is a port (direction name) func isPortOperand(impl string) bool { dirNames := map[string]bool{ diff --git a/verify/report.go b/verify/report.go index b6ffd4b..1f8050f 100644 --- a/verify/report.go +++ b/verify/report.go @@ -11,18 +11,79 @@ import ( // VerificationReport represents a complete verification report type VerificationReport struct { - ProgramCount int - LintIssues []Issue - StructIssues []Issue - TimingIssues []Issue - SimulationErr error - SimulationOK bool - Arch *ArchInfo - Programs map[string]core.Program + ProgramCount int + LintIssues []Issue + StructIssues []Issue + TimingIssues []Issue + PredicateIssues []Issue + SimulationErr error + SimulationOK bool + Arch *ArchInfo + Programs map[string]core.Program } -// GenerateReport runs both lint and functional simulation, returns a report -func GenerateReport(programs map[string]core.Program, arch *ArchInfo, maxSimSteps int) *VerificationReport { +func predicateIssueIsPossible(issue Issue) bool { + if issue.Type != IssuePredicate { + return false + } + if issue.Details == nil { + return false + } + certainty, ok := issue.Details["certainty"] + if !ok { + return false + } + certaintyStr, ok := certainty.(string) + if !ok { + return false + } + return 
strings.EqualFold(strings.TrimSpace(certaintyStr), "possible") +} + +// BlockingLintIssues returns lint issues that should fail verification. +// Current policy: +// - STRUCT/TIMING issues are always blocking. +// - PREDICATE issues are blocking unless tagged as certainty=possible. +func (r *VerificationReport) BlockingLintIssues() []Issue { + blocking := make([]Issue, 0, len(r.LintIssues)) + for _, issue := range r.LintIssues { + if issue.Type == IssuePredicate && predicateIssueIsPossible(issue) { + continue + } + blocking = append(blocking, issue) + } + return blocking +} + +// WarningLintIssues returns non-blocking lint issues (currently predicate possible). +func (r *VerificationReport) WarningLintIssues() []Issue { + warnings := make([]Issue, 0) + for _, issue := range r.LintIssues { + if issue.Type == IssuePredicate && predicateIssueIsPossible(issue) { + warnings = append(warnings, issue) + } + } + return warnings +} + +// BlockingLintIssueCount returns number of blocking lint issues. +func (r *VerificationReport) BlockingLintIssueCount() int { + return len(r.BlockingLintIssues()) +} + +// WarningLintIssueCount returns number of warning lint issues. +func (r *VerificationReport) WarningLintIssueCount() int { + return len(r.WarningLintIssues()) +} + +// GenerateReport runs both lint and functional simulation, returns a report. +// Optional lint options can be provided to tune predicate analysis. +func GenerateReport( + programs map[string]core.Program, + arch *ArchInfo, + maxSimSteps int, + opts ...LintOptions, +) *VerificationReport { report := &VerificationReport{ ProgramCount: len(programs), Arch: arch, @@ -30,13 +91,18 @@ func GenerateReport(programs map[string]core.Program, arch *ArchInfo, maxSimStep } // Run lint - report.LintIssues = RunLint(programs, arch) + report.LintIssues = RunLint(programs, arch, opts...) 
// Categorize issues for _, issue := range report.LintIssues { - if issue.Type == IssueStruct { + switch issue.Type { + case IssueStruct: report.StructIssues = append(report.StructIssues, issue) - } else { + case IssueTiming: + report.TimingIssues = append(report.TimingIssues, issue) + case IssuePredicate: + report.PredicateIssues = append(report.PredicateIssues, issue) + default: report.TimingIssues = append(report.TimingIssues, issue) } } @@ -74,6 +140,12 @@ func (r *VerificationReport) WriteReport(w io.Writer) { fmt.Fprintln(w, "✓ No lint issues found!") } else { fmt.Fprintf(w, "⚠ Found %d lint issues:\n\n", len(r.LintIssues)) + fmt.Fprintf( + w, + " Blocking: %d, Warning: %d\n", + r.BlockingLintIssueCount(), + r.WarningLintIssueCount(), + ) if len(r.StructIssues) > 0 { fmt.Fprintf(w, "\nSTRUCT ISSUES (%d):\n", len(r.StructIssues)) @@ -108,6 +180,31 @@ func (r *VerificationReport) WriteReport(w io.Writer) { fmt.Fprintln(w) } } + + if len(r.PredicateIssues) > 0 { + fmt.Fprintf(w, "\nPREDICATE ISSUES (%d):\n", len(r.PredicateIssues)) + fmt.Fprintln(w, dash) + for i, issue := range r.PredicateIssues { + fmt.Fprintf(w, " Issue %d: [PE(%d,%d) t=%d op=%d]\n", + i+1, issue.PEX, issue.PEY, issue.Time, issue.OpID) + fmt.Fprintf(w, " Message: %s\n", issue.Message) + if issue.Details != nil { + if opCode, ok := issue.Details["opcode"]; ok { + fmt.Fprintf(w, " OpCode: %v\n", opCode) + } + if certainty, ok := issue.Details["certainty"]; ok { + fmt.Fprintf(w, " Certainty: %v\n", certainty) + } + if warmupHits, ok := issue.Details["warmup_hits"]; ok { + fmt.Fprintf(w, " Warmup hits: %v\n", warmupHits) + } + if steadyHits, ok := issue.Details["steady_hits"]; ok { + fmt.Fprintf(w, " Steady hits: %v\n", steadyHits) + } + } + fmt.Fprintln(w) + } + } } // STAGE 2: FUNCTIONAL SIMULATION @@ -127,8 +224,14 @@ func (r *VerificationReport) WriteReport(w io.Writer) { fmt.Fprintln(w, separator) fmt.Fprintf(w, "Program Structure: %d PEs deployed\n", r.ProgramCount) - fmt.Fprintf(w, "Lint 
Result: %d issues detected (%d STRUCT, %d TIMING)\n", - len(r.LintIssues), len(r.StructIssues), len(r.TimingIssues)) + fmt.Fprintf(w, "Lint Result: %d issues detected (%d STRUCT, %d TIMING, %d PREDICATE)\n", + len(r.LintIssues), len(r.StructIssues), len(r.TimingIssues), len(r.PredicateIssues)) + fmt.Fprintf( + w, + "Lint Severity: %d blocking, %d warning\n", + r.BlockingLintIssueCount(), + r.WarningLintIssueCount(), + ) simStatus := "SUCCESS" if !r.SimulationOK { simStatus = "FAILED: " + r.SimulationErr.Error() @@ -139,13 +242,14 @@ func (r *VerificationReport) WriteReport(w io.Writer) { fmt.Fprintln(w, "RECOMMENDATION") fmt.Fprintln(w, separator) - if len(r.TimingIssues) > 0 { - fmt.Fprintln(w, "⚠ TIMING VIOLATIONS DETECTED") - fmt.Fprintln(w, "This kernel has cross-PE communication constraints") - fmt.Fprintln(w, "that are not satisfied. Consider:") - fmt.Fprintln(w, " 1. Adjusting operation timesteps to allow latency") - fmt.Fprintln(w, " 2. Modifying the scheduling to respect network delays") - fmt.Fprintln(w, " 3. 
Using buffering or pipelining strategies") + if r.BlockingLintIssueCount() > 0 { + fmt.Fprintln(w, "⚠ BLOCKING LINT ISSUES DETECTED") + fmt.Fprintln(w, "This kernel still has structural/timing/definite-predicate issues.") + fmt.Fprintln(w, "Fix blocking issues before trusting simulation results.") + } else if r.WarningLintIssueCount() > 0 { + fmt.Fprintln(w, "⚠ PREDICATE RISKS DETECTED") + fmt.Fprintln(w, "Only non-blocking predicate warnings are present (certainty=possible).") + fmt.Fprintln(w, "You may still review PHI/PHI_START/GPRED flows for robustness.") } else { fmt.Fprintln(w, "✓ KERNEL PASSED ALL CHECKS") fmt.Fprintln(w, "The kernel is ready for simulation.") diff --git a/verify/verify.go b/verify/verify.go index d42ea0e..16e4d7b 100644 --- a/verify/verify.go +++ b/verify/verify.go @@ -137,11 +137,13 @@ const ( IssueStruct IssueType = "STRUCT" // Mapping/structure error (illegal PE, port conflict) // IssueTiming indicates a dependency/timing lint issue. IssueTiming IssueType = "TIMING" // Dependency/timing error (insufficient latency) + // IssuePredicate indicates predicate-consistency risk in control/dataflow ops. + IssuePredicate IssueType = "PREDICATE" // Predicate risk (PHI/PHI_START/GPRED interactions) ) // Issue represents a single lint issue type Issue struct { - Type IssueType // STRUCT or TIMING + Type IssueType // STRUCT, TIMING, or PREDICATE PEX int // PE X coordinate (-1 if not applicable) PEY int // PE Y coordinate (-1 if not applicable) Time int // Timestep (-1 if not applicable) @@ -160,6 +162,41 @@ type ArchInfo struct { CtrlMemItems int // Control memory entries per PE } +const ( + defaultPredicateWarmupPassCap = 8 + defaultPredicateSteadyPassCount = 2 +) + +// LintOptions controls static lint behavior. +type LintOptions struct { + // EnablePrologueAwarePredicate enables bounded warmup+steady analysis for predicate checks. 
+ EnablePrologueAwarePredicate bool + // PredicateWarmupPassCap bounds warmup passes used to consume invalid_iterations/prologue. + PredicateWarmupPassCap int + // PredicateSteadyStatePasses controls extra passes after warmup to inspect steady-state risks. + PredicateSteadyStatePasses int +} + +// DefaultLintOptions returns the default lint configuration. +func DefaultLintOptions() LintOptions { + return LintOptions{ + EnablePrologueAwarePredicate: true, + PredicateWarmupPassCap: defaultPredicateWarmupPassCap, + PredicateSteadyStatePasses: defaultPredicateSteadyPassCount, + } +} + +func normalizeLintOptions(opts LintOptions) LintOptions { + out := opts + if out.PredicateWarmupPassCap <= 0 { + out.PredicateWarmupPassCap = defaultPredicateWarmupPassCap + } + if out.PredicateSteadyStatePasses < 0 { + out.PredicateSteadyStatePasses = 0 + } + return out +} + // PEState captures the runtime state of a single PE (for functional simulator) type PEState struct { Registers map[int]core.Data // Register file: register index → Data diff --git a/verify/verify_test.go b/verify/verify_test.go index 76d446c..9201f2f 100644 --- a/verify/verify_test.go +++ b/verify/verify_test.go @@ -2,6 +2,7 @@ package verify import ( + "strings" "testing" "github.com/sarchlab/zeonica/core" @@ -264,3 +265,353 @@ func TestFunctionalSimulatorMemory(t *testing.T) { t.Errorf("Expected $1 = 42 (from memory), got %d", val1) } } + +func TestRunLintPredicatePhiStartFirstSourceRisk(t *testing.T) { + arch := &ArchInfo{ + Rows: 2, + Columns: 2, + Topology: "mesh", + HopLatency: 1, + MemCapacity: 1024, + CtrlMemItems: 256, + } + + // Force $0 predicate=false, then PHI_START reads $0 as first source. 
+ prog := core.Program{ + EntryBlocks: []core.EntryBlock{ + { + InstructionGroups: []core.InstructionGroup{ + { + Operations: []core.Operation{ + { + OpCode: "GRANT_PREDICATE", + SrcOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "#1", Color: "RED"}, + {Impl: "#0", Color: "RED"}, + }, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "$0", Color: "RED"}, + }, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "PHI_START", + ID: 145, + SrcOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "$0", Color: "RED"}, + {Impl: "$1", Color: "RED"}, + }, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "$2", Color: "RED"}, + }, + }, + }, + }, + }, + }, + }, + }, + } + + issues := RunLint(map[string]core.Program{"(0, 0)": prog}, arch) + + found := false + for _, issue := range issues { + if issue.Type == IssuePredicate && strings.Contains(issue.Message, "PHI_START") { + found = true + break + } + } + if !found { + t.Fatalf("expected a PREDICATE issue for PHI_START first-source risk, got %v", issues) + } +} + +func TestRunLintPredicatePhiBothTrueRisk(t *testing.T) { + arch := &ArchInfo{ + Rows: 2, + Columns: 2, + Topology: "mesh", + HopLatency: 1, + MemCapacity: 1024, + CtrlMemItems: 256, + } + + // $0 and $1 are both definitely true before PHI. 
+ prog := core.Program{ + EntryBlocks: []core.EntryBlock{ + { + InstructionGroups: []core.InstructionGroup{ + { + Operations: []core.Operation{ + { + OpCode: "MOV", + SrcOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "#1", Color: "RED"}}, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$0", Color: "RED"}}, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "MOV", + SrcOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "#2", Color: "RED"}}, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$1", Color: "RED"}}, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "PHI", + ID: 111, + SrcOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "$0", Color: "RED"}, + {Impl: "$1", Color: "RED"}, + }, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$2", Color: "RED"}}, + }, + }, + }, + }, + }, + }, + }, + } + + issues := RunLint(map[string]core.Program{"(0, 0)": prog}, arch) + + found := false + for _, issue := range issues { + if issue.Type == IssuePredicate && strings.Contains(issue.Message, "PHI") { + found = true + break + } + } + if !found { + t.Fatalf("expected a PREDICATE issue for PHI both-true risk, got %v", issues) + } +} + +func TestVerificationReportBlockingAndWarningCounts(t *testing.T) { + report := &VerificationReport{ + LintIssues: []Issue{ + {Type: IssueStruct, Message: "struct issue"}, + {Type: IssueTiming, Message: "timing issue"}, + { + Type: IssuePredicate, + Message: "predicate possible", + Details: map[string]interface{}{"certainty": "possible"}, + }, + { + Type: IssuePredicate, + Message: "predicate definite", + Details: map[string]interface{}{"certainty": "definite"}, + }, + { + Type: IssuePredicate, + Message: "predicate without certainty", + }, + }, + } + + if got := report.WarningLintIssueCount(); got != 1 { + t.Fatalf("expected 1 warning issue, got %d", got) + } + if got := 
report.BlockingLintIssueCount(); got != 4 { + t.Fatalf("expected 4 blocking issues, got %d", got) + } +} + +func TestRunLintPredicatePhiStartInvalidIterationsPrologueSafe(t *testing.T) { + arch := &ArchInfo{ + Rows: 2, + Columns: 2, + Topology: "mesh", + HopLatency: 1, + MemCapacity: 1024, + CtrlMemItems: 256, + } + + // The first PHI_START execution should see $0=true. + // A later GPRED overwrites $0 predicate to false, but only after one invalid iteration. + prog := core.Program{ + EntryBlocks: []core.EntryBlock{ + { + InstructionGroups: []core.InstructionGroup{ + { + Operations: []core.Operation{ + { + OpCode: "MOV", + SrcOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "#1", Color: "RED"}}, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$0", Color: "RED"}}, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "GRANT_PREDICATE", + SrcOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "#1", Color: "RED"}, + {Impl: "#0", Color: "RED"}, + }, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$2", Color: "RED"}}, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "GRANT_PREDICATE", + InvalidIterations: 1, + SrcOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "#1", Color: "RED"}, + {Impl: "#0", Color: "RED"}, + }, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$0", Color: "RED"}}, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "PHI_START", + ID: 210, + SrcOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "$0", Color: "RED"}, + {Impl: "$2", Color: "RED"}, + }, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$3", Color: "RED"}}, + }, + }, + }, + }, + }, + }, + }, + } + + issues := RunLint(map[string]core.Program{"(0, 0)": prog}, arch) + for _, issue := range issues { + if issue.Type == IssuePredicate && strings.Contains(issue.Message, "first source") { + 
t.Fatalf("unexpected PHI_START first-source issue with prologue protection: %+v", issue) + } + } +} + +func TestRunLintPredicatePhiSteadyStateAfterWarmupStillDetected(t *testing.T) { + arch := &ArchInfo{ + Rows: 2, + Columns: 2, + Topology: "mesh", + HopLatency: 1, + MemCapacity: 1024, + CtrlMemItems: 256, + } + + // All three ops become executable after warmup; PHI should still be flagged. + prog := core.Program{ + EntryBlocks: []core.EntryBlock{ + { + InstructionGroups: []core.InstructionGroup{ + { + Operations: []core.Operation{ + { + OpCode: "MOV", + InvalidIterations: 1, + SrcOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "#1", Color: "RED"}}, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$0", Color: "RED"}}, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "MOV", + InvalidIterations: 1, + SrcOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "#2", Color: "RED"}}, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$1", Color: "RED"}}, + }, + }, + }, + }, + { + Operations: []core.Operation{ + { + OpCode: "PHI", + ID: 211, + InvalidIterations: 1, + SrcOperands: core.OperandList{ + Operands: []core.Operand{ + {Impl: "$0", Color: "RED"}, + {Impl: "$1", Color: "RED"}, + }, + }, + DstOperands: core.OperandList{ + Operands: []core.Operand{{Impl: "$2", Color: "RED"}}, + }, + }, + }, + }, + }, + }, + }, + } + + issues := RunLint(map[string]core.Program{"(0, 0)": prog}, arch) + foundDefinitePhi := false + for _, issue := range issues { + if issue.Type == IssuePredicate && strings.Contains(issue.Message, "PHI id=211") { + if certainty, ok := issue.Details["certainty"].(string); ok && certainty == "definite" { + foundDefinitePhi = true + } + } + } + if !foundDefinitePhi { + t.Fatalf("expected definite PHI predicate issue after warmup, got: %+v", issues) + } +}