diff --git a/internal/controller/device/scsi/controller.go b/internal/controller/device/scsi/controller.go index 879b130508..d664398971 100644 --- a/internal/controller/device/scsi/controller.go +++ b/internal/controller/device/scsi/controller.go @@ -9,32 +9,16 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/device/scsi/disk" "github.com/Microsoft/hcsshim/internal/controller/device/scsi/mount" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" - "github.com/google/uuid" + "github.com/Microsoft/go-winio/pkg/guid" + "github.com/sirupsen/logrus" ) -type VMSCSIOps interface { - disk.VMSCSIAdder - disk.VMSCSIRemover -} - -type LinuxGuestSCSIOps interface { - mount.LinuxGuestSCSIMounter - mount.LinuxGuestSCSIUnmounter - disk.LinuxGuestSCSIEjector -} - -type WindowsGuestSCSIOps interface { - mount.WindowsGuestSCSIMounter - mount.WindowsGuestSCSIUnmounter -} - -// numLUNsPerController is the maximum number of LUNs per controller, fixed by Hyper-V. -const numLUNsPerController = 64 - -// The controller manages all SCSI attached devices and guest mounted -// directories. -// +// Controller manages the full SCSI disk lifecycle — slot allocation, VM +// attachment, guest mounting, and teardown — across one or more controllers +// on a Hyper-V VM. All operations are serialized by a single mutex. // It is required that all callers: // // 1. Obtain a reservation using Reserve(). @@ -49,36 +33,44 @@ const numLUNsPerController = 64 // If UnmapFromGuest() fails, the caller must call UnmapFromGuest() again until // it succeeds to release the reservation and all resources. type Controller struct { - vm VMSCSIOps - lGuest LinuxGuestSCSIOps - wGuest WindowsGuestSCSIOps - + // mu serializes all public operations on the Controller. mu sync.Mutex - // Every call to Reserve gets a unique reservation ID which holds pointers - // to its controllerSlot for the disk and its partition for the mount. 
- reservations map[uuid.UUID]*reservation + // vm is the host-side interface for adding and removing SCSI disks. + // Immutable after construction. + vm VMSCSIOps + // linuxGuest is the guest-side interface for LCOW SCSI operations. + // Immutable after construction. + linuxGuest LinuxGuestSCSIOps + // windowsGuest is the guest-side interface for WCOW SCSI operations. + // Immutable after construction. + windowsGuest WindowsGuestSCSIOps + + // reservations maps a reservation ID to its disk slot and partition. + // Guarded by mu. + reservations map[guid.GUID]*reservation - // For fast lookup we keep a hostPath to controllerSlot mapping for all - // allocated disks. + // disksByPath maps a host disk path to its controllerSlots index for + // fast deduplication of disk attachments. Guarded by mu. disksByPath map[string]int - // Tracks all allocated and unallocated available slots on the SCSI - // controllers. + // controllerSlots tracks all disk slots across all SCSI controllers. + // A nil entry means the slot is free for allocation. // - // NumControllers == len(controllerSlots) / numLUNsPerController - // ControllerID == index / numLUNsPerController - // LunID == index % numLUNsPerController + // Index layout: + // ControllerID = index / numLUNsPerController + // LUN = index % numLUNsPerController controllerSlots []*disk.Disk } -func New(numControllers int, vm VMSCSIOps, lGuest LinuxGuestSCSIOps, wGuest WindowsGuestSCSIOps) *Controller { +// New creates a new [Controller] for the given number of SCSI controllers and +// host/guest operation interfaces. 
+func New(numControllers int, vm VMSCSIOps, linuxGuest LinuxGuestSCSIOps, windowsGuest WindowsGuestSCSIOps) *Controller {
 	return &Controller{
 		vm:              vm,
-		lGuest:          lGuest,
-		wGuest:          wGuest,
-		mu:              sync.Mutex{},
-		reservations:    make(map[uuid.UUID]*reservation),
+		linuxGuest:      linuxGuest,
+		windowsGuest:    windowsGuest,
+		reservations:    make(map[guid.GUID]*reservation),
 		disksByPath:     make(map[string]int),
 		controllerSlots: make([]*disk.Disk, numControllers*numLUNsPerController),
 	}
@@ -101,47 +93,59 @@ func (c *Controller) ReserveForRootfs(ctx context.Context, controller, lun uint)
 	if c.controllerSlots[slot] != nil {
 		return fmt.Errorf("slot for controller %d and lun %d is already reserved", controller, lun)
 	}
-	c.controllerSlots[slot] = disk.NewReserved(controller, lun, disk.DiskConfig{})
+	c.controllerSlots[slot] = disk.NewReserved(controller, lun, disk.Config{})
 	return nil
 }
 
-// Reserves a referenced counted mapping entry for a SCSI attachment based on
+// Reserve reserves a reference-counted mapping entry for a SCSI attachment based on
 // the SCSI disk path, and partition number.
 //
 // If an error is returned from this function, it is guaranteed that no
 // reservation mapping was made and no UnmapFromGuest() call is necessary to
 // clean up.
-func (c *Controller) Reserve(ctx context.Context, diskConfig disk.DiskConfig, mountConfig mount.MountConfig) (uuid.UUID, error) {
+func (c *Controller) Reserve(ctx context.Context, diskConfig disk.Config, mountConfig mount.Config) (guid.GUID, error) {
 	c.mu.Lock()
 	defer c.mu.Unlock()
 
-	// Generate a new reservation id.
-	id := uuid.New()
+	ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{
+		logfields.HostPath:  diskConfig.HostPath,
+		logfields.Partition: mountConfig.Partition,
+	}))
+
+	log.G(ctx).Debug("reserving SCSI slot")
+
+	// Generate a unique reservation ID.
+	id, err := guid.NewV4()
+	if err != nil {
+		return guid.GUID{}, fmt.Errorf("generate reservation ID: %w", err)
+	}
 	if _, ok := c.reservations[id]; ok {
-		return uuid.Nil, fmt.Errorf("reservation ID collision")
+		return guid.GUID{}, fmt.Errorf("reservation ID already exists: %s", id)
 	}
+
+	// Create the reservation entry.
 	r := &reservation{
 		controllerSlot: -1,
 		partition:      mountConfig.Partition,
 	}
-	// Determine if this hostPath already had a disk known.
+	// Check whether this disk path already has an allocated slot.
 	if slot, ok := c.disksByPath[diskConfig.HostPath]; ok {
-		r.controllerSlot = slot
-		// Update our reservation where the disk is.
-		d := c.controllerSlots[slot]
+		r.controllerSlot = slot // Update our reservation where the disk is.
+		existingDisk := c.controllerSlots[slot]
 
-		// Verify the caller config is the same.
-		if !d.Config().Equals(diskConfig) {
-			return uuid.Nil, fmt.Errorf("cannot reserve ref on disk with different config")
+		// Verify the caller is requesting the same disk configuration.
+		if !existingDisk.Config().Equals(diskConfig) {
+			return guid.GUID{}, fmt.Errorf("cannot reserve ref on disk with different config")
 		}
 
 		// We at least have a disk, now determine if we have a mount for this
 		// partition.
-		if _, err := d.ReservePartition(ctx, mountConfig); err != nil {
-			return uuid.Nil, fmt.Errorf("reserve partition %d: %w", mountConfig.Partition, err)
+		if _, err := existingDisk.ReservePartition(ctx, mountConfig); err != nil {
+			return guid.GUID{}, fmt.Errorf("reserve partition %d: %w", mountConfig.Partition, err)
 		}
 	} else {
-		// No hostPath was found. Find a slot for the disk.
+		// No existing slot for this path — find a free one.
nextSlot := -1 for i, d := range c.controllerSlots { if d == nil { @@ -150,72 +154,103 @@ func (c *Controller) Reserve(ctx context.Context, diskConfig disk.DiskConfig, mo } } if nextSlot == -1 { - return uuid.Nil, fmt.Errorf("no available slots") + return guid.GUID{}, fmt.Errorf("no available SCSI slots") } // Create the Disk and Partition Mount in the reserved states. controller := uint(nextSlot / numLUNsPerController) lun := uint(nextSlot % numLUNsPerController) - d := disk.NewReserved(controller, lun, diskConfig) - if _, err := d.ReservePartition(ctx, mountConfig); err != nil { - return uuid.Nil, fmt.Errorf("reserve partition %d: %w", mountConfig.Partition, err) + newDisk := disk.NewReserved(controller, lun, diskConfig) + if _, err := newDisk.ReservePartition(ctx, mountConfig); err != nil { + return guid.GUID{}, fmt.Errorf("reserve partition %d: %w", mountConfig.Partition, err) } - c.controllerSlots[controller*numLUNsPerController+lun] = d + c.controllerSlots[controller*numLUNsPerController+lun] = newDisk c.disksByPath[diskConfig.HostPath] = nextSlot r.controllerSlot = nextSlot } // Ensure our reservation is saved for all future operations. c.reservations[id] = r + log.G(ctx).WithField("reservation", id).Debug("SCSI slot reserved") return id, nil } -func (c *Controller) MapToGuest(ctx context.Context, reservation uuid.UUID) (string, error) { +// MapToGuest attaches the reserved disk to the VM and mounts its partition +// inside the guest, returning the guest path. It is idempotent for a +// reservation that is already fully mapped. 
+func (c *Controller) MapToGuest(ctx context.Context, id guid.GUID) (string, error) { c.mu.Lock() defer c.mu.Unlock() - if r, ok := c.reservations[reservation]; ok { - d := c.controllerSlots[r.controllerSlot] - if err := d.AttachToVM(ctx, c.vm); err != nil { - return "", fmt.Errorf("attach disk to vm: %w", err) - } - guestPath, err := d.MountPartitionToGuest(ctx, r.partition, c.lGuest, c.wGuest) - if err != nil { - return "", fmt.Errorf("mount partition %d to guest: %w", r.partition, err) - } - return guestPath, nil + r, ok := c.reservations[id] + if !ok { + return "", fmt.Errorf("reservation %s not found", id) } - return "", fmt.Errorf("reservation %s not found", reservation) + + existingDisk := c.controllerSlots[r.controllerSlot] + + log.G(ctx).WithFields(logrus.Fields{ + logfields.HostPath: existingDisk.HostPath(), + logfields.Partition: r.partition, + }).Debug("mapping SCSI disk to guest") + + // Attach the disk to the VM's SCSI bus (idempotent if already attached). + if err := existingDisk.AttachToVM(ctx, c.vm); err != nil { + return "", fmt.Errorf("attach disk to VM: %w", err) + } + + // Mount the partition inside the guest. + guestPath, err := existingDisk.MountPartitionToGuest(ctx, r.partition, c.linuxGuest, c.windowsGuest) + if err != nil { + return "", fmt.Errorf("mount partition %d to guest: %w", r.partition, err) + } + + log.G(ctx).WithField(logfields.UVMPath, guestPath).Debug("SCSI disk mapped to guest") + return guestPath, nil } -func (c *Controller) UnmapFromGuest(ctx context.Context, reservation uuid.UUID) error { +// UnmapFromGuest unmounts the partition from the guest and, when all +// reservations for a disk are released, detaches the disk from the VM and +// frees the SCSI slot. A failed call is retryable with the same reservation ID. 
+func (c *Controller) UnmapFromGuest(ctx context.Context, id guid.GUID) error { c.mu.Lock() defer c.mu.Unlock() - if r, ok := c.reservations[reservation]; ok { - d := c.controllerSlots[r.controllerSlot] - // Ref counted unmount. - if err := d.UnmountPartitionFromGuest(ctx, r.partition, c.lGuest, c.wGuest); err != nil { - return fmt.Errorf("unmount partition %d from guest: %w", r.partition, err) - } - if err := d.DetachFromVM(ctx, c.vm, c.lGuest); err != nil { - return fmt.Errorf("detach disk from vm: %w", err) - } - if d.State() == disk.DiskStateDetached { - // If we have no more mounts on this disk, remove the disk from the - // known disks and free the slot. - delete(c.disksByPath, d.HostPath()) - c.controllerSlots[r.controllerSlot] = nil - } - delete(c.reservations, reservation) - return nil + ctx, _ = log.WithContext(ctx, logrus.WithField("reservation", id.String())) + + r, ok := c.reservations[id] + if !ok { + return fmt.Errorf("reservation %s not found", id) } - return fmt.Errorf("reservation %s not found", reservation) -} -type reservation struct { - // This is the index into controllerSlots that holds this disk. - controllerSlot int - // This is the index into the disk mounts for this partition. - partition uint64 + existingDisk := c.controllerSlots[r.controllerSlot] + + log.G(ctx).WithFields(logrus.Fields{ + logfields.HostPath: existingDisk.HostPath(), + logfields.Partition: r.partition, + }).Debug("unmapping SCSI disk from guest") + + // Unmount the partition from the guest (ref-counted; only issues the + // guest call when this is the last reservation on the partition). + if err := existingDisk.UnmountPartitionFromGuest(ctx, r.partition, c.linuxGuest, c.windowsGuest); err != nil { + return fmt.Errorf("unmount partition %d from guest: %w", r.partition, err) + } + + // Detach the disk from the VM when no partitions remain active. 
+ if err := existingDisk.DetachFromVM(ctx, c.vm, c.linuxGuest); err != nil { + return fmt.Errorf("detach disk from VM: %w", err) + } + + // If the disk is now fully detached, free its slot for reuse. + if existingDisk.State() == disk.StateDetached { + delete(c.disksByPath, existingDisk.HostPath()) + c.controllerSlots[r.controllerSlot] = nil + log.G(ctx).Debug("SCSI slot freed") + } + + // Remove the reservation last so it remains available for retries if + // any earlier step above fails. + delete(c.reservations, id) + log.G(ctx).Debug("SCSI disk unmapped from guest") + return nil } diff --git a/internal/controller/device/scsi/controller_test.go b/internal/controller/device/scsi/controller_test.go index 9a6daa8c5b..9f3efa8fcf 100644 --- a/internal/controller/device/scsi/controller_test.go +++ b/internal/controller/device/scsi/controller_test.go @@ -12,7 +12,8 @@ import ( "github.com/Microsoft/hcsshim/internal/controller/device/scsi/mount" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" - "github.com/google/uuid" + + "github.com/Microsoft/go-winio/pkg/guid" ) // --- Mock types --- @@ -67,16 +68,16 @@ func (m *mockWindowsGuestOps) RemoveWCOWMappedVirtualDisk(_ context.Context, _ g // --- Helpers --- -func defaultDiskConfig() disk.DiskConfig { - return disk.DiskConfig{ +func defaultDiskConfig() disk.Config { + return disk.Config{ HostPath: `C:\test\disk.vhdx`, ReadOnly: false, - Type: disk.DiskTypeVirtualDisk, + Type: disk.TypeVirtualDisk, } } -func defaultMountConfig() mount.MountConfig { - return mount.MountConfig{ +func defaultMountConfig() mount.Config { + return mount.Config{ Partition: 1, ReadOnly: false, } @@ -86,7 +87,7 @@ func newController(vm *mockVMOps, lg *mockLinuxGuestOps, wg *mockWindowsGuestOps return New(1, vm, lg, wg) } -func reservedController(t *testing.T) (*Controller, uuid.UUID) { +func reservedController(t *testing.T) (*Controller, guid.GUID) { t.Helper() c := 
newController(&mockVMOps{}, &mockLinuxGuestOps{}, nil) id, err := c.Reserve(context.Background(), defaultDiskConfig(), defaultMountConfig()) @@ -96,7 +97,7 @@ func reservedController(t *testing.T) (*Controller, uuid.UUID) { return c, id } -func mappedController(t *testing.T) (*Controller, uuid.UUID) { +func mappedController(t *testing.T) (*Controller, guid.GUID) { t.Helper() c, id := reservedController(t) if _, err := c.MapToGuest(context.Background(), id); err != nil { @@ -122,7 +123,7 @@ func TestReserve_Success(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if id == uuid.Nil { + if id == (guid.GUID{}) { t.Fatal("expected non-nil reservation ID") } } @@ -147,17 +148,17 @@ func TestReserve_SameDiskSamePartition(t *testing.T) { func TestReserve_SameDiskDifferentPartition(t *testing.T) { c := newController(&mockVMOps{}, &mockLinuxGuestOps{}, nil) dc := defaultDiskConfig() - _, err := c.Reserve(context.Background(), dc, mount.MountConfig{Partition: 1}) + _, err := c.Reserve(context.Background(), dc, mount.Config{Partition: 1}) if err != nil { t.Fatalf("first reserve: %v", err) } - _, err = c.Reserve(context.Background(), dc, mount.MountConfig{Partition: 2}) + _, err = c.Reserve(context.Background(), dc, mount.Config{Partition: 2}) if err != nil { t.Fatalf("second reserve different partition: %v", err) } } -func TestReserve_SamePathDifferentDiskConfig(t *testing.T) { +func TestReserve_SamePathDifferentConfig(t *testing.T) { c := newController(&mockVMOps{}, &mockLinuxGuestOps{}, nil) dc := defaultDiskConfig() mc := defaultMountConfig() @@ -176,11 +177,11 @@ func TestReserve_SamePathDifferentDiskConfig(t *testing.T) { func TestReserve_DifferentDisks(t *testing.T) { c := newController(&mockVMOps{}, &mockLinuxGuestOps{}, nil) mc := defaultMountConfig() - _, err := c.Reserve(context.Background(), disk.DiskConfig{HostPath: `C:\a.vhdx`, Type: disk.DiskTypeVirtualDisk}, mc) + _, err := c.Reserve(context.Background(), disk.Config{HostPath: 
`C:\a.vhdx`, Type: disk.TypeVirtualDisk}, mc) if err != nil { t.Fatalf("first reserve: %v", err) } - _, err = c.Reserve(context.Background(), disk.DiskConfig{HostPath: `C:\b.vhdx`, Type: disk.DiskTypeVirtualDisk}, mc) + _, err = c.Reserve(context.Background(), disk.Config{HostPath: `C:\b.vhdx`, Type: disk.TypeVirtualDisk}, mc) if err != nil { t.Fatalf("second reserve: %v", err) } @@ -199,9 +200,9 @@ func TestReserve_FillsAllSlots(t *testing.T) { c := New(1, &mockVMOps{}, &mockLinuxGuestOps{}, nil) mc := defaultMountConfig() for i := range numLUNsPerController { - dc := disk.DiskConfig{ + dc := disk.Config{ HostPath: fmt.Sprintf(`C:\disk%d.vhdx`, i), - Type: disk.DiskTypeVirtualDisk, + Type: disk.TypeVirtualDisk, } _, err := c.Reserve(context.Background(), dc, mc) if err != nil { @@ -209,7 +210,7 @@ func TestReserve_FillsAllSlots(t *testing.T) { } } // Next reservation should fail. - _, err := c.Reserve(context.Background(), disk.DiskConfig{HostPath: `C:\overflow.vhdx`, Type: disk.DiskTypeVirtualDisk}, mc) + _, err := c.Reserve(context.Background(), disk.Config{HostPath: `C:\overflow.vhdx`, Type: disk.TypeVirtualDisk}, mc) if err == nil { t.Fatal("expected error when all slots are full") } @@ -230,7 +231,11 @@ func TestMapToGuest_Success(t *testing.T) { func TestMapToGuest_UnknownReservation(t *testing.T) { c := newController(&mockVMOps{}, &mockLinuxGuestOps{}, nil) - _, err := c.MapToGuest(context.Background(), uuid.New()) + unknownID, err := guid.NewV4() + if err != nil { + t.Fatalf("NewV4: %v", err) + } + _, err = c.MapToGuest(context.Background(), unknownID) if err == nil { t.Fatal("expected error for unknown reservation") } @@ -285,7 +290,11 @@ func TestUnmapFromGuest_Success(t *testing.T) { func TestUnmapFromGuest_UnknownReservation(t *testing.T) { c := newController(&mockVMOps{}, &mockLinuxGuestOps{}, nil) - err := c.UnmapFromGuest(context.Background(), uuid.New()) + unknownID, err := guid.NewV4() + if err != nil { + t.Fatalf("NewV4: %v", err) + } + err = 
c.UnmapFromGuest(context.Background(), unknownID) if err == nil { t.Fatal("expected error for unknown reservation") } @@ -405,7 +414,7 @@ func TestReserveAfterUnmap_ReusesSlot(t *testing.T) { t.Fatalf("UnmapFromGuest: %v", err) } // Reserve a different disk in the now-freed slot. - dc := disk.DiskConfig{HostPath: `C:\other.vhdx`, Type: disk.DiskTypeVirtualDisk} + dc := disk.Config{HostPath: `C:\other.vhdx`, Type: disk.TypeVirtualDisk} _, err := c.Reserve(context.Background(), dc, defaultMountConfig()) if err != nil { t.Fatalf("reserve after unmap: %v", err) diff --git a/internal/controller/device/scsi/disk/disk.go b/internal/controller/device/scsi/disk/disk.go index 2c5e383ab5..fbafd7f698 100644 --- a/internal/controller/device/scsi/disk/disk.go +++ b/internal/controller/device/scsi/disk/disk.go @@ -6,216 +6,241 @@ import ( "context" "fmt" + "github.com/sirupsen/logrus" + "github.com/Microsoft/hcsshim/internal/controller/device/scsi/mount" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) -// DiskType identifies the attachment protocol used when adding a disk to the VM's SCSI bus. -type DiskType string - -const ( - // DiskTypeVirtualDisk attaches the disk as a virtual hard disk (VHD/VHDX). - DiskTypeVirtualDisk DiskType = "VirtualDisk" - - // DiskTypePassThru attaches a physical disk directly to the VM with pass-through access. - DiskTypePassThru DiskType = "PassThru" - - // DiskTypeExtensibleVirtualDisk attaches a disk via an extensible virtual disk (EVD) provider. - // The hostPath must be in the form evd:///. - DiskTypeExtensibleVirtualDisk DiskType = "ExtensibleVirtualDisk" -) - -// DiskConfig describes the host-side disk to attach to the VM's SCSI bus. -type DiskConfig struct { - // HostPath is the path on the host to the disk to be attached. 
- HostPath string - // ReadOnly specifies whether the disk should be attached with read-only access. - ReadOnly bool - // Type specifies the attachment protocol to use when attaching the disk. - Type DiskType - // EVDType is the EVD provider name. - // Only populated when Type is [DiskTypeExtensibleVirtualDisk]. - EVDType string -} - -// equals reports whether two DiskConfig values describe the same attachment parameters. -func (d DiskConfig) Equals(other DiskConfig) bool { - return d.HostPath == other.HostPath && - d.ReadOnly == other.ReadOnly && - d.Type == other.Type && - d.EVDType == other.EVDType -} - -type DiskState int - -const ( - // The disk has never been attached. - DiskStateReserved DiskState = iota - // The disk is currently attached to the guest. - DiskStateAttached - // The disk was previously attached and ejected and must be detached. - DiskStateEjected - // The disk was previously attached and detached, this is terminal. - DiskStateDetached -) - -type VMSCSIAdder interface { - AddSCSIDisk(ctx context.Context, disk hcsschema.Attachment, controller uint, lun uint) error -} - -type VMSCSIRemover interface { - RemoveSCSIDisk(ctx context.Context, controller uint, lun uint) error -} - -type LinuxGuestSCSIEjector interface { - RemoveSCSIDevice(ctx context.Context, settings guestresource.SCSIDevice) error -} - -// Disk represents a SCSI disk attached to the VM. It manages the lifecycle of -// the disk attachment as well as the guest mounts on the disk partitions. +// Disk represents a SCSI disk attached to a Hyper-V VM. It tracks the +// attachment lifecycle and delegates per-partition mount management to [mount.Mount]. // -// All operations on the disk are expected to be ordered by the caller. No -// locking is done at this layer. +// All operations on a [Disk] are expected to be ordered by the caller. +// No locking is performed at this layer. type Disk struct { + // controller and lun are the hardware address of this disk on the VM's SCSI bus. 
controller uint lun uint - config DiskConfig - state DiskState - // Note that len(mounts) > 0 is the ref count for a disk. + // config is the immutable host-side disk configuration supplied at construction. + config Config + + // state tracks the current lifecycle position of this attachment. + state State + + // mounts maps a partition index to its guest mount. + // len(mounts) > 0 serves as the ref count for the disk. mounts map[uint64]*mount.Mount } -// NewReserved creates a new Disk in the reserved state with the provided configuration. -func NewReserved(controller, lun uint, config DiskConfig) *Disk { +// NewReserved creates a new [Disk] in the [StateReserved] state with the +// provided controller, LUN, and host-side disk configuration. +func NewReserved(controller, lun uint, config Config) *Disk { return &Disk{ controller: controller, lun: lun, config: config, - state: DiskStateReserved, + state: StateReserved, mounts: make(map[uint64]*mount.Mount), } } -func (d *Disk) State() DiskState { +// State returns the current lifecycle state of the disk. +func (d *Disk) State() State { return d.state } -func (d *Disk) Config() DiskConfig { +// Config returns the host-side disk configuration. +func (d *Disk) Config() Config { return d.config } +// HostPath returns the host-side path of the disk image. func (d *Disk) HostPath() string { return d.config.HostPath } +// AttachToVM adds the disk to the VM's SCSI bus. It is idempotent for an +// already-attached disk; on failure the disk is moved into detached state and +// a new [Disk] must be created to retry. func (d *Disk) AttachToVM(ctx context.Context, vm VMSCSIAdder) error { + + // Drive the state machine. switch d.state { - case DiskStateReserved: - // Attach the disk to the VM. 
+	case StateReserved:
+		log.G(ctx).WithFields(logrus.Fields{
+			logfields.Controller: d.controller,
+			logfields.LUN:        d.lun,
+			logfields.HostPath:   d.config.HostPath,
+			logfields.DiskType:   d.config.Type,
+		}).Debug("attaching SCSI disk to VM")
+
+		// Attempt to hot-add the disk to the VM SCSI bus.
 		if err := vm.AddSCSIDisk(ctx, hcsschema.Attachment{
 			Path:                      d.config.HostPath,
 			Type_:                     string(d.config.Type),
 			ReadOnly:                  d.config.ReadOnly,
 			ExtensibleVirtualDiskType: d.config.EVDType,
 		}, d.controller, d.lun); err != nil {
-			// Move to detached since we know from reserved there was no guest
-			// state.
-			d.state = DiskStateDetached
-			return fmt.Errorf("attach disk to VM: %w", err)
+			// Since the disk was never attached, move directly to the terminal
+			// Detached state. No guest state was established, so there is nothing
+			// to clean up.
+			d.state = StateDetached
+			return fmt.Errorf("attach SCSI disk controller=%d lun=%d to VM: %w", d.controller, d.lun, err)
 		}
-		d.state = DiskStateAttached
+
+		// Move to attached state after a successful attach.
+		d.state = StateAttached
+		log.G(ctx).Debug("SCSI disk attached to VM")
 		return nil
-	case DiskStateAttached:
-		// Disk is already attached, this is idempotent.
+
+	case StateAttached:
+		// Already attached — no-op.
 		return nil
-	case DiskStateDetached:
-		// We don't support re-attaching a detached disk, this is an error.
-		return fmt.Errorf("disk already detached")
+
+	case StateDetached:
+		// Re-attaching a detached disk is not supported.
+		return fmt.Errorf("cannot re-attach already-detached disk at controller=%d lun=%d", d.controller, d.lun)
+	default:
 	}
+
+	return nil
 }
 
-func (d *Disk) DetachFromVM(ctx context.Context, vm VMSCSIRemover, lGuest LinuxGuestSCSIEjector) error {
-	if d.state == DiskStateAttached {
-		// Ensure for correctness nobody leaked a mount
+// DetachFromVM ejects the disk from the guest and removes it from the VM's SCSI
+// bus.
It is idempotent for a disk that was never attached or is already detached; +// a failed removal is retriable by calling DetachFromVM again. +func (d *Disk) DetachFromVM(ctx context.Context, vm VMSCSIRemover, linuxGuest LinuxGuestSCSIEjector) error { + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.Controller: d.controller, + logfields.LUN: d.lun, + })) + + // Eject the disk from guest if we need that. + if d.state == StateAttached { + // If the disk still has active mounts it is in use; skip detach. if len(d.mounts) != 0 { // This disk is still active by some other mounts. Leave it. return nil } - // The linux guest needs to have the SCSI ejected. Do that now. - if lGuest != nil { - if err := lGuest.RemoveSCSIDevice(ctx, guestresource.SCSIDevice{ + + // LCOW guests require an explicit SCSI device removal before the host + // removes the disk from the VM bus. For WCOW, Windows handles hot-unplug + // automatically, so linuxGuest will be nil. + if linuxGuest != nil { + log.G(ctx).Debug("ejecting SCSI device from guest") + + if err := linuxGuest.RemoveSCSIDevice(ctx, guestresource.SCSIDevice{ Controller: uint8(d.controller), Lun: uint8(d.lun), }); err != nil { - return fmt.Errorf("remove SCSI device from guest: %w", err) + return fmt.Errorf("eject SCSI device controller=%d lun=%d from guest: %w", d.controller, d.lun, err) } } - // Set it to ejected and continue processing. - d.state = DiskStateEjected + + // Advance to Ejected before attempting VM removal so that a removal + // failure leaves the disk in a retriable position without re-ejecting. + d.state = StateEjected + log.G(ctx).Debug("SCSI device ejected from guest") } + // Drive the state machine for VM disk detachment. switch d.state { - case DiskStateReserved: - // Disk is not attached, this is idempotent. 
- return nil - case DiskStateAttached: - panic(fmt.Errorf("unexpected attached disk state in detach, expected ejected")) - case DiskStateEjected: - // The disk is ejected but still attached, attempt to detach it again. + case StateReserved: + // Disk was never attached — no-op. + + case StateAttached: + // Unreachable: the block above always advances past StateAttached. + return fmt.Errorf("unexpected disk state %s in detach path, expected ejected", d.state) + + case StateEjected: + log.G(ctx).Debug("removing SCSI disk from VM") + + // Guest has released the device; remove it from the VM SCSI bus. if err := vm.RemoveSCSIDisk(ctx, d.controller, d.lun); err != nil { - return fmt.Errorf("detach ejected disk from VM: %w", err) + // Leave the disk in StateEjected so the caller can retry this step. + return fmt.Errorf("remove ejected SCSI disk controller=%d lun=%d from VM: %w", d.controller, d.lun, err) } - d.state = DiskStateDetached - return nil - case DiskStateDetached: - // Disk is already detached, this is idempotent. - return nil + d.state = StateDetached + log.G(ctx).Debug("SCSI disk removed from VM") + + case StateDetached: + // Already fully detached — no-op. } - return fmt.Errorf("unexpected disk state %d", d.state) + + return nil } -func (d *Disk) ReservePartition(ctx context.Context, config mount.MountConfig) (*mount.Mount, error) { - if d.state != DiskStateReserved && d.state != DiskStateAttached { - return nil, fmt.Errorf("unexpected disk state %d, expected reserved or attached", d.state) +// ReservePartition reserves a slot for a guest mount on the given partition. +// If a mount already exists for that partition, it increments the reference +// count after verifying the config matches. 
+func (d *Disk) ReservePartition(ctx context.Context, config mount.Config) (*mount.Mount, error) { + if d.state != StateReserved && d.state != StateAttached { + return nil, fmt.Errorf("cannot reserve partition on disk in state %s", d.state) } - // Check if the partition is already reserved. - if m, ok := d.mounts[config.Partition]; ok { - if err := m.Reserve(config); err != nil { + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.Controller: d.controller, + logfields.LUN: d.lun, + logfields.Partition: config.Partition, + })) + + // If a mount already exists for this partition, bump its ref count. + if existingMount, ok := d.mounts[config.Partition]; ok { + if err := existingMount.Reserve(config); err != nil { return nil, fmt.Errorf("reserve partition %d: %w", config.Partition, err) } - return m, nil + + log.G(ctx).Trace("existing mount found for partition, incrementing ref count") + return existingMount, nil } - // Create a new mount for this partition in the reserved state. - m := mount.NewReserved(d.controller, d.lun, config) - d.mounts[config.Partition] = m - return m, nil + + // No existing mount for this partition — create one in the reserved state. + newMount := mount.NewReserved(d.controller, d.lun, config) + d.mounts[config.Partition] = newMount + + log.G(ctx).Trace("reserved new mount for partition") + return newMount, nil } +// MountPartitionToGuest mounts the partition inside the guest, +// returning the auto-generated guest path. The partition must first be reserved +// via [Disk.ReservePartition]. 
func (d *Disk) MountPartitionToGuest(ctx context.Context, partition uint64, linuxGuest mount.LinuxGuestSCSIMounter, windowsGuest mount.WindowsGuestSCSIMounter) (string, error) { - if d.state != DiskStateAttached { - return "", fmt.Errorf("unexpected disk state %d, expected attached", d.state) + if d.state != StateAttached { + return "", fmt.Errorf("cannot mount partition on disk in state %s, expected attached", d.state) } - if m, ok := d.mounts[partition]; ok { - return m.MountToGuest(ctx, linuxGuest, windowsGuest) + + // Look up the pre-reserved mount for this partition. + existingMnt, ok := d.mounts[partition] + if !ok { + return "", fmt.Errorf("partition %d not reserved on disk controller=%d lun=%d", partition, d.controller, d.lun) } - return "", fmt.Errorf("partition %d not found on disk", partition) + return existingMnt.MountToGuest(ctx, linuxGuest, windowsGuest) } +// UnmountPartitionFromGuest unmounts the partition at the given index from the +// guest. When the mount's reference count reaches zero, and it transitions to +// the unmounted state, its entry is removed from the disk so a subsequent +// [Disk.DetachFromVM] call sees no active mounts. func (d *Disk) UnmountPartitionFromGuest(ctx context.Context, partition uint64, linuxGuest mount.LinuxGuestSCSIUnmounter, windowsGuest mount.WindowsGuestSCSIUnmounter) error { - if m, ok := d.mounts[partition]; ok { - if err := m.UnmountFromGuest(ctx, linuxGuest, windowsGuest); err != nil { - return fmt.Errorf("unmount partition %d from guest: %w", partition, err) - } - // This was the last caller of Unmount, remove the partition in the disk - // mounts. - if m.State() == mount.MountStateUnmounted { - delete(d.mounts, partition) - } + existingMount, ok := d.mounts[partition] + if !ok { + // No mount found — treat as a no-op to support retry by callers. 
+ return nil + } + + if err := existingMount.UnmountFromGuest(ctx, linuxGuest, windowsGuest); err != nil { + return fmt.Errorf("unmount partition %d from guest: %w", partition, err) + } + + // If the mount reached the terminal unmounted state, remove it from the disk + // so that len(mounts) correctly reflects the active consumer count. + if existingMount.State() == mount.StateUnmounted { + delete(d.mounts, partition) } - // Consider a not found mount, a success for retry logic in the caller. return nil } diff --git a/internal/controller/device/scsi/disk/disk_lcow.go b/internal/controller/device/scsi/disk/disk_lcow.go new file mode 100644 index 0000000000..a53f0fd644 --- /dev/null +++ b/internal/controller/device/scsi/disk/disk_lcow.go @@ -0,0 +1,3 @@ +//go:build windows + +package disk diff --git a/internal/controller/device/scsi/disk/disk_test.go b/internal/controller/device/scsi/disk/disk_test.go index 6c9140e057..0d74c7f339 100644 --- a/internal/controller/device/scsi/disk/disk_test.go +++ b/internal/controller/device/scsi/disk/disk_test.go @@ -76,11 +76,11 @@ func (m *mockWindowsGuestSCSIUnmounter) RemoveWCOWMappedVirtualDisk(_ context.Co // --- Helpers --- -func defaultConfig() DiskConfig { - return DiskConfig{ +func defaultConfig() Config { + return Config{ HostPath: `C:\test\disk.vhdx`, ReadOnly: false, - Type: DiskTypeVirtualDisk, + Type: TypeVirtualDisk, } } @@ -96,16 +96,16 @@ func attachedDisk(t *testing.T) *Disk { // --- Tests --- func TestNewReserved(t *testing.T) { - cfg := DiskConfig{ + cfg := Config{ HostPath: `C:\test\disk.vhdx`, ReadOnly: true, - Type: DiskTypePassThru, + Type: TypePassThru, EVDType: "evd-type", } d := NewReserved(1, 2, cfg) - if d.State() != DiskStateReserved { - t.Errorf("expected state %d, got %d", DiskStateReserved, d.State()) + if d.State() != StateReserved { + t.Errorf("expected state %d, got %d", StateReserved, d.State()) } if d.Config() != cfg { t.Errorf("expected config %+v, got %+v", cfg, d.Config()) @@ -115,11 +115,11 
@@ func TestNewReserved(t *testing.T) { } } -func TestDiskConfigEquals(t *testing.T) { - base := DiskConfig{ +func TestConfigEquals(t *testing.T) { + base := Config{ HostPath: `C:\a.vhdx`, ReadOnly: true, - Type: DiskTypeVirtualDisk, + Type: TypeVirtualDisk, EVDType: "evd", } same := base @@ -140,7 +140,7 @@ func TestDiskConfigEquals(t *testing.T) { } diffType := base - diffType.Type = DiskTypePassThru + diffType.Type = TypePassThru if base.Equals(diffType) { t.Error("expected different Type to be not equal") } @@ -158,8 +158,8 @@ func TestAttachToVM_FromReserved_Success(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if d.State() != DiskStateAttached { - t.Errorf("expected state %d, got %d", DiskStateAttached, d.State()) + if d.State() != StateAttached { + t.Errorf("expected state %d, got %d", StateAttached, d.State()) } } @@ -173,8 +173,8 @@ func TestAttachToVM_FromReserved_Error(t *testing.T) { if !errors.Is(err, addErr) { t.Errorf("expected wrapped error %v, got %v", addErr, err) } - if d.State() != DiskStateDetached { - t.Errorf("expected state %d after failure, got %d", DiskStateDetached, d.State()) + if d.State() != StateDetached { + t.Errorf("expected state %d after failure, got %d", StateDetached, d.State()) } } @@ -184,8 +184,8 @@ func TestAttachToVM_Idempotent_WhenAttached(t *testing.T) { if err := d.AttachToVM(context.Background(), &mockVMSCSIAdder{}); err != nil { t.Fatalf("unexpected error on idempotent attach: %v", err) } - if d.State() != DiskStateAttached { - t.Errorf("expected state %d, got %d", DiskStateAttached, d.State()) + if d.State() != StateAttached { + t.Errorf("expected state %d, got %d", StateAttached, d.State()) } } @@ -193,7 +193,7 @@ func TestAttachToVM_ErrorWhenDetached(t *testing.T) { d := NewReserved(0, 0, defaultConfig()) // Fail attachment to move to detached. 
_ = d.AttachToVM(context.Background(), &mockVMSCSIAdder{err: errors.New("fail")}) - if d.State() != DiskStateDetached { + if d.State() != StateDetached { t.Fatalf("expected detached state, got %d", d.State()) } err := d.AttachToVM(context.Background(), &mockVMSCSIAdder{}) @@ -208,8 +208,8 @@ func TestDetachFromVM_FromAttached_NoMounts_NoLinuxGuest(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if d.State() != DiskStateDetached { - t.Errorf("expected state %d, got %d", DiskStateDetached, d.State()) + if d.State() != StateDetached { + t.Errorf("expected state %d, got %d", StateDetached, d.State()) } } @@ -219,8 +219,8 @@ func TestDetachFromVM_FromAttached_WithLinuxGuest(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if d.State() != DiskStateDetached { - t.Errorf("expected state %d, got %d", DiskStateDetached, d.State()) + if d.State() != StateDetached { + t.Errorf("expected state %d, got %d", StateDetached, d.State()) } } @@ -235,8 +235,8 @@ func TestDetachFromVM_LinuxEjectError(t *testing.T) { t.Errorf("expected wrapped error %v, got %v", ejectErr, err) } // State should remain attached since eject failed before state transition. - if d.State() != DiskStateAttached { - t.Errorf("expected state %d, got %d", DiskStateAttached, d.State()) + if d.State() != StateAttached { + t.Errorf("expected state %d, got %d", StateAttached, d.State()) } } @@ -252,15 +252,15 @@ func TestDetachFromVM_RemoveError(t *testing.T) { t.Errorf("expected wrapped error %v, got %v", removeErr, err) } // State should be ejected since eject succeeded but removal failed. - if d.State() != DiskStateEjected { - t.Errorf("expected state %d, got %d", DiskStateEjected, d.State()) + if d.State() != StateEjected { + t.Errorf("expected state %d, got %d", StateEjected, d.State()) } } func TestDetachFromVM_SkipsWhenMountsExist(t *testing.T) { d := attachedDisk(t) // Reserve a partition so len(mounts) > 0. 
- _, err := d.ReservePartition(context.Background(), mount.MountConfig{Partition: 1}) + _, err := d.ReservePartition(context.Background(), mount.Config{Partition: 1}) if err != nil { t.Fatalf("ReservePartition: %v", err) } @@ -269,8 +269,8 @@ func TestDetachFromVM_SkipsWhenMountsExist(t *testing.T) { t.Fatalf("unexpected error: %v", err) } // Should remain attached because there are outstanding mounts. - if d.State() != DiskStateAttached { - t.Errorf("expected state %d, got %d", DiskStateAttached, d.State()) + if d.State() != StateAttached { + t.Errorf("expected state %d, got %d", StateAttached, d.State()) } } @@ -294,7 +294,7 @@ func TestDetachFromVM_Idempotent_WhenDetached(t *testing.T) { func TestReservePartition_Success(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1, ReadOnly: true} + cfg := mount.Config{Partition: 1, ReadOnly: true} m, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("unexpected error: %v", err) @@ -302,14 +302,14 @@ func TestReservePartition_Success(t *testing.T) { if m == nil { t.Fatal("expected non-nil mount") } - if m.State() != mount.MountStateReserved { - t.Errorf("expected mount state %d, got %d", mount.MountStateReserved, m.State()) + if m.State() != mount.StateReserved { + t.Errorf("expected mount state %d, got %d", mount.StateReserved, m.State()) } } func TestReservePartition_SuccessFromReservedDisk(t *testing.T) { d := NewReserved(0, 0, defaultConfig()) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} m, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("unexpected error: %v", err) @@ -321,7 +321,7 @@ func TestReservePartition_SuccessFromReservedDisk(t *testing.T) { func TestReservePartition_SamePartitionSameConfig(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1, ReadOnly: true} + cfg := mount.Config{Partition: 1, ReadOnly: true} m1, err := d.ReservePartition(context.Background(), cfg) if 
err != nil { t.Fatalf("first reserve: %v", err) @@ -337,12 +337,12 @@ func TestReservePartition_SamePartitionSameConfig(t *testing.T) { func TestReservePartition_SamePartitionDifferentConfig(t *testing.T) { d := attachedDisk(t) - cfg1 := mount.MountConfig{Partition: 1, ReadOnly: true} + cfg1 := mount.Config{Partition: 1, ReadOnly: true} _, err := d.ReservePartition(context.Background(), cfg1) if err != nil { t.Fatalf("first reserve: %v", err) } - cfg2 := mount.MountConfig{Partition: 1, ReadOnly: false} + cfg2 := mount.Config{Partition: 1, ReadOnly: false} _, err = d.ReservePartition(context.Background(), cfg2) if err == nil { t.Fatal("expected error reserving same partition with different config") @@ -352,7 +352,7 @@ func TestReservePartition_SamePartitionDifferentConfig(t *testing.T) { func TestReservePartition_ErrorWhenDetached(t *testing.T) { d := NewReserved(0, 0, defaultConfig()) _ = d.AttachToVM(context.Background(), &mockVMSCSIAdder{err: errors.New("fail")}) - _, err := d.ReservePartition(context.Background(), mount.MountConfig{Partition: 1}) + _, err := d.ReservePartition(context.Background(), mount.Config{Partition: 1}) if err == nil { t.Fatal("expected error when disk is detached") } @@ -360,7 +360,7 @@ func TestReservePartition_ErrorWhenDetached(t *testing.T) { func TestMountPartitionToGuest_Success(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -392,7 +392,7 @@ func TestMountPartitionToGuest_PartitionNotFound(t *testing.T) { func TestMountPartitionToGuest_MountError(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -406,7 +406,7 @@ func TestMountPartitionToGuest_MountError(t *testing.T) { func 
TestUnmountPartitionFromGuest_Success(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -441,7 +441,7 @@ func TestUnmountPartitionFromGuest_PartitionNotFound_IsNoOp(t *testing.T) { func TestUnmountPartitionFromGuest_UnmountError(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -462,7 +462,7 @@ func TestUnmountPartitionFromGuest_UnmountError(t *testing.T) { func TestUnmountPartitionFromGuest_RemovesMountWhenFullyUnmounted(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -481,8 +481,8 @@ func TestUnmountPartitionFromGuest_RemovesMountWhenFullyUnmounted(t *testing.T) if err != nil { t.Fatalf("DetachFromVM after unmount: %v", err) } - if d.State() != DiskStateDetached { - t.Errorf("expected state %d, got %d", DiskStateDetached, d.State()) + if d.State() != StateDetached { + t.Errorf("expected state %d, got %d", StateDetached, d.State()) } } @@ -491,7 +491,7 @@ func TestUnmountPartitionFromGuest_RetryAfterDetachFailure(t *testing.T) { // On retry, UnmountPartitionFromGuest should be a no-op (partition already removed) // so that DetachFromVM can be retried. 
d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -520,8 +520,8 @@ func TestUnmountPartitionFromGuest_RetryAfterDetachFailure(t *testing.T) { if err != nil { t.Fatalf("retry DetachFromVM: %v", err) } - if d.State() != DiskStateDetached { - t.Errorf("expected state %d, got %d", DiskStateDetached, d.State()) + if d.State() != StateDetached { + t.Errorf("expected state %d, got %d", StateDetached, d.State()) } } @@ -529,7 +529,7 @@ func TestUnmountPartitionFromGuest_RetryAfterDetachFailure(t *testing.T) { func TestMountPartitionToGuest_WCOW_Success(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -545,7 +545,7 @@ func TestMountPartitionToGuest_WCOW_Success(t *testing.T) { func TestMountPartitionToGuest_WCOW_MountError(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -558,7 +558,7 @@ func TestMountPartitionToGuest_WCOW_MountError(t *testing.T) { func TestMountPartitionToGuest_WCOW_FormatWithRefs(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1, FormatWithRefs: true} + cfg := mount.Config{Partition: 1, FormatWithRefs: true} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -574,7 +574,7 @@ func TestMountPartitionToGuest_WCOW_FormatWithRefs(t *testing.T) { func TestUnmountPartitionFromGuest_WCOW_Success(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if 
err != nil { t.Fatalf("ReservePartition: %v", err) @@ -591,7 +591,7 @@ func TestUnmountPartitionFromGuest_WCOW_Success(t *testing.T) { func TestUnmountPartitionFromGuest_WCOW_UnmountError(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -612,7 +612,7 @@ func TestUnmountPartitionFromGuest_WCOW_UnmountError(t *testing.T) { func TestUnmountPartitionFromGuest_WCOW_RemovesMountWhenFullyUnmounted(t *testing.T) { d := attachedDisk(t) - cfg := mount.MountConfig{Partition: 1} + cfg := mount.Config{Partition: 1} _, err := d.ReservePartition(context.Background(), cfg) if err != nil { t.Fatalf("ReservePartition: %v", err) @@ -629,7 +629,7 @@ func TestUnmountPartitionFromGuest_WCOW_RemovesMountWhenFullyUnmounted(t *testin if err != nil { t.Fatalf("DetachFromVM after unmount: %v", err) } - if d.State() != DiskStateDetached { - t.Errorf("expected state %d, got %d", DiskStateDetached, d.State()) + if d.State() != StateDetached { + t.Errorf("expected state %d, got %d", StateDetached, d.State()) } } diff --git a/internal/controller/device/scsi/disk/disk_wcow.go b/internal/controller/device/scsi/disk/disk_wcow.go new file mode 100644 index 0000000000..a53f0fd644 --- /dev/null +++ b/internal/controller/device/scsi/disk/disk_wcow.go @@ -0,0 +1,3 @@ +//go:build windows + +package disk diff --git a/internal/controller/device/scsi/disk/doc.go b/internal/controller/device/scsi/disk/doc.go new file mode 100644 index 0000000000..07b1fda3c6 --- /dev/null +++ b/internal/controller/device/scsi/disk/doc.go @@ -0,0 +1,48 @@ +//go:build windows + +// Package disk manages the lifecycle of a single SCSI disk attachment to a +// Hyper-V VM, from host-side slot allocation through guest-side partition +// mounting. 
+// +// # Overview +// +// [Disk] is the primary type, representing one SCSI disk attached (or to be +// attached) to the VM. It tracks its own lifecycle state via [State] and +// delegates per-partition mount management to [mount.Mount]. +// +// All operations on a [Disk] are expected to be ordered by the caller. +// No locking is performed at this layer. +// +// # Disk Lifecycle +// +// ┌──────────────────────┐ +// │ StateReserved │ ← attach failure → StateDetached (not retriable) +// └──────────┬───────────┘ +// │ disk added to VM SCSI bus +// ▼ +// ┌──────────────────────┐ +// │ StateAttached │ +// └──────────┬───────────┘ +// (partition mounts driven here) +// │ all partitions released; +// │ SCSI device ejected from guest +// ▼ +// ┌──────────────────────┐ +// │ StateEjected │ ← stays here on VM removal failure (retriable) +// └──────────┬───────────┘ +// │ disk removed from VM SCSI bus +// ▼ +// ┌──────────────────────┐ +// │ StateDetached │ +// └──────────────────────┘ +// (terminal — no further transitions) +// +// # Retry / Idempotency +// +// After a successful guest eject but a failed [Disk.DetachFromVM] VM removal, +// the disk remains in [StateEjected]. A subsequent [Disk.DetachFromVM] call +// resumes from that state and retries only the VM removal step. +// +// Attachment failure from [StateReserved] moves the disk to the terminal +// [StateDetached] state; a new [Disk] must be created to retry. +package disk diff --git a/internal/controller/device/scsi/disk/state.go b/internal/controller/device/scsi/disk/state.go new file mode 100644 index 0000000000..bcbc2d530f --- /dev/null +++ b/internal/controller/device/scsi/disk/state.go @@ -0,0 +1,64 @@ +//go:build windows + +package disk + +// State represents the current lifecycle state of a SCSI disk attachment. 
+// +// The normal progression is: +// +// StateReserved → StateAttached → StateEjected → StateDetached +// +// Attachment failure from StateReserved transitions directly to the +// terminal StateDetached state; create a new [Disk] to retry. +// A VM removal failure from StateEjected leaves the disk in StateEjected +// so the caller can retry only the removal step. +// +// Full state-transition table: +// +// Current State │ Trigger │ Next State +// ───────────────┼───────────────────────────────────────┼────────────────── +// StateReserved │ attach succeeds │ StateAttached +// StateReserved │ attach fails │ StateDetached +// StateAttached │ active mounts remain │ StateAttached (no-op) +// StateAttached │ eject succeeds (or no-op for WCOW) │ StateEjected +// StateAttached │ eject fails │ StateAttached +// StateEjected │ VM removal succeeds │ StateDetached +// StateEjected │ VM removal fails │ StateEjected +// StateDetached │ (terminal — no further transitions) │ — +type State int + +const ( + // StateReserved is the initial state; the slot has been allocated but + // the disk has not yet been added to the VM's SCSI bus. + StateReserved State = iota + + // StateAttached means the disk has been successfully added to the + // VM's SCSI bus. Partition mounts are driven from this state. + StateAttached + + // StateEjected means the SCSI device has been unplugged from the + // guest but the disk has not yet been removed from the VM's SCSI bus. + // This intermediate state makes VM removal retriable independently of + // the guest eject step. + StateEjected + + // StateDetached means the disk has been fully removed from the VM's + // SCSI bus. This is a terminal state. + StateDetached +) + +// String returns a human-readable name for the [State]. 
+func (s State) String() string { + switch s { + case StateReserved: + return "Reserved" + case StateAttached: + return "Attached" + case StateEjected: + return "Ejected" + case StateDetached: + return "Detached" + default: + return "Unknown" + } +} diff --git a/internal/controller/device/scsi/disk/types.go b/internal/controller/device/scsi/disk/types.go new file mode 100644 index 0000000000..1976e71225 --- /dev/null +++ b/internal/controller/device/scsi/disk/types.go @@ -0,0 +1,65 @@ +//go:build windows + +package disk + +import ( + "context" + + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +// Type identifies the attachment protocol used when adding a disk to the VM's SCSI bus. +type Type string + +const ( + // TypeVirtualDisk attaches the disk as a virtual hard disk (VHD/VHDX). + TypeVirtualDisk Type = "VirtualDisk" + + // TypePassThru attaches a physical disk directly to the VM with pass-through access. + TypePassThru Type = "PassThru" + + // TypeExtensibleVirtualDisk attaches a disk via an extensible virtual disk (EVD) provider. + TypeExtensibleVirtualDisk Type = "ExtensibleVirtualDisk" +) + +// Config describes the host-side disk to attach to the VM's SCSI bus. +type Config struct { + // HostPath is the path on the host to the disk to be attached. + HostPath string + // ReadOnly specifies whether the disk should be attached with read-only access. + ReadOnly bool + // Type specifies the attachment protocol to use when attaching the disk. + Type Type + // EVDType is the EVD provider name. + // Only populated when Type is [TypeExtensibleVirtualDisk]. + EVDType string +} + +// Equals reports whether two disk Config values describe the same attachment parameters. 
+func (d Config) Equals(other Config) bool { + return d.HostPath == other.HostPath && + d.ReadOnly == other.ReadOnly && + d.Type == other.Type && + d.EVDType == other.EVDType +} + +// VMSCSIAdder adds a SCSI disk to a Utility VM's SCSI bus. +type VMSCSIAdder interface { + // AddSCSIDisk hot-adds a SCSI disk to the Utility VM. + AddSCSIDisk(ctx context.Context, disk hcsschema.Attachment, controller uint, lun uint) error +} + +// VMSCSIRemover removes a SCSI disk from a Utility VM's SCSI bus. +type VMSCSIRemover interface { + // RemoveSCSIDisk removes a SCSI disk from the Utility VM. + RemoveSCSIDisk(ctx context.Context, controller uint, lun uint) error +} + +// LinuxGuestSCSIEjector issues a guest-side SCSI device removal for LCOW guests. +// Pass nil to [Disk.DetachFromVM] to skip this step for WCOW guests, which handle +// SCSI hot-unplug automatically. +type LinuxGuestSCSIEjector interface { + // RemoveSCSIDevice removes a SCSI device from the LCOW guest. + RemoveSCSIDevice(ctx context.Context, settings guestresource.SCSIDevice) error +} diff --git a/internal/controller/device/scsi/doc.go b/internal/controller/device/scsi/doc.go new file mode 100644 index 0000000000..9469ec5290 --- /dev/null +++ b/internal/controller/device/scsi/doc.go @@ -0,0 +1,45 @@ +//go:build windows + +// Package scsi manages the full lifecycle of SCSI disk mappings on a +// Hyper-V VM, from host-side slot allocation through guest-side mounting. +// +// # Architecture +// +// [Controller] is the primary entry point, exposing three methods: +// +// - [Controller.Reserve]: allocates a reference-counted SCSI slot for a +// disk + partition pair and returns a reservation ID. +// - [Controller.MapToGuest]: attaches the disk to the VM's SCSI bus and +// mounts the partition inside the guest. +// - [Controller.UnmapFromGuest]: unmounts the partition from the guest and, +// when all reservations for a disk are released, detaches the disk from +// the VM and frees the SCSI slot. 
+// +// All three operations are serialized by a single mutex on the [Controller]. +// +// # Usage +// +// c := scsi.New(numControllers, vmOps, linuxGuestOps, windowsGuestOps) +// +// // Reserve a slot (no I/O yet): +// id, err := c.Reserve(ctx, diskConfig, mountConfig) +// +// // Attach the disk and mount the partition: +// guestPath, err := c.MapToGuest(ctx, id) +// +// // Unmount and detach when done: +// err = c.UnmapFromGuest(ctx, id) +// +// # Retry / Idempotency +// +// [Controller.MapToGuest] is idempotent for a reservation that is already +// fully mapped. [Controller.UnmapFromGuest] is retryable: if it fails +// partway through teardown, calling it again with the same reservation ID +// resumes from where the previous attempt stopped. +// +// # Layered Design +// +// The [Controller] delegates all disk-level state to [disk.Disk] and all +// partition-level state to [mount.Mount]; it only coordinates slot allocation +// and the overall call sequence. +package scsi diff --git a/internal/controller/device/scsi/mount/doc.go b/internal/controller/device/scsi/mount/doc.go new file mode 100644 index 0000000000..83cc023af6 --- /dev/null +++ b/internal/controller/device/scsi/mount/doc.go @@ -0,0 +1,41 @@ +//go:build windows + +// Package mount manages the lifecycle of a single partition mount inside a +// Hyper-V guest VM, from the initial reservation through mounting and +// unmounting. +// +// # Overview +// +// [Mount] is the primary type, representing one guest-side partition mount. +// It tracks its own lifecycle state via [State] and supports reference +// counting so multiple callers can share the same mount. +// +// All operations on a [Mount] are expected to be ordered by the caller. +// No locking is performed at this layer. 
+// +// # Mount Lifecycle +// +// ┌──────────────────────┐ +// │ StateReserved │ ← mount failure → StateUnmounted (not retriable) +// └──────────┬───────────┘ +// │ guest mount succeeds +// ▼ +// ┌──────────────────────┐ +// │ StateMounted │ +// └──────────┬───────────┘ +// │ reference count → 0; +// │ guest unmount succeeds +// ▼ +// ┌──────────────────────┐ +// │ StateUnmounted │ +// └──────────────────────┘ +// (terminal — entry removed from disk) +// +// # Reference Counting +// +// Multiple callers may share a single [Mount] by calling [Mount.Reserve] +// before the mount is issued. [Mount.MountToGuest] issues the guest operation +// only once regardless of the reservation count; subsequent callers receive the +// same guest path. [Mount.UnmountFromGuest] decrements the count and only +// issues the guest unmount when it reaches zero. +package mount diff --git a/internal/controller/device/scsi/mount/mount.go b/internal/controller/device/scsi/mount/mount.go index 1d820f3bad..f2ab102c30 100644 --- a/internal/controller/device/scsi/mount/mount.go +++ b/internal/controller/device/scsi/mount/mount.go @@ -5,158 +5,148 @@ package mount import ( "context" "fmt" - "slices" -) - -// MountConfig describes how a partition of a SCSI disk should be mounted inside -// the guest. -type MountConfig struct { - // Partition is the target partition index (1-based) on a partitioned - // device. Zero means the whole disk. - // - // WCOW only supports whole disk. LCOW supports both whole disk and - // partition mounts if the disk has multiple. - Partition uint64 - // ReadOnly mounts the disk read-only. - ReadOnly bool - // Encrypted encrypts the device with dm-crypt. - // - // Only supported for LCOW. - Encrypted bool - // Options are mount flags or data passed to the guest mount call. - // - // Only supported for LCOW. - Options []string - // EnsureFilesystem formats the mount as Filesystem if not already - // formatted. - // - // Only supported for LCOW. 
- EnsureFilesystem bool - // Filesystem is the target filesystem type. - // - // Only supported for LCOW. - Filesystem string - // BlockDev mounts the device as a block device. - // - // Only supported for LCOW. - BlockDev bool - // FormatWithRefs formats the disk using refs. - // - // Only supported for WCOW scratch disks. - FormatWithRefs bool -} - -// equals reports whether two MountConfig values describe the same mount parameters. -func (mc MountConfig) Equals(other MountConfig) bool { - return mc.ReadOnly == other.ReadOnly && - mc.Encrypted == other.Encrypted && - mc.EnsureFilesystem == other.EnsureFilesystem && - mc.Filesystem == other.Filesystem && - mc.BlockDev == other.BlockDev && - mc.FormatWithRefs == other.FormatWithRefs && - slices.Equal(mc.Options, other.Options) -} -type MountState int + "github.com/sirupsen/logrus" -const ( - // The mount has never been mounted. - MountStateReserved MountState = iota - // The mount is current mounted in the guest. - MountStateMounted - // The mount was previously mounted and unmounted. - MountStateUnmounted + "github.com/Microsoft/hcsshim/internal/log" + "github.com/Microsoft/hcsshim/internal/logfields" ) -// Defines a mount of a partition of a SCSI disk inside the guest. It manages -// the lifecycle of the mount inside the guest independent of the lifecycle of -// the disk attachment. +// Mount represents a single partition mount inside a Hyper-V guest VM. It +// tracks the mount lifecycle and supports reference counting so multiple +// callers can share the same physical guest mount. // -// All operations on the mount are expected to be ordered by the caller. No -// locking is done at this layer. +// All operations on a [Mount] are expected to be ordered by the caller. +// No locking is performed at this layer. type Mount struct { + // controller and lun are the hardware address of the parent disk on the VM's SCSI bus. 
controller uint lun uint - config MountConfig - state MountState - refCount int + // config is the immutable guest-side mount configuration supplied at construction. + config Config + + // state tracks the current lifecycle position of this mount. + state State + + // refCount is the number of active callers sharing this mount. + // The guest unmount is issued only when it drops to zero. + refCount int + + // guestPath is the auto-generated path inside the guest where the + // partition is mounted. Valid only in [StateMounted]. guestPath string } -// NewReserved creates a new Mount in the reserved state with the provided configuration. -func NewReserved(controller, lun uint, config MountConfig) *Mount { +// NewReserved creates a new [Mount] in the [StateReserved] state with the +// provided controller, LUN, and guest-side mount configuration. +func NewReserved(controller, lun uint, config Config) *Mount { return &Mount{ controller: controller, lun: lun, config: config, - state: MountStateReserved, + state: StateReserved, refCount: 1, } } -func (m *Mount) State() MountState { +// State returns the current lifecycle state of the mount. +func (m *Mount) State() State { return m.state } +// GuestPath returns the path inside the guest where the partition is mounted. +// The path is only valid once the mount is in [StateMounted]. func (m *Mount) GuestPath() string { return m.guestPath } -func (m *Mount) Reserve(config MountConfig) error { +// Reserve increments the reference count on this mount, allowing an additional +// caller to share the same guest path. 
+func (m *Mount) Reserve(config Config) error { if !m.config.Equals(config) { - return fmt.Errorf("cannot reserve ref on mount with different config") + return fmt.Errorf("cannot reserve mount with different config") } - if m.state != MountStateReserved && m.state != MountStateMounted { - return fmt.Errorf("cannot reserve ref on mount in state %d", m.state) + if m.state != StateReserved && m.state != StateMounted { + return fmt.Errorf("cannot reserve mount in state %s", m.state) } m.refCount++ return nil } +// MountToGuest issues the guest-side mount operation and returns the guest +// path. func (m *Mount) MountToGuest(ctx context.Context, linuxGuest LinuxGuestSCSIMounter, windowsGuest WindowsGuestSCSIMounter) (string, error) { + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.Controller: m.controller, + logfields.LUN: m.lun, + logfields.Partition: m.config.Partition, + })) + + // Drive the mount state machine. switch m.state { - // If the mount is reserved, we need to actually mount it in the guest. - case MountStateReserved: + case StateReserved: + log.G(ctx).Debug("mounting partition in guest") + + // Issue the platform-specific guest mount. On failure, advance to + // StateUnmounted since no guest state was established from Reserved. if linuxGuest != nil { if err := m.mountReservedLCOW(ctx, linuxGuest); err != nil { // Move to unmounted since we know from reserved there was no // guest state. - m.state = MountStateUnmounted + m.state = StateUnmounted return "", err } } else if windowsGuest != nil { if err := m.mountReservedWCOW(ctx, windowsGuest); err != nil { // Move to unmounted since we know from reserved there was no // guest state. 
- m.state = MountStateUnmounted + m.state = StateUnmounted return "", err } } else { - panic(fmt.Errorf("both linuxGuest and windowsGuest cannot be nil")) + return "", fmt.Errorf("both linuxGuest and windowsGuest cannot be nil") } - m.state = MountStateMounted + m.state = StateMounted // Note we don't increment the ref count here as the caller of // MountToGuest is responsible for calling it once per reservation, so // we know the ref count should be 1 at this point. + + log.G(ctx).WithField(logfields.UVMPath, m.guestPath).Debug("successfully mounted partition in guest") return m.guestPath, nil - case MountStateMounted: - // The mount is already mounted, and the caller has a reservation so do nothing, its ready. + + case StateMounted: + // Already mounted — the caller holds a reservation so return the + // existing guest path directly. return m.guestPath, nil - case MountStateUnmounted: - return "", fmt.Errorf("cannot mount an unmounted mount") + + case StateUnmounted: + return "", fmt.Errorf("cannot mount a partition in state %s", m.state) } return "", nil } +// UnmountFromGuest decrements the reference count and, when it reaches zero, +// issues the guest-side unmount. func (m *Mount) UnmountFromGuest(ctx context.Context, linuxGuest LinuxGuestSCSIUnmounter, windowsGuest WindowsGuestSCSIUnmounter) error { + ctx, _ = log.WithContext(ctx, logrus.WithFields(logrus.Fields{ + logfields.Controller: m.controller, + logfields.LUN: m.lun, + logfields.Partition: m.config.Partition, + })) + + // Drive the state machine. switch m.state { - case MountStateReserved: + case StateReserved: // No guest work to do, just decrement the ref count and if it hits zero we are done. m.refCount-- return nil - case MountStateMounted: + + case StateMounted: if m.refCount == 1 { + log.G(ctx).Debug("unmounting partition from guest") + + // Last reference — issue the physical guest unmount. 
if linuxGuest != nil { if err := m.unmountLCOW(ctx, linuxGuest); err != nil { return err @@ -168,12 +158,14 @@ func (m *Mount) UnmountFromGuest(ctx context.Context, linuxGuest LinuxGuestSCSIU } else { return fmt.Errorf("both linuxGuest and windowsGuest cannot be nil") } - m.state = MountStateUnmounted + m.state = StateUnmounted + log.G(ctx).Debug("partition unmounted from guest") } m.refCount-- return nil - case MountStateUnmounted: - return fmt.Errorf("cannot unmount an unmounted mount") + + case StateUnmounted: + return fmt.Errorf("cannot unmount a partition in state %s", m.state) } return nil } diff --git a/internal/controller/device/scsi/mount/mount_lcow.go b/internal/controller/device/scsi/mount/mount_lcow.go index 3423ae3580..2e8e06b51d 100644 --- a/internal/controller/device/scsi/mount/mount_lcow.go +++ b/internal/controller/device/scsi/mount/mount_lcow.go @@ -9,26 +9,24 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) -type LinuxGuestSCSIMounter interface { - AddLCOWMappedVirtualDisk(ctx context.Context, settings guestresource.LCOWMappedVirtualDisk) error -} - -type LinuxGuestSCSIUnmounter interface { - RemoveLCOWMappedVirtualDisk(ctx context.Context, settings guestresource.LCOWMappedVirtualDisk) error -} - +// mountFmtLCOW is the guest path template for SCSI partition mounts on LCOW. +// The path encodes the controller index, LUN, and partition index so that each +// combination gets a unique, stable mount point. Example: +// +// /run/mounts/scsi/<controller>_<lun>_<partition> const ( mountFmtLCOW = "/run/mounts/scsi/%d_%d_%d" ) -// Implements the mount operation for LCOW from the expected Reserved state. -// -// It does not transition the state to ensure the exposed mount interface -// handles transitions for all LCOW/WCOW. +// mountReservedLCOW issues the LCOW guest mount for a partition in the +// [StateReserved] state. It does not transition the state; that is handled +// by the caller in [Mount.MountToGuest]. 
func (m *Mount) mountReservedLCOW(ctx context.Context, guest LinuxGuestSCSIMounter) error { - if m.state != MountStateReserved { - return fmt.Errorf("unexpected mount state %d, expected reserved", m.state) + if m.state != StateReserved { + return fmt.Errorf("unexpected mount state %s, expected reserved", m.state) } + + // Build the stable guest path from the controller, LUN, and partition index. guestPath := fmt.Sprintf(mountFmtLCOW, m.controller, m.lun, m.config.Partition) settings := guestresource.LCOWMappedVirtualDisk{ MountPath: guestPath, @@ -45,18 +43,20 @@ func (m *Mount) mountReservedLCOW(ctx context.Context, guest LinuxGuestSCSIMount if err := guest.AddLCOWMappedVirtualDisk(ctx, settings); err != nil { return fmt.Errorf("add LCOW mapped virtual disk controller=%d lun=%d: %w", m.controller, m.lun, err) } + m.guestPath = guestPath return nil } -// Implements the unmount operation for LCOW from the expected Mounted state. -// -// It does not transition the state to ensure the exposed mount interface -// handles transitions for all LCOW/WCOW. +// unmountLCOW issues the LCOW guest unmount for a partition in the +// [StateMounted] state. It does not transition the state; that is handled +// by the caller in [Mount.UnmountFromGuest]. func (m *Mount) unmountLCOW(ctx context.Context, guest LinuxGuestSCSIUnmounter) error { - if m.state != MountStateMounted { - return fmt.Errorf("unexpected mount state %d, expected mounted", m.state) + if m.state != StateMounted { + return fmt.Errorf("unexpected mount state %s, expected mounted", m.state) } + + // Generate the predictable guest path. 
guestPath := fmt.Sprintf(mountFmtLCOW, m.controller, m.lun, m.config.Partition) settings := guestresource.LCOWMappedVirtualDisk{ MountPath: guestPath, diff --git a/internal/controller/device/scsi/mount/mount_test.go b/internal/controller/device/scsi/mount/mount_test.go index 7250db76ac..44ea88dd0e 100644 --- a/internal/controller/device/scsi/mount/mount_test.go +++ b/internal/controller/device/scsi/mount/mount_test.go @@ -54,8 +54,8 @@ func (m *mockWindowsUnmounter) RemoveWCOWMappedVirtualDisk(_ context.Context, _ // --- Helpers --- -func defaultMountConfig() MountConfig { - return MountConfig{ +func defaultConfig() Config { + return Config{ Partition: 1, ReadOnly: false, } @@ -63,7 +63,7 @@ func defaultMountConfig() MountConfig { func mountedLCOW(t *testing.T) *Mount { t.Helper() - m := NewReserved(0, 0, defaultMountConfig()) + m := NewReserved(0, 0, defaultConfig()) if _, err := m.MountToGuest(context.Background(), &mockLinuxMounter{}, nil); err != nil { t.Fatalf("setup MountToGuest: %v", err) } @@ -72,7 +72,7 @@ func mountedLCOW(t *testing.T) *Mount { func mountedWCOW(t *testing.T) *Mount { t.Helper() - m := NewReserved(0, 0, defaultMountConfig()) + m := NewReserved(0, 0, defaultConfig()) if _, err := m.MountToGuest(context.Background(), nil, &mockWindowsMounter{}); err != nil { t.Fatalf("setup MountToGuest WCOW: %v", err) } @@ -82,20 +82,20 @@ func mountedWCOW(t *testing.T) *Mount { // --- Tests --- func TestNewReserved(t *testing.T) { - cfg := MountConfig{ + cfg := Config{ Partition: 2, ReadOnly: true, Encrypted: true, Filesystem: "ext4", } m := NewReserved(1, 3, cfg) - if m.State() != MountStateReserved { - t.Errorf("expected state %d, got %d", MountStateReserved, m.State()) + if m.State() != StateReserved { + t.Errorf("expected state %d, got %d", StateReserved, m.State()) } } -func TestMountConfigEquals(t *testing.T) { - base := MountConfig{ +func TestConfigEquals(t *testing.T) { + base := Config{ ReadOnly: true, Encrypted: true, EnsureFilesystem: true, @@ 
-104,7 +104,7 @@ func TestMountConfigEquals(t *testing.T) { FormatWithRefs: false, Options: []string{"rw", "noatime"}, } - same := MountConfig{ + same := Config{ ReadOnly: true, Encrypted: true, EnsureFilesystem: true, @@ -119,16 +119,16 @@ func TestMountConfigEquals(t *testing.T) { tests := []struct { name string - modify func(c *MountConfig) + modify func(c *Config) }{ - {"ReadOnly", func(c *MountConfig) { c.ReadOnly = false }}, - {"Encrypted", func(c *MountConfig) { c.Encrypted = false }}, - {"EnsureFilesystem", func(c *MountConfig) { c.EnsureFilesystem = false }}, - {"Filesystem", func(c *MountConfig) { c.Filesystem = "xfs" }}, - {"BlockDev", func(c *MountConfig) { c.BlockDev = true }}, - {"FormatWithRefs", func(c *MountConfig) { c.FormatWithRefs = true }}, - {"Options", func(c *MountConfig) { c.Options = []string{"ro"} }}, - {"OptionsNil", func(c *MountConfig) { c.Options = nil }}, + {"ReadOnly", func(c *Config) { c.ReadOnly = false }}, + {"Encrypted", func(c *Config) { c.Encrypted = false }}, + {"EnsureFilesystem", func(c *Config) { c.EnsureFilesystem = false }}, + {"Filesystem", func(c *Config) { c.Filesystem = "xfs" }}, + {"BlockDev", func(c *Config) { c.BlockDev = true }}, + {"FormatWithRefs", func(c *Config) { c.FormatWithRefs = true }}, + {"Options", func(c *Config) { c.Options = []string{"ro"} }}, + {"OptionsNil", func(c *Config) { c.Options = nil }}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -142,7 +142,7 @@ func TestMountConfigEquals(t *testing.T) { } func TestReserve_SameConfig(t *testing.T) { - cfg := defaultMountConfig() + cfg := defaultConfig() m := NewReserved(0, 0, cfg) if err := m.Reserve(cfg); err != nil { t.Fatalf("unexpected error: %v", err) @@ -150,8 +150,8 @@ func TestReserve_SameConfig(t *testing.T) { } func TestReserve_DifferentConfig(t *testing.T) { - m := NewReserved(0, 0, MountConfig{ReadOnly: true}) - err := m.Reserve(MountConfig{ReadOnly: false}) + m := NewReserved(0, 0, Config{ReadOnly: true}) + err := 
m.Reserve(Config{ReadOnly: false}) if err == nil { t.Fatal("expected error for different config") } @@ -159,7 +159,7 @@ func TestReserve_DifferentConfig(t *testing.T) { func TestReserve_WhenMounted(t *testing.T) { m := mountedLCOW(t) - cfg := defaultMountConfig() + cfg := defaultConfig() if err := m.Reserve(cfg); err != nil { t.Fatalf("unexpected error reserving mounted mount: %v", err) } @@ -168,17 +168,17 @@ func TestReserve_WhenMounted(t *testing.T) { func TestReserve_WhenUnmounted(t *testing.T) { m := mountedLCOW(t) _ = m.UnmountFromGuest(context.Background(), &mockLinuxUnmounter{}, nil) - if m.State() != MountStateUnmounted { + if m.State() != StateUnmounted { t.Fatalf("expected unmounted, got %d", m.State()) } - err := m.Reserve(defaultMountConfig()) + err := m.Reserve(defaultConfig()) if err == nil { t.Fatal("expected error reserving unmounted mount") } } func TestMountToGuest_LCOW_Success(t *testing.T) { - m := NewReserved(0, 0, defaultMountConfig()) + m := NewReserved(0, 0, defaultConfig()) guestPath, err := m.MountToGuest(context.Background(), &mockLinuxMounter{}, nil) if err != nil { t.Fatalf("unexpected error: %v", err) @@ -186,13 +186,13 @@ func TestMountToGuest_LCOW_Success(t *testing.T) { if guestPath == "" { t.Error("expected non-empty guestPath") } - if m.State() != MountStateMounted { - t.Errorf("expected state %d, got %d", MountStateMounted, m.State()) + if m.State() != StateMounted { + t.Errorf("expected state %d, got %d", StateMounted, m.State()) } } func TestMountToGuest_WCOW_Success(t *testing.T) { - m := NewReserved(0, 0, defaultMountConfig()) + m := NewReserved(0, 0, defaultConfig()) guestPath, err := m.MountToGuest(context.Background(), nil, &mockWindowsMounter{}) if err != nil { t.Fatalf("unexpected error: %v", err) @@ -200,14 +200,14 @@ func TestMountToGuest_WCOW_Success(t *testing.T) { if guestPath == "" { t.Error("expected non-empty guestPath") } - if m.State() != MountStateMounted { - t.Errorf("expected state %d, got %d", 
MountStateMounted, m.State()) + if m.State() != StateMounted { + t.Errorf("expected state %d, got %d", StateMounted, m.State()) } } func TestMountToGuest_LCOW_Error(t *testing.T) { mountErr := errors.New("lcow mount failed") - m := NewReserved(0, 0, defaultMountConfig()) + m := NewReserved(0, 0, defaultConfig()) _, err := m.MountToGuest(context.Background(), &mockLinuxMounter{err: mountErr}, nil) if err == nil { t.Fatal("expected error, got nil") @@ -215,14 +215,14 @@ func TestMountToGuest_LCOW_Error(t *testing.T) { if !errors.Is(err, mountErr) { t.Errorf("expected wrapped error %v, got %v", mountErr, err) } - if m.State() != MountStateUnmounted { - t.Errorf("expected state %d after failure, got %d", MountStateUnmounted, m.State()) + if m.State() != StateUnmounted { + t.Errorf("expected state %d after failure, got %d", StateUnmounted, m.State()) } } func TestMountToGuest_WCOW_Error(t *testing.T) { mountErr := errors.New("wcow mount failed") - m := NewReserved(0, 0, defaultMountConfig()) + m := NewReserved(0, 0, defaultConfig()) _, err := m.MountToGuest(context.Background(), nil, &mockWindowsMounter{err: mountErr}) if err == nil { t.Fatal("expected error, got nil") @@ -230,14 +230,14 @@ func TestMountToGuest_WCOW_Error(t *testing.T) { if !errors.Is(err, mountErr) { t.Errorf("expected wrapped error %v, got %v", mountErr, err) } - if m.State() != MountStateUnmounted { - t.Errorf("expected state %d after failure, got %d", MountStateUnmounted, m.State()) + if m.State() != StateUnmounted { + t.Errorf("expected state %d after failure, got %d", StateUnmounted, m.State()) } } func TestMountToGuest_WCOW_FormatWithRefs(t *testing.T) { scratchCalled := false - m := NewReserved(0, 0, MountConfig{Partition: 1, FormatWithRefs: true}) + m := NewReserved(0, 0, Config{Partition: 1, FormatWithRefs: true}) wm := &mockWindowsMounter{scratchFn: func() { scratchCalled = true }} _, err := m.MountToGuest(context.Background(), nil, wm) if err != nil { @@ -257,8 +257,8 @@ func 
TestMountToGuest_Idempotent_WhenMounted(t *testing.T) { if guestPath == "" { t.Error("expected non-empty guestPath on idempotent mount") } - if m.State() != MountStateMounted { - t.Errorf("expected state %d, got %d", MountStateMounted, m.State()) + if m.State() != StateMounted { + t.Errorf("expected state %d, got %d", StateMounted, m.State()) } } @@ -271,14 +271,12 @@ func TestMountToGuest_ErrorWhenUnmounted(t *testing.T) { } } -func TestMountToGuest_PanicsWhenBothGuestsNil(t *testing.T) { - m := NewReserved(0, 0, defaultMountConfig()) - defer func() { - if r := recover(); r == nil { - t.Fatal("expected panic when both guests are nil") - } - }() - _, _ = m.MountToGuest(context.Background(), nil, nil) +func TestMountToGuest_ErrorWhenBothGuestsNil(t *testing.T) { + m := NewReserved(0, 0, defaultConfig()) + _, err := m.MountToGuest(context.Background(), nil, nil) + if err == nil { + t.Fatal("expected error when both guests are nil") + } } func TestUnmountFromGuest_LCOW_Success(t *testing.T) { @@ -287,8 +285,8 @@ func TestUnmountFromGuest_LCOW_Success(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if m.State() != MountStateUnmounted { - t.Errorf("expected state %d, got %d", MountStateUnmounted, m.State()) + if m.State() != StateUnmounted { + t.Errorf("expected state %d, got %d", StateUnmounted, m.State()) } } @@ -298,8 +296,8 @@ func TestUnmountFromGuest_WCOW_Success(t *testing.T) { if err != nil { t.Fatalf("unexpected error: %v", err) } - if m.State() != MountStateUnmounted { - t.Errorf("expected state %d, got %d", MountStateUnmounted, m.State()) + if m.State() != StateUnmounted { + t.Errorf("expected state %d, got %d", StateUnmounted, m.State()) } } @@ -314,8 +312,8 @@ func TestUnmountFromGuest_LCOW_Error(t *testing.T) { t.Errorf("expected wrapped error %v, got %v", unmountErr, err) } // State should remain mounted since unmount failed. 
- if m.State() != MountStateMounted { - t.Errorf("expected state %d, got %d", MountStateMounted, m.State()) + if m.State() != StateMounted { + t.Errorf("expected state %d, got %d", StateMounted, m.State()) } } @@ -329,20 +327,20 @@ func TestUnmountFromGuest_WCOW_Error(t *testing.T) { if !errors.Is(err, unmountErr) { t.Errorf("expected wrapped error %v, got %v", unmountErr, err) } - if m.State() != MountStateMounted { - t.Errorf("expected state %d, got %d", MountStateMounted, m.State()) + if m.State() != StateMounted { + t.Errorf("expected state %d, got %d", StateMounted, m.State()) } } func TestUnmountFromGuest_FromReserved_DecrementsRefCount(t *testing.T) { - m := NewReserved(0, 0, defaultMountConfig()) + m := NewReserved(0, 0, defaultConfig()) err := m.UnmountFromGuest(context.Background(), &mockLinuxUnmounter{}, nil) if err != nil { t.Fatalf("unexpected error: %v", err) } // Should still be reserved since no guest mount was done. - if m.State() != MountStateReserved { - t.Errorf("expected state %d, got %d", MountStateReserved, m.State()) + if m.State() != StateReserved { + t.Errorf("expected state %d, got %d", StateReserved, m.State()) } } @@ -364,7 +362,7 @@ func TestUnmountFromGuest_ErrorWhenBothGuestsNil(t *testing.T) { } func TestUnmountFromGuest_MultipleRefs_DoesNotUnmountUntilLastRef(t *testing.T) { - cfg := defaultMountConfig() + cfg := defaultConfig() m := NewReserved(0, 0, cfg) // Add a second reservation. 
if err := m.Reserve(cfg); err != nil { @@ -378,14 +376,14 @@ func TestUnmountFromGuest_MultipleRefs_DoesNotUnmountUntilLastRef(t *testing.T) if err := m.UnmountFromGuest(context.Background(), &mockLinuxUnmounter{}, nil); err != nil { t.Fatalf("first UnmountFromGuest: %v", err) } - if m.State() != MountStateMounted { - t.Errorf("expected state %d after first unmount, got %d", MountStateMounted, m.State()) + if m.State() != StateMounted { + t.Errorf("expected state %d after first unmount, got %d", StateMounted, m.State()) } // Second unmount should actually unmount. if err := m.UnmountFromGuest(context.Background(), &mockLinuxUnmounter{}, nil); err != nil { t.Fatalf("second UnmountFromGuest: %v", err) } - if m.State() != MountStateUnmounted { - t.Errorf("expected state %d after final unmount, got %d", MountStateUnmounted, m.State()) + if m.State() != StateUnmounted { + t.Errorf("expected state %d after final unmount, got %d", StateUnmounted, m.State()) } } diff --git a/internal/controller/device/scsi/mount/mount_wcow.go b/internal/controller/device/scsi/mount/mount_wcow.go index 8b624d192d..f3cb505ae1 100644 --- a/internal/controller/device/scsi/mount/mount_wcow.go +++ b/internal/controller/device/scsi/mount/mount_wcow.go @@ -9,33 +9,31 @@ import ( "github.com/Microsoft/hcsshim/internal/protocol/guestresource" ) -type WindowsGuestSCSIMounter interface { - AddWCOWMappedVirtualDisk(ctx context.Context, settings guestresource.WCOWMappedVirtualDisk) error - AddWCOWMappedVirtualDiskForContainerScratch(ctx context.Context, settings guestresource.WCOWMappedVirtualDisk) error -} - -type WindowsGuestSCSIUnmounter interface { - RemoveWCOWMappedVirtualDisk(ctx context.Context, settings guestresource.WCOWMappedVirtualDisk) error -} - +// mountFmtWCOW is the guest path template for SCSI partition mounts on WCOW. +// The path encodes the controller index, LUN, and partition index so that each +// combination gets a unique, stable mount point. 
Example: +// +// C:\mounts\scsi\<controller>_<lun>_<partition> const ( mountFmtWCOW = "C:\\mounts\\scsi\\%d_%d_%d" ) -// Implements the mount operation for WCOW from the expected Reserved state. -// -// It does not transition the state to ensure the exposed mount interface -// handles transitions for all LCOW/WCOW. +// mountReservedWCOW issues the WCOW guest mount for a partition in the +// [StateReserved] state. It does not transition the state; that is handled +// by the caller in [Mount.MountToGuest]. func (m *Mount) mountReservedWCOW(ctx context.Context, guest WindowsGuestSCSIMounter) error { - if m.state != MountStateReserved { - return fmt.Errorf("unexpected mount state %d, expected reserved", m.state) + if m.state != StateReserved { + return fmt.Errorf("unexpected mount state %s, expected reserved", m.state) } + + // Generate a predictable guest path. guestPath := fmt.Sprintf(mountFmtWCOW, m.controller, m.lun, m.config.Partition) settings := guestresource.WCOWMappedVirtualDisk{ ContainerPath: guestPath, Lun: int32(m.lun), } + // FormatWithRefs disks use a separate scratch path to enable ReFS formatting. if m.config.FormatWithRefs { if err := guest.AddWCOWMappedVirtualDiskForContainerScratch(ctx, settings); err != nil { return fmt.Errorf("add WCOW mapped virtual disk for container scratch controller=%d lun=%d: %w", m.controller, m.lun, err) @@ -49,14 +47,15 @@ func (m *Mount) mountReservedWCOW(ctx context.Context, guest WindowsGuestSCSIMou return nil } -// Implements the unmount operation for WCOW from the expected Mounted state. -// -// It does not transition the state to ensure the exposed mount interface -// handles transitions for all LCOW/WCOW. +// unmountWCOW issues the WCOW guest unmount for a partition in the +// [StateMounted] state. It does not transition the state; that is handled +// by the caller in [Mount.UnmountFromGuest]. 
func (m *Mount) unmountWCOW(ctx context.Context, guest WindowsGuestSCSIUnmounter) error { - if m.state != MountStateMounted { - return fmt.Errorf("unexpected mount state %d, expected mounted", m.state) + if m.state != StateMounted { + return fmt.Errorf("unexpected mount state %s, expected mounted", m.state) } + + // Build the predictable guest path. guestPath := fmt.Sprintf(mountFmtWCOW, m.controller, m.lun, m.config.Partition) settings := guestresource.WCOWMappedVirtualDisk{ ContainerPath: guestPath, diff --git a/internal/controller/device/scsi/mount/state.go b/internal/controller/device/scsi/mount/state.go new file mode 100644 index 0000000000..64ed80ef02 --- /dev/null +++ b/internal/controller/device/scsi/mount/state.go @@ -0,0 +1,56 @@ +//go:build windows + +package mount + +// State represents the current lifecycle state of a partition mount inside +// the guest. +// +// The normal progression is: +// +// StateReserved → StateMounted → StateUnmounted +// +// Mount failure from StateReserved transitions directly to the terminal +// StateUnmounted state; the entry is then removed by the parent [disk.Disk]. +// An unmount failure from StateMounted leaves the mount in StateMounted so +// the caller can retry. 
+// +// Full state-transition table: +// +// Current State │ Trigger │ Next State +// ────────────────┼────────────────────────────────────────────┼────────────────────── +// StateReserved │ guest mount succeeds │ StateMounted +// StateReserved │ guest mount fails │ StateUnmounted +// StateReserved │ UnmountFromGuest (any refCount) │ StateReserved (ref--) +// StateMounted │ UnmountFromGuest (refCount > 1) │ StateMounted (ref--) +// StateMounted │ UnmountFromGuest (refCount == 1) succeeds │ StateUnmounted +// StateMounted │ UnmountFromGuest (refCount == 1) fails │ StateMounted +// StateUnmounted │ (terminal — entry removed from disk) │ — +type State int + +const ( + // StateReserved is the initial state; the mount entry has been created + // but the guest mount operation has not yet been issued. + StateReserved State = iota + + // StateMounted means the partition has been successfully mounted inside + // the guest. The guest path is valid from this state onward. + StateMounted + + // StateUnmounted means the guest has unmounted the partition. This is a + // terminal state; the entry is removed from the parent disk. + StateUnmounted +) + +// String returns a human-readable name for the [State]. +func (s State) String() string { + switch s { + case StateReserved: + return "Reserved" + case StateMounted: + return "Mounted" + case StateUnmounted: + return "Unmounted" + default: + return "Unknown" + } +} diff --git a/internal/controller/device/scsi/mount/types.go b/internal/controller/device/scsi/mount/types.go new file mode 100644 index 0000000000..388d68435e --- /dev/null +++ b/internal/controller/device/scsi/mount/types.go @@ -0,0 +1,86 @@ +//go:build windows + +package mount + +import ( + "context" + "slices" + + "github.com/Microsoft/hcsshim/internal/protocol/guestresource" +) + +// Config describes how a partition of a SCSI disk should be mounted inside +// the guest. 
+type Config struct { + // Partition is the target partition index (1-based) on a partitioned + // device. Zero means the whole disk. + // + // WCOW only supports whole disk. LCOW supports both whole disk and + // partition mounts if the disk has multiple. + Partition uint64 + // ReadOnly mounts the partition read-only. + ReadOnly bool + // Encrypted encrypts the device with dm-crypt. + // + // Only supported for LCOW. + Encrypted bool + // Options are mount flags or data passed to the guest mount call. + // + // Only supported for LCOW. + Options []string + // EnsureFilesystem formats the partition as Filesystem if not already + // formatted. + // + // Only supported for LCOW. + EnsureFilesystem bool + // Filesystem is the target filesystem type. + // + // Only supported for LCOW. + Filesystem string + // BlockDev mounts the device as a block device instead of a filesystem. + // + // Only supported for LCOW. + BlockDev bool + // FormatWithRefs formats the disk using ReFS. + // + // Only supported for WCOW scratch disks. + FormatWithRefs bool +} + +// Equals reports whether two mount Config values describe the same mount parameters. +func (c Config) Equals(other Config) bool { + return c.ReadOnly == other.ReadOnly && + c.Encrypted == other.Encrypted && + c.EnsureFilesystem == other.EnsureFilesystem && + c.Filesystem == other.Filesystem && + c.BlockDev == other.BlockDev && + c.FormatWithRefs == other.FormatWithRefs && + slices.Equal(c.Options, other.Options) +} + +// LinuxGuestSCSIMounter mounts a virtual disk partition inside an LCOW guest. +type LinuxGuestSCSIMounter interface { + // AddLCOWMappedVirtualDisk maps a virtual disk partition into the LCOW guest. + AddLCOWMappedVirtualDisk(ctx context.Context, settings guestresource.LCOWMappedVirtualDisk) error +} + +// LinuxGuestSCSIUnmounter unmounts a virtual disk partition from an LCOW guest. 
+type LinuxGuestSCSIUnmounter interface { + // RemoveLCOWMappedVirtualDisk unmaps a virtual disk partition from the LCOW guest. + RemoveLCOWMappedVirtualDisk(ctx context.Context, settings guestresource.LCOWMappedVirtualDisk) error +} + +// WindowsGuestSCSIMounter mounts a virtual disk partition inside a WCOW guest. +type WindowsGuestSCSIMounter interface { + // AddWCOWMappedVirtualDisk maps a virtual disk into the WCOW guest. + AddWCOWMappedVirtualDisk(ctx context.Context, settings guestresource.WCOWMappedVirtualDisk) error + // AddWCOWMappedVirtualDiskForContainerScratch maps a virtual disk as a + // container scratch disk into the WCOW guest. + AddWCOWMappedVirtualDiskForContainerScratch(ctx context.Context, settings guestresource.WCOWMappedVirtualDisk) error +} + +// WindowsGuestSCSIUnmounter unmounts a virtual disk partition from a WCOW guest. +type WindowsGuestSCSIUnmounter interface { + // RemoveWCOWMappedVirtualDisk unmaps a virtual disk from the WCOW guest. + RemoveWCOWMappedVirtualDisk(ctx context.Context, settings guestresource.WCOWMappedVirtualDisk) error +} diff --git a/internal/controller/device/scsi/scsi.go b/internal/controller/device/scsi/scsi.go deleted file mode 100644 index a3d4c9bfb8..0000000000 --- a/internal/controller/device/scsi/scsi.go +++ /dev/null @@ -1,7 +0,0 @@ -//go:build windows - -// Package SCSI implements the SCSI controller for managing SCSI attached -// devices to a Utility VM as well as the Guest Mounts surfacing those devices -// for use by the Guest containers. 
- -package scsi diff --git a/internal/controller/device/scsi/types.go b/internal/controller/device/scsi/types.go new file mode 100644 index 0000000000..fc94fefb06 --- /dev/null +++ b/internal/controller/device/scsi/types.go @@ -0,0 +1,39 @@ +//go:build windows + +package scsi + +import ( + "github.com/Microsoft/hcsshim/internal/controller/device/scsi/disk" + "github.com/Microsoft/hcsshim/internal/controller/device/scsi/mount" +) + +// numLUNsPerController is the maximum number of LUNs per controller, fixed by Hyper-V. +const numLUNsPerController = 64 + +// reservation links a caller-supplied reservation ID to a SCSI slot and +// partition index. Access must be guarded by Controller.mu. +type reservation struct { + // controllerSlot is the index into Controller.controllerSlots for the disk. + controllerSlot int + // partition is the partition index on the disk for this reservation. + partition uint64 +} + +// VMSCSIOps combines the VM-side SCSI add and remove operations. +type VMSCSIOps interface { + disk.VMSCSIAdder + disk.VMSCSIRemover +} + +// LinuxGuestSCSIOps combines all guest-side SCSI operations for LCOW guests. +type LinuxGuestSCSIOps interface { + mount.LinuxGuestSCSIMounter + mount.LinuxGuestSCSIUnmounter + disk.LinuxGuestSCSIEjector +} + +// WindowsGuestSCSIOps combines all guest-side SCSI operations for WCOW guests. +type WindowsGuestSCSIOps interface { + mount.WindowsGuestSCSIMounter + mount.WindowsGuestSCSIUnmounter +} diff --git a/internal/logfields/fields.go b/internal/logfields/fields.go index dac5a708e5..bf5ac65a44 100644 --- a/internal/logfields/fields.go +++ b/internal/logfields/fields.go @@ -22,6 +22,13 @@ const ( Bytes = "bytes" Pipe = "pipe" + // SCSI Constants + + Controller = "controller" + LUN = "lun" + DiskType = "disk-type" + Partition = "partition" + // Common Misc Attempt = "attemptNo"