package main

import (
	"context"
	"crypto/rand"
	"crypto/sha256"
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"net/url"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
	"time"
)

// sliceVMProvisionInput is the validated parameter set for a slice VM
// provision task, as produced by parseSliceVMProvisionInput.
//
// TaskID is not parsed from the parameters; the task handler copies it from the
// task. ImagePath is a cleaned absolute path under one of the approved image
// roots and ImageURL is an optional https URL used to download the image when
// ImagePath does not exist. ImageSHA256 is a lower-case hex digest without the
// "sha256:" prefix (empty disables verification). DriverStrategy is one of
// "preinstalled", "cloud-init" or "none". GracefulTimeoutSeconds is the value
// returned by parseSliceVMTimeout.
type sliceVMProvisionInput struct {
	AllocationID           string
	TaskID                 string
	VMName                 string
	ImagePath              string
	ImageURL               string
	ImageSHA256            string
	ImageTrusted           bool
	DriverStrategy         string
	DefaultUsername        string
	SSHPublicKeys          []string
	Slots                  []sliceVMSlot
	OVSBridge              string
	CloudInitDir           string
	GracefulTimeoutSeconds int
}

// sliceVMReleaseInput is the validated parameter set for a slice VM release
// task, as produced by parseSliceVMReleaseInput. TaskID is filled in by the
// task handler; Wipe mirrors the boolean "wipe" parameter.
type sliceVMReleaseInput struct {
	AllocationID           string
	TaskID                 string
	VMName                 string
	Slots                  []sliceVMSlot
	CloudInitDir           string
	GracefulTimeoutSeconds int
	Wipe                   bool
}

// sliceVMRecoveryInput is the validated parameter set shared by the start and
// restart tasks and by the runtime health probe. PrivateIP and DefaultUsername
// may be empty; when PrivateIP is empty the health probe skips the SSH check.
type sliceVMRecoveryInput struct {
	AllocationID    string
	VMName          string
	PrivateIP       string
	DefaultUsername string
}

// sliceVMSlot describes one slot assigned to a slice VM, as parsed by
// parseSliceVMSlots.
//
// PCIAddress and FabricDevice are lower-cased PCI addresses (FabricDevice is
// optional), NVMeDevice is a cleaned /dev path, NUMANode is nil when the
// parameter was absent, and VCPUCount / MemoryMIB default to 12 and 65536 when
// the parameter is missing or not positive. MACAddress is lower-cased and
// PrivateIP is a bare IP address (any CIDR suffix is dropped).
type sliceVMSlot struct {
	SlotID       string
	SlotIndex    int
	PCIAddress   string
	FabricDevice string
	NVMeDevice   string
	NUMANode     *int
	VCPUCount    int
	MemoryMIB    int
	MACAddress   string
	PrivateIP    string
}

// Validation patterns and allow-lists applied to slice VM task parameters:
//   - sliceVMNamePattern accepts "gpuaas-slice-" followed by 32 lower-case hex
//     characters, or the "slice-<name>-<1-8>g-s<NN>-<8 hex>" form.
//   - sliceVMBridgePattern accepts 1-64 characters from [A-Za-z0-9_.:-].
//   - sliceVMMACPattern accepts six colon-separated hex octets.
//   - sliceVMPCIAddressPattern accepts "dddd:bb:dd.f" with a function of 0-7.
//   - sliceVMBlockDevicePattern accepts /dev/nvme<N>n<M> or a /dev/disk/by-id
//     entry.
//   - sliceVMImageAllowedPrefixes lists the directories an image_path may live
//     under.
var (
	sliceVMNamePattern          = regexp.MustCompile(`^(gpuaas-slice-[a-f0-9]{32}|slice-[a-z0-9][a-z0-9-]{0,23}-[1-8]g-s[0-9]{2}-[a-f0-9]{8})$`)
	sliceVMBridgePattern        = regexp.MustCompile(`^[A-Za-z0-9_.:-]{1,64}$`)
	sliceVMMACPattern           = regexp.MustCompile(`^[0-9a-fA-F]{2}(:[0-9a-fA-F]{2}){5}$`)
	sliceVMPCIAddressPattern    = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-7]$`)
	sliceVMBlockDevicePattern   = regexp.MustCompile(`^/dev/(nvme[0-9]+n[0-9]+|disk/by-id/[A-Za-z0-9_.:@+-]+)$`)
	sliceVMImageAllowedPrefixes = []string{"/var/lib/gpuaas/slice-images", "/var/lib/libvirt/images"}
)

// Package-level indirection points for OS calls, sysfs locations and the
// longer-running helpers used by the slice VM code. They are variables rather
// than direct calls — presumably so tests can substitute fakes; confirm against
// the test files.
//
// sliceVMRuntimeVFIOBindEnabled reports whether the trimmed
// GPUAAS_SLICE_RUNTIME_VFIO_BIND environment variable equals "1".
var (
	sliceVMCommandContext         = exec.CommandContext
	sliceVMStat                   = os.Stat
	sliceVMOpen                   = os.Open
	sliceVMRemoveAll              = os.RemoveAll
	sliceVMGlob                   = filepath.Glob
	sliceVMWriteFile              = os.WriteFile
	sliceVMReadlink               = os.Readlink
	sliceVMSysfsPCIPath           = func(addr string) string { return filepath.Join("/sys/bus/pci/devices", addr) }
	sliceVMVFIOBindPath           = "/sys/bus/pci/drivers/vfio-pci/bind"
	sliceVMRuntimeVFIOBindEnabled = func() bool {
		return strings.EqualFold(strings.TrimSpace(os.Getenv("GPUAAS_SLICE_RUNTIME_VFIO_BIND")), "1")
	}
	sliceVMWaitSSH                      = waitForSliceVMSSH
	sliceVMWaitGuestReady               = waitForSliceVMGuestReady
	sliceVMDownloadImage                = downloadSliceVMImage
	sliceVMEnsureHostDevicesBoundToVFIO = ensureSliceVMHostDevicesBoundToVFIO
)

// Fixed host and guest paths plus defaults used by the slice VM code.
//
// NOTE: despite its name, defaultSliceVMTerminalSSHKeyBits holds the key type
// that is passed to "ssh-keygen -t", not a bit length.
const (
	sliceVMDNSMasqConfigDir             = "/etc/dnsmasq.d"
	defaultSliceVMNodeSchedulerLeaseDir = "/var/lib/gpuaas/node-scheduler/leases"
	defaultSliceVMTerminalSSHKeyPath    = "/var/lib/gpuaas/terminal/id_ed25519"
	defaultSliceVMTerminalSSHKeyBits    = "ed25519"
	defaultSliceVMTerminalSSHKeyLabel   = "gpuaas-slice-terminal"
	sliceVMGuestReadyMarker             = "/var/lib/gpuaas/slice-ready"
	sliceVMGuestMetricsEnvPath          = "/etc/gpuaas/metrics-helper.env"
	sliceVMGuestMetricsHelperPath       = "/usr/local/bin/gpuaas-metrics-helper"
	sliceVMGuestTelemetryProbePath      = "/usr/local/bin/gpuaas-telemetry-probe.sh"
)

// sliceVMNodeSchedulerLeaseDir is the directory that holds the per-slot lease
// files. It starts at the default path and is a variable rather than a
// constant — presumably so tests can redirect it; confirm.
var sliceVMNodeSchedulerLeaseDir = defaultSliceVMNodeSchedulerLeaseDir

// sliceVMGuestTelemetryConfig carries the guest telemetry settings returned by
// registerSliceVMGuestTelemetry and handed to renderSliceVMUserData during
// provisioning.
type sliceVMGuestTelemetryConfig struct {
	AllocationID string
	PushURL      string
	PushToken    string
}

// handleSliceVMProvisionTask validates the task parameters, stamps the trimmed
// task ID onto the parsed input and hands it to runSliceVMProvision.
func handleSliceVMProvisionTask(ctx context.Context, task nodeTask) (map[string]any, error) {
	input, parseErr := parseSliceVMProvisionInput(task.Params)
	if parseErr != nil {
		return nil, parseErr
	}
	input.TaskID = strings.TrimSpace(task.TaskID)
	result, runErr := runSliceVMProvision(ctx, input)
	return result, runErr
}

// handleSliceVMReleaseTask validates the task parameters, stamps the trimmed
// task ID onto the parsed input and hands it to runSliceVMRelease.
func handleSliceVMReleaseTask(ctx context.Context, task nodeTask) (map[string]any, error) {
	input, parseErr := parseSliceVMReleaseInput(task.Params)
	if parseErr != nil {
		return nil, parseErr
	}
	input.TaskID = strings.TrimSpace(task.TaskID)
	result, runErr := runSliceVMRelease(ctx, input)
	return result, runErr
}

// handleSliceVMStartTask validates the recovery parameters of a start task and
// delegates to runSliceVMStart.
func handleSliceVMStartTask(ctx context.Context, task nodeTask) (map[string]any, error) {
	input, parseErr := parseSliceVMRecoveryInput(task.Params)
	if parseErr != nil {
		return nil, parseErr
	}
	result, runErr := runSliceVMStart(ctx, input)
	return result, runErr
}

// handleSliceVMRestartTask validates the recovery parameters of a restart task
// and delegates to runSliceVMRestart.
func handleSliceVMRestartTask(ctx context.Context, task nodeTask) (map[string]any, error) {
	input, parseErr := parseSliceVMRecoveryInput(task.Params)
	if parseErr != nil {
		return nil, parseErr
	}
	result, runErr := runSliceVMRestart(ctx, input)
	return result, runErr
}

// parseSliceVMProvisionInput validates the raw parameters of a provision task
// and returns them as a sliceVMProvisionInput.
//
// Checks run in a fixed order (allocation_id, vm_name, image_path,
// image_sha256, image_url, default_username, ssh_public_keys, slots,
// ovs_bridge, cloud_init_dir) and the first failure is returned together with
// a zero-value input. TaskID is left empty for the caller to fill in.
func parseSliceVMProvisionInput(params map[string]any) (sliceVMProvisionInput, error) {
	var (
		out sliceVMProvisionInput
		err error
	)
	// Every failure returns the zero value so callers never see a half-filled input.
	fail := func(cause error) (sliceVMProvisionInput, error) {
		return sliceVMProvisionInput{}, cause
	}

	if out.AllocationID, err = requiredUUIDParam(params, "allocation_id"); err != nil {
		return fail(err)
	}
	if out.VMName, err = parseSliceVMName(params); err != nil {
		return fail(err)
	}

	rawImagePath, _ := params["image_path"].(string)
	if out.ImagePath, err = normalizeSliceVMImagePath(rawImagePath); err != nil {
		return fail(err)
	}

	// The digest may arrive as "sha256:<hex>"; only the hex part is kept.
	rawDigest, _ := params["image_sha256"].(string)
	digest := strings.TrimPrefix(rawDigest, "sha256:")
	digest = strings.TrimSpace(strings.ToLower(digest))
	if digest != "" && !sha256HexPattern.MatchString(digest) {
		return fail(fmt.Errorf("invalid image_sha256"))
	}
	out.ImageSHA256 = digest
	out.ImageTrusted, _ = params["image_trusted"].(bool)

	if out.ImageURL, err = normalizeSliceVMImageURL(params["image_url"]); err != nil {
		return fail(err)
	}
	out.DriverStrategy = normalizeSliceVMDriverStrategy(params["driver_strategy"])

	rawUser, _ := params["default_username"].(string)
	switch user := strings.TrimSpace(rawUser); {
	case user == "":
		return fail(fmt.Errorf("missing default_username"))
	case !usernameOnNodePattern.MatchString(user):
		return fail(fmt.Errorf("invalid default_username"))
	default:
		out.DefaultUsername = user
	}

	if out.SSHPublicKeys, err = parseSliceVMSSHPublicKeys(params["ssh_public_keys"]); err != nil {
		return fail(err)
	}
	if out.Slots, err = parseSliceVMSlots(params["slots"]); err != nil {
		return fail(err)
	}
	if out.OVSBridge, err = parseSliceVMBridge(params); err != nil {
		return fail(err)
	}
	if out.CloudInitDir, err = normalizeSliceVMCloudInitDir(params, out.AllocationID); err != nil {
		return fail(err)
	}
	out.GracefulTimeoutSeconds = parseSliceVMTimeout(params)
	return out, nil
}

// normalizeSliceVMDriverStrategy maps the raw driver_strategy parameter to one
// of "preinstalled", "cloud-init" or "none" (case-insensitive, surrounding
// whitespace ignored). Anything else, including non-string values, yields
// "cloud-init".
func normalizeSliceVMDriverStrategy(raw any) string {
	text, _ := raw.(string)
	normalized := strings.TrimSpace(strings.ToLower(text))
	if normalized == "preinstalled" || normalized == "cloud-init" || normalized == "none" {
		return normalized
	}
	return "cloud-init"
}

// parseSliceVMReleaseInput validates the raw parameters of a release task.
// Checks run in the order allocation_id, vm_name, slots, cloud_init_dir; the
// first failure is returned with a zero-value input. TaskID is left empty for
// the caller to fill in.
func parseSliceVMReleaseInput(params map[string]any) (sliceVMReleaseInput, error) {
	var (
		out sliceVMReleaseInput
		err error
	)
	if out.AllocationID, err = requiredUUIDParam(params, "allocation_id"); err != nil {
		return sliceVMReleaseInput{}, err
	}
	if out.VMName, err = parseSliceVMName(params); err != nil {
		return sliceVMReleaseInput{}, err
	}
	if out.Slots, err = parseSliceVMSlots(params["slots"]); err != nil {
		return sliceVMReleaseInput{}, err
	}
	if out.CloudInitDir, err = normalizeSliceVMCloudInitDir(params, out.AllocationID); err != nil {
		return sliceVMReleaseInput{}, err
	}
	// A missing or non-boolean "wipe" leaves Wipe false.
	out.Wipe, _ = params["wipe"].(bool)
	out.GracefulTimeoutSeconds = parseSliceVMTimeout(params)
	return out, nil
}

// parseSliceVMRecoveryInput validates the parameters shared by the start and
// restart tasks. private_ip and default_username are optional; when present
// they must be a parseable IP (or CIDR) and a valid node username.
func parseSliceVMRecoveryInput(params map[string]any) (sliceVMRecoveryInput, error) {
	var (
		out sliceVMRecoveryInput
		err error
	)
	if out.AllocationID, err = requiredUUIDParam(params, "allocation_id"); err != nil {
		return sliceVMRecoveryInput{}, err
	}
	if out.VMName, err = parseSliceVMName(params); err != nil {
		return sliceVMRecoveryInput{}, err
	}

	rawIP, _ := params["private_ip"].(string)
	if out.PrivateIP, err = normalizeSliceVMPrivateIP(rawIP); err != nil {
		return sliceVMRecoveryInput{}, err
	}

	rawUser, _ := params["default_username"].(string)
	user := strings.TrimSpace(rawUser)
	if user != "" && !usernameOnNodePattern.MatchString(user) {
		return sliceVMRecoveryInput{}, fmt.Errorf("invalid default_username")
	}
	out.DefaultUsername = user
	return out, nil
}

// parseSliceVMName reads the "vm_name" parameter, trims it and returns it when
// it matches sliceVMNamePattern. A missing or non-string value is treated as
// the empty string and therefore rejected.
func parseSliceVMName(params map[string]any) (string, error) {
	raw, _ := params["vm_name"].(string)
	if name := strings.TrimSpace(raw); sliceVMNamePattern.MatchString(name) {
		return name, nil
	}
	return "", fmt.Errorf("invalid vm_name")
}

// normalizeSliceVMImagePath trims and cleans an image path and accepts it only
// when it is absolute and equal to, or located below, one of the directories in
// sliceVMImageAllowedPrefixes. The cleaned path is returned.
func normalizeSliceVMImagePath(path string) (string, error) {
	trimmed := strings.TrimSpace(path)
	switch {
	case trimmed == "":
		return "", fmt.Errorf("missing image_path")
	case !filepath.IsAbs(trimmed):
		return "", fmt.Errorf("image_path must be absolute")
	}
	candidate := filepath.Clean(trimmed)
	for i := range sliceVMImageAllowedPrefixes {
		approved := filepath.Clean(sliceVMImageAllowedPrefixes[i])
		if candidate == approved {
			return candidate, nil
		}
		// The separator suffix stops "/roots-evil" from matching "/roots".
		if strings.HasPrefix(candidate, approved+string(filepath.Separator)) {
			return candidate, nil
		}
	}
	return "", fmt.Errorf("image_path outside approved roots")
}

// normalizeSliceVMImageURL validates the optional image_url parameter.
// An empty, missing or non-string value yields "" without error. Otherwise the
// URL must parse, use the https scheme, name a host, and carry neither user
// info nor a fragment; the re-serialised URL is returned.
func normalizeSliceVMImageURL(raw any) (string, error) {
	text, _ := raw.(string)
	text = strings.TrimSpace(text)
	if text == "" {
		return "", nil
	}
	u, err := url.Parse(text)
	switch {
	case err != nil, u == nil:
		return "", fmt.Errorf("invalid image_url")
	case u.Scheme != "https", u.Host == "":
		return "", fmt.Errorf("invalid image_url")
	case u.User != nil, u.Fragment != "":
		return "", fmt.Errorf("invalid image_url")
	}
	return u.String(), nil
}

// normalizeSliceVMCloudInitDir resolves the cloud-init working directory for an
// allocation. When the "cloud_init_dir" parameter is empty the directory
// defaults to /var/lib/gpuaas/slices/<allocationID>. The result must be an
// absolute path strictly below /var/lib/gpuaas/slices; the cleaned path is
// returned.
func normalizeSliceVMCloudInitDir(params map[string]any, allocationID string) (string, error) {
	const approvedRoot = "/var/lib/gpuaas/slices"

	requested, _ := params["cloud_init_dir"].(string)
	dir := strings.TrimSpace(requested)
	if dir == "" {
		dir = filepath.Join(approvedRoot, allocationID)
	}
	if !filepath.IsAbs(dir) {
		return "", fmt.Errorf("cloud_init_dir must be absolute")
	}
	dir = filepath.Clean(dir)
	base := filepath.Clean(approvedRoot)
	// The root itself is rejected: only a sub-directory is acceptable.
	inside := dir != base && strings.HasPrefix(dir, base+string(filepath.Separator))
	if !inside {
		return "", fmt.Errorf("cloud_init_dir outside approved root")
	}
	return dir, nil
}

// parseSliceVMBridge returns the trimmed "ovs_bridge" parameter, falling back
// to "ovsbr0" when it is empty or missing, and rejects names that do not match
// sliceVMBridgePattern.
func parseSliceVMBridge(params map[string]any) (string, error) {
	raw, _ := params["ovs_bridge"].(string)
	name := strings.TrimSpace(raw)
	if name == "" {
		name = "ovsbr0"
	}
	if sliceVMBridgePattern.MatchString(name) {
		return name, nil
	}
	return "", fmt.Errorf("invalid ovs_bridge")
}

// parseSliceVMTimeout reads "graceful_timeout_seconds". Values below 30
// (including a missing parameter; a parse error is ignored and the returned
// value is used as-is) yield 300, values above 900 are capped at 900, and
// anything in between is returned unchanged.
func parseSliceVMTimeout(params map[string]any) int {
	seconds, _ := optionalSliceVMIntParam(params, "graceful_timeout_seconds")
	switch {
	case seconds < 30:
		return 300
	case seconds > 900:
		return 900
	default:
		return seconds
	}
}

// parseSliceVMSSHPublicKeys validates the "ssh_public_keys" parameter and
// returns the trimmed keys in their original order.
//
// The parameter must be a non-empty list. Each entry must be a non-empty
// string of at most 8192 bytes that contains no NUL, carriage return or line
// feed once surrounding whitespace is trimmed. An authorized_keys entry is a
// single line, so an embedded line break is never legitimate; rejecting it also
// prevents a key from smuggling extra lines into the files the keys are later
// rendered into (the cloud-init user-data, via renderSliceVMUserData —
// NOTE(review): its escaping is not visible here, this is defense in depth).
//
// Errors: "ssh_public_keys required" when the list is missing or empty,
// "invalid ssh_public_keys" when any entry fails validation.
func parseSliceVMSSHPublicKeys(raw any) ([]string, error) {
	items, ok := raw.([]any)
	if !ok || len(items) == 0 {
		return nil, fmt.Errorf("ssh_public_keys required")
	}
	keys := make([]string, 0, len(items))
	for _, item := range items {
		key, ok := item.(string)
		key = strings.TrimSpace(key)
		if !ok || key == "" || len(key) > 8192 || strings.ContainsAny(key, "\x00\r\n") {
			return nil, fmt.Errorf("invalid ssh_public_keys")
		}
		keys = append(keys, key)
	}
	return keys, nil
}

// parseSliceVMSlots validates the "slots" parameter and returns the slots in
// the order they were supplied.
//
// The parameter must be a list of 1 to 8 objects. For every entry the function
// checks, in this order: slot_id (UUID), slot_index (non-negative, unique),
// pci_address (PCI pattern, unique), nvme_device (block-device pattern,
// unique), fabric_device (optional, PCI pattern, must not repeat any PCI or
// fabric address seen so far), mac_address (required when slot_index is 0,
// otherwise optional), private_ip, numa_node, vcpu_count and memory_mib. The
// first failing check determines the returned error.
func parseSliceVMSlots(raw any) ([]sliceVMSlot, error) {
	items, ok := raw.([]any)
	// Note: an over-long list (more than 8 entries) is reported with the same
	// "slots required" message as a missing list.
	if !ok || len(items) == 0 || len(items) > 8 {
		return nil, fmt.Errorf("slots required")
	}
	slots := make([]sliceVMSlot, 0, len(items))
	seenIndexes := map[int]struct{}{}
	seenDisks := map[string]struct{}{}
	// seenPCI is shared by pci_address and fabric_device, so a device cannot be
	// listed twice in either role.
	seenPCI := map[string]struct{}{}
	for _, item := range items {
		rawSlot, ok := item.(map[string]any)
		if !ok {
			return nil, fmt.Errorf("slot entries must be objects")
		}
		slotID, err := requiredUUIDParam(rawSlot, "slot_id")
		if err != nil {
			return nil, err
		}
		// A missing slot_index parses as 0.
		slotIndex, err := optionalSliceVMIntParam(rawSlot, "slot_index")
		if err != nil {
			return nil, err
		}
		if slotIndex < 0 {
			return nil, fmt.Errorf("invalid slot_index")
		}
		if _, ok := seenIndexes[slotIndex]; ok {
			return nil, fmt.Errorf("duplicate slot_index")
		}
		seenIndexes[slotIndex] = struct{}{}

		// PCI addresses are lower-cased before matching so duplicates that
		// differ only in case are still detected.
		pciAddress, _ := rawSlot["pci_address"].(string)
		pciAddress = strings.ToLower(strings.TrimSpace(pciAddress))
		if !sliceVMPCIAddressPattern.MatchString(pciAddress) {
			return nil, fmt.Errorf("invalid pci_address")
		}
		if _, ok := seenPCI[pciAddress]; ok {
			return nil, fmt.Errorf("duplicate pci_address")
		}
		seenPCI[pciAddress] = struct{}{}

		// The device path is cleaned before it is matched against the pattern.
		nvmeDevice, _ := rawSlot["nvme_device"].(string)
		nvmeDevice = filepath.Clean(strings.TrimSpace(nvmeDevice))
		if !sliceVMBlockDevicePattern.MatchString(nvmeDevice) {
			return nil, fmt.Errorf("invalid nvme_device")
		}
		if _, ok := seenDisks[nvmeDevice]; ok {
			return nil, fmt.Errorf("duplicate nvme_device")
		}
		seenDisks[nvmeDevice] = struct{}{}

		// fabric_device is optional; when present it is validated like a PCI address.
		fabricDevice, _ := rawSlot["fabric_device"].(string)
		fabricDevice = strings.ToLower(strings.TrimSpace(fabricDevice))
		if fabricDevice != "" && !sliceVMPCIAddressPattern.MatchString(fabricDevice) {
			return nil, fmt.Errorf("invalid fabric_device")
		}
		if fabricDevice != "" {
			if _, ok := seenPCI[fabricDevice]; ok {
				return nil, fmt.Errorf("duplicate fabric_device")
			}
			seenPCI[fabricDevice] = struct{}{}
		}

		// Only the slot with index 0 must carry a MAC address.
		macAddress, _ := rawSlot["mac_address"].(string)
		macAddress = strings.ToLower(strings.TrimSpace(macAddress))
		if macAddress == "" && slotIndex == 0 {
			return nil, fmt.Errorf("missing mac_address")
		}
		if macAddress != "" && !sliceVMMACPattern.MatchString(macAddress) {
			return nil, fmt.Errorf("invalid mac_address")
		}
		privateIP, _ := rawSlot["private_ip"].(string)
		privateIP, err = normalizeSliceVMPrivateIP(privateIP)
		if err != nil {
			return nil, err
		}

		numaNode, err := nullableIntParam(rawSlot, "numa_node")
		if err != nil {
			return nil, err
		}
		// Missing or non-positive sizing values fall back to 12 vCPUs / 65536 MiB.
		vcpuCount, err := optionalSliceVMIntParam(rawSlot, "vcpu_count")
		if err != nil {
			return nil, err
		}
		if vcpuCount <= 0 {
			vcpuCount = 12
		}
		memoryMIB, err := optionalSliceVMIntParam(rawSlot, "memory_mib")
		if err != nil {
			return nil, err
		}
		if memoryMIB <= 0 {
			memoryMIB = 65536
		}
		slots = append(slots, sliceVMSlot{
			SlotID:       slotID,
			SlotIndex:    slotIndex,
			PCIAddress:   pciAddress,
			FabricDevice: fabricDevice,
			NVMeDevice:   nvmeDevice,
			NUMANode:     numaNode,
			VCPUCount:    vcpuCount,
			MemoryMIB:    memoryMIB,
			MACAddress:   macAddress,
			PrivateIP:    privateIP,
		})
	}
	return slots, nil
}

// normalizeSliceVMPrivateIP accepts an optional IP address, either bare or in
// CIDR notation, and returns the canonical textual form of the address without
// any prefix length. An empty (or whitespace-only) input yields "" without
// error; anything unparseable yields "invalid private_ip".
func normalizeSliceVMPrivateIP(raw string) (string, error) {
	candidate := strings.TrimSpace(raw)
	if candidate == "" {
		return "", nil
	}
	if addr := net.ParseIP(candidate); addr != nil {
		return addr.String(), nil
	}
	// Not a bare address: fall back to CIDR and keep only the address part.
	if addr, _, err := net.ParseCIDR(candidate); err == nil && addr != nil {
		return addr.String(), nil
	}
	return "", fmt.Errorf("invalid private_ip")
}

// optionalSliceVMIntParam returns 0 when key is absent from params or mapped to
// nil; otherwise it defers to intParam for parsing and validation.
func optionalSliceVMIntParam(params map[string]any, key string) (int, error) {
	if value, present := params[key]; !present || value == nil {
		return 0, nil
	}
	return intParam(params, key)
}

// nullableIntParam returns nil when key is absent from params or mapped to nil,
// and otherwise a pointer to the integer parsed by intParam.
func nullableIntParam(params map[string]any, key string) (*int, error) {
	if raw := params[key]; raw == nil {
		return nil, nil
	}
	parsed, err := intParam(params, key)
	if err == nil {
		return &parsed, nil
	}
	return nil, err
}

// runSliceVMProvision provisions a slice VM for one allocation and returns a
// result map describing the VM.
//
// The visible sequence is: acquire per-slot node scheduler leases, check host
// dependencies, passthrough readiness and VFIO binding, make sure the base
// image exists (downloading it from ImageURL when the file is missing),
// optionally verify its SHA-256, prepare the cloud-init directory and seed
// files, build the seed ISO, validate runtime devices, create the DHCP
// reservation, write the image onto the boot slot's NVMe device, run
// virt-install, enable autostart (best effort), wait for guest readiness and
// collect a performance probe (best effort).
//
// On any failure the deferred handlers undo what was created so far: the DHCP
// reservation (if created), the guest telemetry registration (if registered),
// the node scheduler leases and the cloud-init directory. Each phase duration
// is recorded and returned under "timings".
//
// in.Slots[0] is used as the boot slot and is indexed without a length check,
// so in.Slots must not be empty (parseSliceVMSlots rejects empty lists).
func runSliceVMProvision(ctx context.Context, in sliceVMProvisionInput) (map[string]any, error) {
	timings := newSliceVMProvisionTimings()
	// The flags below tell the deferred cleanups what has to be rolled back.
	success := false
	reservationCreated := false
	guestTelemetryRegistered := false
	defer func() {
		if reservationCreated && !success {
			// context.Background() is used so the removal is not tied to ctx.
			_ = removeSliceVMDHCPReservation(context.Background(), in.VMName)
		}
		if guestTelemetryRegistered && !success {
			_ = newGuestTelemetryStore().unregister(in.AllocationID)
		}
	}()
	stop := timings.startPhase("lease_acquire")
	leasePaths, err := acquireSliceVMNodeSchedulerLeases(in)
	stop()
	if err != nil {
		return nil, err
	}
	// Registered only after the leases were acquired; on failure it releases
	// them and removes the cloud-init directory.
	defer func() {
		if !success {
			releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
			_ = sliceVMRemoveAll(in.CloudInitDir)
		}
	}()
	if err := timings.recordPhase("host_dependencies", func() error {
		return ensureSliceVMHostDependencies(ctx)
	}); err != nil {
		return nil, err
	}
	if err := timings.recordPhase("host_passthrough_check", ensureSliceVMHostPassthroughReady); err != nil {
		return nil, err
	}
	if err := timings.recordPhase("vfio_bind_check", func() error {
		return sliceVMEnsureHostDevicesBoundToVFIO(ctx, in.Slots)
	}); err != nil {
		return nil, err
	}
	// imagePrepared records that the image was downloaded during this run.
	imagePrepared := false
	stop = timings.startPhase("image_stat_download")
	if _, err := sliceVMStat(in.ImagePath); err != nil {
		// Only a missing file combined with a configured ImageURL is recoverable.
		if !errors.Is(err, os.ErrNotExist) || strings.TrimSpace(in.ImageURL) == "" {
			stop()
			return nil, fmt.Errorf("stat image_path: %w", err)
		}
		if err := sliceVMDownloadImage(ctx, in.ImageURL, in.ImagePath); err != nil {
			stop()
			return nil, err
		}
		imagePrepared = true
	}
	stop()
	// A freshly downloaded image is always verified when a digest is given; an
	// image that was already present is verified unless it is marked trusted.
	if in.ImageSHA256 != "" && (imagePrepared || !in.ImageTrusted) {
		if err := timings.recordPhase("image_digest_verify", func() error {
			return verifySliceVMImageDigest(in.ImagePath, in.ImageSHA256)
		}); err != nil {
			return nil, err
		}
	}
	if err := timings.recordPhase("cloud_init_dir", func() error {
		return ensureSliceVMCloudInitDir(in.CloudInitDir)
	}); err != nil {
		return nil, err
	}
	stop = timings.startPhase("terminal_key")
	terminalPublicKey, err := ensureSliceVMTerminalSSHKey(ctx)
	stop()
	if err != nil {
		return nil, err
	}
	// The first slot in the list is the boot slot.
	bootSlot := in.Slots[0]
	if err := validateSliceVMBootNetwork(bootSlot); err != nil {
		return nil, err
	}
	var guestTelemetry sliceVMGuestTelemetryConfig
	if err := timings.recordPhase("guest_telemetry_register", func() error {
		var regErr error
		guestTelemetry, regErr = registerSliceVMGuestTelemetry(in, bootSlot)
		return regErr
	}); err != nil {
		return nil, err
	}
	guestTelemetryRegistered = true
	userDataPath := filepath.Join(in.CloudInitDir, "user-data.yaml")
	metaDataPath := filepath.Join(in.CloudInitDir, "meta-data.yaml")
	isoPath := filepath.Join(in.CloudInitDir, "seed.iso")
	stop = timings.startPhase("cloud_init_seed_files")
	// The user-data receives the caller's SSH keys plus the terminal public key.
	if err := writeSecureFile(userDataPath, renderSliceVMUserData(in.DefaultUsername, appendSliceVMTerminalPublicKey(in.SSHPublicKeys, terminalPublicKey), in.DriverStrategy, &guestTelemetry), 0o600); err != nil {
		stop()
		return nil, fmt.Errorf("write cloud-init user-data: %w", err)
	}
	if err := writeSecureFile(metaDataPath, renderSliceVMMetaData(in.AllocationID, in.VMName), 0o600); err != nil {
		stop()
		return nil, fmt.Errorf("write cloud-init meta-data: %w", err)
	}
	stop()
	if err := timings.recordPhase("cloud_localds", func() error {
		return runSliceVMCommand(ctx, "cloud-localds", isoPath, userDataPath, metaDataPath)
	}); err != nil {
		return nil, err
	}
	// Best effort: a chmod failure on the seed ISO is ignored.
	_ = os.Chmod(isoPath, 0o644)
	if err := timings.recordPhase("runtime_validate", func() error {
		return validateSliceVMRuntimeDevices(ctx, in)
	}); err != nil {
		return nil, err
	}

	if err := timings.recordPhase("dhcp_reservation", func() error {
		return ensureSliceVMDHCPReservation(ctx, in)
	}); err != nil {
		return nil, err
	}
	reservationCreated = true
	// Writes the base image as raw data directly onto the boot slot's NVMe device.
	if err := timings.recordPhase("image_write_convert", func() error {
		return runSliceVMCommand(ctx, "qemu-img", "convert", "-O", "raw", in.ImagePath, bootSlot.NVMeDevice)
	}); err != nil {
		return nil, err
	}
	args := buildSliceVMVirtInstallArgs(in, isoPath)
	if err := timings.recordPhase("virt_install", func() error {
		return runSliceVMCommand(ctx, "virt-install", args...)
	}); err != nil {
		return nil, err
	}
	// Autostart is best effort: a failure is only reflected in the result map.
	autostartEnabled := true
	if err := timings.recordPhase("autostart", func() error {
		return ensureSliceVMAutostart(ctx, in.VMName)
	}); err != nil {
		autostartEnabled = false
	}
	stop = timings.startPhase("readiness")
	readiness, err := waitForSliceVMReadiness(ctx, in.DefaultUsername, bootSlot, time.Duration(in.GracefulTimeoutSeconds)*time.Second)
	stop()
	if err != nil {
		return nil, err
	}
	// The performance probe is best effort too; its error is reported in the
	// result instead of failing the provision.
	performanceProbe := map[string]any{}
	if err := timings.recordPhase("performance_probe", func() error {
		var probeErr error
		performanceProbe, probeErr = collectSliceVMPerformanceProbe(ctx, in.DefaultUsername, bootSlot.PrivateIP, 30*time.Second)
		return probeErr
	}); err != nil {
		performanceProbe = map[string]any{"error": err.Error()}
	}
	// From here on the deferred cleanups become no-ops.
	success = true
	return map[string]any{
		"vm_name":         in.VMName,
		"default_user":    in.DefaultUsername,
		"private_ip":      bootSlot.PrivateIP,
		"ssh_port":        22,
		"slot_count":      len(in.Slots),
		"cloud_init":      isoPath,
		"image_path":      in.ImagePath,
		"image_prepared":  imagePrepared,
		"driver_strategy": in.DriverStrategy,
		"readiness":       readiness,
		"runtime_health":  collectSliceVMRuntimeHealth(ctx, sliceVMRecoveryInput{AllocationID: in.AllocationID, VMName: in.VMName, PrivateIP: bootSlot.PrivateIP, DefaultUsername: in.DefaultUsername}, 5*time.Second),
		"autostart":       autostartEnabled,
		"performance":     performanceProbe,
		"lease_count":     len(leasePaths),
		"timings":         timings.output(),
		"raw_vnc":         false,
		"console_model":   "gateway_required",
	}, nil
}

// runSliceVMStart enables autostart for the domain, starts it with
// "virsh start" unless its reported state already contains "running", and
// returns a result map that embeds a fresh runtime health probe.
func runSliceVMStart(ctx context.Context, in sliceVMRecoveryInput) (map[string]any, error) {
	if err := ensureSliceVMAutostart(ctx, in.VMName); err != nil {
		return nil, err
	}
	// A domstate error is deliberately ignored: whatever state text came back
	// is used, and anything not containing "running" triggers a start attempt.
	domstate, _ := sliceVMDomainState(ctx, in.VMName)
	alreadyRunning := strings.Contains(strings.ToLower(domstate), "running")
	if !alreadyRunning {
		if err := runSliceVMCommand(ctx, "virsh", "start", in.VMName); err != nil {
			return nil, err
		}
	}
	report := collectSliceVMRuntimeHealth(ctx, in, 10*time.Second)
	result := map[string]any{
		"allocation_id":  in.AllocationID,
		"vm_name":        in.VMName,
		"started":        true,
		"runtime_health": report,
		"domstate":       report["domstate"],
		"autostart":      report["autostart"],
		"private_ip":     in.PrivateIP,
	}
	return result, nil
}

// runSliceVMRestart enables autostart for the domain and then either reboots it
// ("virsh reboot", when its reported state contains "running") or starts it
// ("virsh start"). The result map embeds a fresh runtime health probe.
func runSliceVMRestart(ctx context.Context, in sliceVMRecoveryInput) (map[string]any, error) {
	if err := ensureSliceVMAutostart(ctx, in.VMName); err != nil {
		return nil, err
	}
	// A domstate error is deliberately ignored; a state without "running"
	// selects the start path.
	domstate, _ := sliceVMDomainState(ctx, in.VMName)
	action := "start"
	if strings.Contains(strings.ToLower(domstate), "running") {
		action = "reboot"
	}
	if err := runSliceVMCommand(ctx, "virsh", action, in.VMName); err != nil {
		return nil, err
	}
	report := collectSliceVMRuntimeHealth(ctx, in, 10*time.Second)
	result := map[string]any{
		"allocation_id":  in.AllocationID,
		"vm_name":        in.VMName,
		"restarted":      true,
		"runtime_health": report,
		"domstate":       report["domstate"],
		"autostart":      report["autostart"],
		"private_ip":     in.PrivateIP,
	}
	return result, nil
}

// ensureSliceVMAutostart runs "virsh autostart <vmName>" and returns the
// command's error, if any.
func ensureSliceVMAutostart(ctx context.Context, vmName string) error {
	err := runSliceVMCommand(ctx, "virsh", "autostart", vmName)
	return err
}

// sliceVMDomainState returns the lower-cased, trimmed output of
// "virsh domstate <vmName>" together with the command error. The output is
// normalised and returned even when the command failed.
func sliceVMDomainState(ctx context.Context, vmName string) (string, error) {
	output, err := sliceVMCommandOutput(ctx, "virsh", "domstate", vmName)
	normalized := strings.ToLower(output)
	normalized = strings.TrimSpace(normalized)
	return normalized, err
}

// sliceVMAutostartEnabled reports whether "virsh dominfo <vmName>" lists the
// domain's Autostart field as "enable", "enabled" or "yes" (case-insensitive).
// Only the first Autostart line is considered; when no such line exists the
// result is false without an error.
func sliceVMAutostartEnabled(ctx context.Context, vmName string) (bool, error) {
	dominfo, err := sliceVMCommandOutput(ctx, "virsh", "dominfo", vmName)
	if err != nil {
		return false, err
	}
	lines := strings.Split(dominfo, "\n")
	for i := 0; i < len(lines); i++ {
		field, setting, found := strings.Cut(lines[i], ":")
		if found && strings.EqualFold(strings.TrimSpace(field), "Autostart") {
			switch strings.ToLower(strings.TrimSpace(setting)) {
			case "enable", "enabled", "yes":
				return true, nil
			default:
				return false, nil
			}
		}
	}
	return false, nil
}

// collectSliceVMRuntimeHealth probes a slice VM and returns a report map.
//
// The report always carries allocation_id, vm_name, private_ip, checked_at
// (UTC, RFC 3339) and domstate ("unknown" when the state query failed).
// autostart is set only when the dominfo query succeeded. When in.PrivateIP is
// non-blank, port 22 is probed with sshTimeout and ssh_reachable, ssh_port and
// ssh_probe_ms are added. last_probe_error holds the domstate error, else the
// autostart error; an SSH probe error overwrites either.
func collectSliceVMRuntimeHealth(ctx context.Context, in sliceVMRecoveryInput, sshTimeout time.Duration) map[string]any {
	report := map[string]any{
		"allocation_id": in.AllocationID,
		"vm_name":       in.VMName,
		"private_ip":    in.PrivateIP,
		"checked_at":    time.Now().UTC().Format(time.RFC3339),
	}

	state, stateErr := sliceVMDomainState(ctx, in.VMName)
	if stateErr == nil {
		report["domstate"] = state
	} else {
		report["domstate"] = "unknown"
		report["last_probe_error"] = stateErr.Error()
	}

	enabled, autostartErr := sliceVMAutostartEnabled(ctx, in.VMName)
	if autostartErr == nil {
		report["autostart"] = enabled
	} else if _, recorded := report["last_probe_error"]; !recorded {
		// Keep the earlier domstate error if there was one.
		report["last_probe_error"] = autostartErr.Error()
	}

	if strings.TrimSpace(in.PrivateIP) == "" {
		return report
	}
	began := time.Now()
	sshErr := sliceVMWaitSSH(ctx, in.PrivateIP, 22, sshTimeout)
	report["ssh_reachable"] = sshErr == nil
	report["ssh_port"] = 22
	report["ssh_probe_ms"] = time.Since(began).Milliseconds()
	if sshErr != nil {
		report["last_probe_error"] = sshErr.Error()
	}
	return report
}

// sliceVMProvisionTimings collects per-phase durations of a provision run.
// startedAt is the creation time used for the overall total; phases maps
// "<phase>_ms" keys to elapsed milliseconds. The map is written without any
// locking, so the type is not intended for concurrent use as written.
type sliceVMProvisionTimings struct {
	startedAt time.Time
	phases    map[string]int64
}

// newSliceVMProvisionTimings returns a timings recorder whose clock starts now
// and whose phase map is empty.
func newSliceVMProvisionTimings() *sliceVMProvisionTimings {
	timings := new(sliceVMProvisionTimings)
	timings.startedAt = time.Now()
	timings.phases = make(map[string]int64)
	return timings
}

// startPhase starts timing the named phase and returns a function that, when
// called, stores the elapsed milliseconds under "<name>_ms". On a nil receiver
// the returned function does nothing.
func (t *sliceVMProvisionTimings) startPhase(name string) func() {
	if t == nil {
		return func() {}
	}
	began := time.Now()
	key := name + "_ms"
	return func() {
		t.phases[key] = time.Since(began).Milliseconds()
	}
}

// recordPhase runs fn, records how long it took under the given phase name and
// returns fn's error unchanged.
func (t *sliceVMProvisionTimings) recordPhase(name string, fn func() error) error {
	finish := t.startPhase(name)
	result := fn()
	finish()
	return result
}

// output returns the recorded phase durations plus "total_ms", the time
// elapsed since the recorder was created. A nil receiver yields an empty map.
func (t *sliceVMProvisionTimings) output() map[string]any {
	if t == nil {
		return map[string]any{}
	}
	summary := make(map[string]any, 1+len(t.phases))
	// total_ms is written first, so a phase key of the same name would win.
	summary["total_ms"] = time.Since(t.startedAt).Milliseconds()
	for key, ms := range t.phases {
		summary[key] = ms
	}
	return summary
}

// sliceVMNodeSchedulerLease is the JSON document stored in a per-slot lease
// file. It records which allocation and task hold the slot, the slot's
// devices, and when the lease was created and expires. FabricDevice is omitted
// from the JSON when empty.
type sliceVMNodeSchedulerLease struct {
	AllocationID string    `json:"allocation_id"`
	TaskID       string    `json:"task_id"`
	SlotID       string    `json:"slot_id"`
	SlotIndex    int       `json:"slot_index"`
	PCIAddress   string    `json:"pci_address"`
	FabricDevice string    `json:"fabric_device,omitempty"`
	NVMeDevice   string    `json:"nvme_device"`
	CreatedAt    time.Time `json:"created_at"`
	ExpiresAt    time.Time `json:"expires_at"`
}

// acquireSliceVMNodeSchedulerLeases claims one lease file per slot for the
// allocation and returns the lease file paths in slot order.
//
// Expired leases are removed first. Each lease file is created with
// O_CREATE|O_EXCL, so an existing file means the slot is already leased: if it
// belongs to the same allocation it is reused, otherwise the call fails. On any
// failure every lease held by this allocation for the given slots is released
// again before the error is returned.
func acquireSliceVMNodeSchedulerLeases(in sliceVMProvisionInput) ([]string, error) {
	if err := reconcileExpiredSliceVMNodeSchedulerLeases(); err != nil {
		return nil, err
	}
	dir := filepath.Clean(sliceVMNodeSchedulerLeaseDir)
	if err := os.MkdirAll(dir, 0o700); err != nil {
		return nil, fmt.Errorf("create node scheduler lease dir: %w", err)
	}
	// All leases created by this call share the same creation and expiry time.
	now := time.Now().UTC()
	expiresAt := now.Add(sliceVMNodeSchedulerLeaseTTL())
	paths := make([]string, 0, len(in.Slots))
	for _, slot := range in.Slots {
		path := sliceVMNodeSchedulerLeasePath(slot.SlotID)
		lease := sliceVMNodeSchedulerLease{
			AllocationID: in.AllocationID,
			TaskID:       in.TaskID,
			SlotID:       slot.SlotID,
			SlotIndex:    slot.SlotIndex,
			PCIAddress:   slot.PCIAddress,
			FabricDevice: slot.FabricDevice,
			NVMeDevice:   slot.NVMeDevice,
			CreatedAt:    now,
			ExpiresAt:    expiresAt,
		}
		payload, err := json.MarshalIndent(lease, "", "  ")
		if err != nil {
			releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
			return nil, fmt.Errorf("encode node scheduler lease: %w", err)
		}
		// O_EXCL makes the create fail when a lease file already exists.
		f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o600)
		if err != nil {
			if errors.Is(err, os.ErrExist) {
				existing, readErr := readSliceVMNodeSchedulerLease(path)
				if readErr != nil {
					releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
					return nil, readErr
				}
				// A lease already held by this allocation is reused as-is; the
				// existing file is not rewritten.
				if existing.AllocationID == in.AllocationID {
					paths = append(paths, path)
					continue
				}
				releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
				return nil, fmt.Errorf("slice slot %s has active node scheduler lease for allocation %s", slot.SlotID, existing.AllocationID)
			}
			releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
			return nil, fmt.Errorf("create node scheduler lease for slot %s: %w", slot.SlotID, err)
		}
		if _, err := f.Write(append(payload, '\n')); err != nil {
			// Remove the partially written file before rolling back the rest.
			_ = f.Close()
			_ = os.Remove(path)
			releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
			return nil, fmt.Errorf("write node scheduler lease for slot %s: %w", slot.SlotID, err)
		}
		if err := f.Close(); err != nil {
			_ = os.Remove(path)
			releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
			return nil, fmt.Errorf("close node scheduler lease for slot %s: %w", slot.SlotID, err)
		}
		paths = append(paths, path)
	}
	return paths, nil
}

// releaseSliceVMNodeSchedulerLeases removes the lease files of the given slots
// that belong to allocationID and returns how many were released. Lease files
// that cannot be read (missing or undecodable) or that belong to another
// allocation are left untouched. A file that disappears between the read and
// the removal still counts as released.
func releaseSliceVMNodeSchedulerLeases(allocationID string, slots []sliceVMSlot) int {
	count := 0
	for i := range slots {
		leasePath := sliceVMNodeSchedulerLeasePath(slots[i].SlotID)
		lease, readErr := readSliceVMNodeSchedulerLease(leasePath)
		if readErr != nil || lease.AllocationID != allocationID {
			continue
		}
		if rmErr := os.Remove(leasePath); rmErr == nil || errors.Is(rmErr, os.ErrNotExist) {
			count++
		}
	}
	return count
}

// reconcileExpiredSliceVMNodeSchedulerLeases deletes every "*.json" lease file
// in the lease directory whose ExpiresAt is set and lies in the past. A missing
// directory is not an error. Unreadable lease files are skipped and removal
// failures are ignored; only a failure to list the directory is returned.
func reconcileExpiredSliceVMNodeSchedulerLeases() error {
	leaseDir := filepath.Clean(sliceVMNodeSchedulerLeaseDir)
	entries, err := os.ReadDir(leaseDir)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return nil
	case err != nil:
		return fmt.Errorf("read node scheduler lease dir: %w", err)
	}
	cutoff := time.Now().UTC()
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		if !strings.HasSuffix(entry.Name(), ".json") {
			continue
		}
		leasePath := filepath.Join(leaseDir, entry.Name())
		lease, readErr := readSliceVMNodeSchedulerLease(leasePath)
		if readErr != nil {
			continue
		}
		// A zero ExpiresAt means the lease never expires here.
		expired := !lease.ExpiresAt.IsZero() && cutoff.After(lease.ExpiresAt)
		if expired {
			_ = os.Remove(leasePath)
		}
	}
	return nil
}

// readSliceVMNodeSchedulerLease loads and decodes the lease file at path. Read
// errors are returned unwrapped (so callers can test for os.ErrNotExist);
// decode errors are wrapped with the file path.
func readSliceVMNodeSchedulerLease(path string) (sliceVMNodeSchedulerLease, error) {
	var lease sliceVMNodeSchedulerLease
	data, err := os.ReadFile(path)
	if err != nil {
		return sliceVMNodeSchedulerLease{}, err
	}
	if decodeErr := json.Unmarshal(data, &lease); decodeErr != nil {
		return sliceVMNodeSchedulerLease{}, fmt.Errorf("decode node scheduler lease %s: %w", path, decodeErr)
	}
	return lease, nil
}

// sliceVMNodeSchedulerLeasePath returns the lease file path for a slot:
// "<lease dir>/<slotID>.json".
//
// Only the final path element of slotID is used for the file name.
// filepath.Clean alone keeps leading "../" segments, which would let a
// malformed slot ID resolve to a file outside the lease directory. Slot IDs
// that are plain identifiers (the UUIDs accepted by parseSliceVMSlots) map to
// exactly the same path as before.
func sliceVMNodeSchedulerLeasePath(slotID string) string {
	name := filepath.Base(filepath.Clean(slotID))
	return filepath.Join(filepath.Clean(sliceVMNodeSchedulerLeaseDir), name+".json")
}

// sliceVMNodeSchedulerLeaseTTL returns the lease lifetime configured through
// GPUAAS_NODE_SCHEDULER_LEASE_TTL_SECONDS. An unset, non-numeric or
// below-300-second value yields the 24 hour default; values above seven days
// are capped at seven days.
func sliceVMNodeSchedulerLeaseTTL() time.Duration {
	const (
		fallback   = 24 * time.Hour
		ceiling    = 7 * 24 * time.Hour
		minSeconds = 300
		maxSeconds = 7 * 24 * 3600
	)
	configured := strings.TrimSpace(os.Getenv("GPUAAS_NODE_SCHEDULER_LEASE_TTL_SECONDS"))
	if configured == "" {
		return fallback
	}
	seconds, err := strconv.Atoi(configured)
	switch {
	case err != nil, seconds < minSeconds:
		return fallback
	case seconds > maxSeconds:
		return ceiling
	}
	return time.Duration(seconds) * time.Second
}

// ensureSliceVMCloudInitDir creates path and its parent directory (including
// any missing ancestors) and then sets mode 0755 on both, parent first. The
// explicit chmod makes the mode independent of the process umask and also
// resets it on directories that already existed.
func ensureSliceVMCloudInitDir(path string) error {
	steps := []struct{ dir, label string }{
		{filepath.Dir(path), "cloud_init_parent_dir"},
		{path, "cloud_init_dir"},
	}
	for _, step := range steps {
		if err := os.MkdirAll(step.dir, 0o755); err != nil {
			return fmt.Errorf("create %s: %w", step.label, err)
		}
		if err := os.Chmod(step.dir, 0o755); err != nil {
			return fmt.Errorf("chmod %s: %w", step.label, err)
		}
	}
	return nil
}

// ensureSliceVMTerminalSSHKey makes sure the terminal SSH key pair exists on
// disk and returns the trimmed public key line.
//
// The private key path comes from GPUAAS_SLICE_TERMINAL_SSH_KEY_PATH, falling
// back to defaultSliceVMTerminalSSHKeyPath. The key directory is created and
// forced to mode 0700. A missing private key is generated with ssh-keygen; a
// missing public key is derived from the private key with "ssh-keygen -y".
// The public key must start with one of the ssh-ed25519, ssh-rsa, or
// ecdsa-sha2- prefixes, otherwise an error is returned.
func ensureSliceVMTerminalSSHKey(ctx context.Context) (string, error) {
	privatePath := strings.TrimSpace(os.Getenv("GPUAAS_SLICE_TERMINAL_SSH_KEY_PATH"))
	if privatePath == "" {
		privatePath = defaultSliceVMTerminalSSHKeyPath
	}
	pubPath := privatePath + ".pub"
	dir := filepath.Dir(privatePath)

	if err := os.MkdirAll(dir, 0o700); err != nil {
		return "", fmt.Errorf("create slice terminal ssh key dir: %w", err)
	}
	if err := os.Chmod(dir, 0o700); err != nil {
		return "", fmt.Errorf("chmod slice terminal ssh key dir: %w", err)
	}

	_, statErr := sliceVMStat(privatePath)
	switch {
	case statErr == nil:
		// Private key already present; nothing to generate.
	case !errors.Is(statErr, os.ErrNotExist):
		return "", fmt.Errorf("stat slice terminal ssh key: %w", statErr)
	default:
		if err := runSliceVMCommand(ctx, "ssh-keygen", "-q", "-t", defaultSliceVMTerminalSSHKeyBits, "-N", "", "-C", defaultSliceVMTerminalSSHKeyLabel, "-f", privatePath); err != nil {
			return "", err
		}
		// Best-effort permission tightening; failures are ignored.
		_ = os.Chmod(privatePath, 0o600)
		_ = os.Chmod(pubPath, 0o644)
	}

	_, pubStatErr := sliceVMStat(pubPath)
	switch {
	case pubStatErr == nil:
		// Public key already present.
	case !errors.Is(pubStatErr, os.ErrNotExist):
		return "", fmt.Errorf("stat slice terminal ssh public key: %w", pubStatErr)
	default:
		derived, deriveErr := sliceVMCommandContext(ctx, "ssh-keygen", "-y", "-f", privatePath).Output()
		if deriveErr != nil {
			return "", fmt.Errorf("derive slice terminal ssh public key: %w", deriveErr)
		}
		if err := writeSecureFile(pubPath, strings.TrimSpace(string(derived))+"\n", 0o644); err != nil {
			return "", fmt.Errorf("write slice terminal ssh public key: %w", err)
		}
	}

	contents, readErr := os.ReadFile(pubPath)
	if readErr != nil {
		return "", fmt.Errorf("read slice terminal ssh public key: %w", readErr)
	}
	publicKey := strings.TrimSpace(string(contents))
	recognized := false
	for _, prefix := range []string{"ssh-ed25519 ", "ssh-rsa ", "ecdsa-sha2-"} {
		if strings.HasPrefix(publicKey, prefix) {
			recognized = true
			break
		}
	}
	if publicKey == "" || !recognized {
		return "", fmt.Errorf("invalid slice terminal ssh public key")
	}
	return publicKey, nil
}

// appendSliceVMTerminalPublicKey returns a new slice holding keys plus
// terminalPublicKey.
//
// When terminalPublicKey is blank the result is a plain copy of keys. Otherwise
// every key is trimmed, blank entries and duplicates are dropped (first
// occurrence wins), and the trimmed terminal key is appended unless it is
// already present.
func appendSliceVMTerminalPublicKey(keys []string, terminalPublicKey string) []string {
	terminal := strings.TrimSpace(terminalPublicKey)
	if terminal == "" {
		return append([]string(nil), keys...)
	}
	merged := make([]string, 0, len(keys)+1)
	known := make(map[string]struct{}, len(keys)+1)
	add := func(candidate string) {
		if candidate == "" {
			return
		}
		if _, dup := known[candidate]; dup {
			return
		}
		known[candidate] = struct{}{}
		merged = append(merged, candidate)
	}
	for _, raw := range keys {
		add(strings.TrimSpace(raw))
	}
	add(terminal)
	return merged
}

// validateSliceVMBootNetwork checks that the boot slot carries a non-blank MAC
// address and private IP. The MAC address is checked first.
func validateSliceVMBootNetwork(slot sliceVMSlot) error {
	switch {
	case strings.TrimSpace(slot.MACAddress) == "":
		return fmt.Errorf("slice VM boot slot missing mac_address")
	case strings.TrimSpace(slot.PrivateIP) == "":
		return fmt.Errorf("slice VM boot slot missing private_ip")
	default:
		return nil
	}
}

// waitForSliceVMReadiness waits for the boot slot's SSH port (22) and then for
// guest bootstrap, returning a readiness report alongside any error.
//
// The report always contains "checked"; when the boot network is invalid it
// only holds checked=false, ssh_ready=false and the error text. Otherwise it
// records private_ip, ssh_port, the elapsed milliseconds of each phase, the
// ssh_ready / guest_ready flags reached, and "error" when a phase failed.
// The same timeout is handed to each phase separately.
func waitForSliceVMReadiness(ctx context.Context, username string, bootSlot sliceVMSlot, timeout time.Duration) (map[string]any, error) {
	if netErr := validateSliceVMBootNetwork(bootSlot); netErr != nil {
		unchecked := map[string]any{
			"checked":   false,
			"ssh_ready": false,
			"error":     netErr.Error(),
		}
		return unchecked, netErr
	}

	ip := bootSlot.PrivateIP
	report := map[string]any{
		"checked":    true,
		"private_ip": ip,
		"ssh_port":   22,
	}

	sshBegin := time.Now()
	sshErr := sliceVMWaitSSH(ctx, ip, 22, timeout)
	report["ssh_wait_ms"] = time.Since(sshBegin).Milliseconds()
	report["ssh_ready"] = sshErr == nil
	if sshErr != nil {
		report["error"] = sshErr.Error()
		return report, fmt.Errorf("slice VM SSH readiness failed for %s: %w", ip, sshErr)
	}

	guestBegin := time.Now()
	guestErr := sliceVMWaitGuestReady(ctx, username, ip, timeout)
	report["guest_bootstrap_ms"] = time.Since(guestBegin).Milliseconds()
	report["guest_ready"] = guestErr == nil
	if guestErr != nil {
		report["error"] = guestErr.Error()
		return report, fmt.Errorf("slice VM guest bootstrap failed for %s: %w", ip, guestErr)
	}
	return report, nil
}

// ensureSliceVMHostDependencies verifies that the command-line tools needed on
// the host are on PATH and, when any are missing, installs a fixed package set
// with apt-get (update followed by install -y).
//
// It fails when tools are missing and apt-get itself cannot be found, or when
// either apt-get step fails.
func ensureSliceVMHostDependencies(ctx context.Context) error {
	var absent []string
	for _, tool := range []string{"cloud-localds", "qemu-img", "virt-install", "virsh", "ovs-vsctl", "findmnt", "ssh", "ssh-keygen"} {
		if _, lookErr := exec.LookPath(tool); lookErr != nil {
			absent = append(absent, tool)
		}
	}
	if len(absent) == 0 {
		return nil
	}
	if _, lookErr := exec.LookPath("apt-get"); lookErr != nil {
		return fmt.Errorf("slice VM host dependencies missing (%s) and apt-get is unavailable", strings.Join(absent, ","))
	}

	aptSteps := [][]string{
		{"update"},
		{
			"install",
			"-y",
			"qemu-kvm",
			"libvirt-daemon-system",
			"libvirt-clients",
			"virtinst",
			"openvswitch-switch",
			"cloud-image-utils",
			"genisoimage",
			"openssh-client",
		},
	}
	for _, step := range aptSteps {
		if err := runSliceVMCommand(ctx, "apt-get", step...); err != nil {
			return fmt.Errorf("install slice VM host dependencies: %w", err)
		}
	}
	return nil
}

// ensureSliceVMHostPassthroughReady checks that /dev/kvm exists and that at
// least one entry is present under /sys/kernel/iommu_groups.
func ensureSliceVMHostPassthroughReady() error {
	if _, kvmErr := sliceVMStat("/dev/kvm"); kvmErr != nil {
		return fmt.Errorf("slice VM host KVM unavailable: %w", kvmErr)
	}
	iommuGroups, globErr := sliceVMGlob("/sys/kernel/iommu_groups/*")
	switch {
	case globErr != nil:
		return fmt.Errorf("check IOMMU groups: %w", globErr)
	case len(iommuGroups) == 0:
		return fmt.Errorf("slice VM host passthrough unavailable: no IOMMU groups found; enable VT-d/IOMMU and reboot before GPU slice provisioning")
	}
	return nil
}

// ensureSliceVMHostDevicesBoundToVFIO loads the vfio-pci module and then binds
// every non-blank PCIAddress and FabricDevice of the given slots to it,
// stopping at the first failure.
func ensureSliceVMHostDevicesBoundToVFIO(ctx context.Context, slots []sliceVMSlot) error {
	if err := runSliceVMCommand(ctx, "modprobe", "vfio-pci"); err != nil {
		return fmt.Errorf("load vfio-pci: %w", err)
	}
	for i := range slots {
		candidates := [...]string{slots[i].PCIAddress, slots[i].FabricDevice}
		for _, raw := range candidates {
			pci := strings.TrimSpace(raw)
			if pci == "" {
				continue
			}
			if err := ensureSliceVMHostDeviceBoundToVFIO(pci); err != nil {
				return err
			}
		}
	}
	return nil
}

// ensureSliceVMHostDeviceBoundToVFIO makes sure the PCI device at addr is
// driven by vfio-pci.
//
// addr must match sliceVMPCIAddressPattern and exist under sysfs. A device that
// is already bound to vfio-pci is left untouched. When runtime binding is
// disabled (sliceVMRuntimeVFIOBindEnabled reports false) the function only
// reports the current driver in an error. Otherwise it sets the driver
// override, unbinds the current driver if there is one, asks vfio-pci to bind
// the device, and verifies the resulting driver link.
func ensureSliceVMHostDeviceBoundToVFIO(addr string) error {
	if !sliceVMPCIAddressPattern.MatchString(addr) {
		return fmt.Errorf("invalid PCI address %q", addr)
	}
	devicePath := sliceVMSysfsPCIPath(addr)
	if _, err := sliceVMStat(devicePath); err != nil {
		return fmt.Errorf("stat PCI device %s: %w", addr, err)
	}
	// Fast path: nothing to do when vfio-pci already owns the device.
	if driver, ok := sliceVMHostDeviceDriver(devicePath); ok && driver == "vfio-pci" {
		return nil
	}
	if !sliceVMRuntimeVFIOBindEnabled() {
		// Rebinding at runtime is not allowed; report which driver holds the device.
		driver, _ := sliceVMHostDeviceDriver(devicePath)
		if driver == "" {
			driver = "none"
		}
		return fmt.Errorf("PCI device %s is bound to %s, not vfio-pci; run the GPUaaS slice host bootstrap and reboot before scheduling this node", addr, driver)
	}
	// Step 1: pin the device to vfio-pci via its driver_override attribute.
	if err := sliceVMWriteFile(filepath.Join(devicePath, "driver_override"), []byte("vfio-pci\n"), 0o644); err != nil {
		return fmt.Errorf("set vfio-pci driver override for %s: %w", addr, err)
	}
	// Step 2: detach whatever driver currently holds the device, if any.
	if _, ok := sliceVMHostDeviceDriver(devicePath); ok {
		if err := sliceVMWriteFile(filepath.Join(devicePath, "driver", "unbind"), []byte(addr), 0o200); err != nil {
			return fmt.Errorf("unbind PCI device %s: %w", addr, err)
		}
	}
	// Step 3: request the bind by writing the address to sliceVMVFIOBindPath.
	if err := sliceVMWriteFile(sliceVMVFIOBindPath, []byte(addr), 0o200); err != nil {
		return fmt.Errorf("bind PCI device %s to vfio-pci: %w", addr, err)
	}
	// Step 4: confirm the driver link now points at vfio-pci.
	if driver, ok := sliceVMHostDeviceDriver(devicePath); !ok || driver != "vfio-pci" {
		return fmt.Errorf("PCI device %s did not bind to vfio-pci", addr)
	}
	return nil
}

// sliceVMHostDeviceDriver resolves the "driver" symlink below devicePath and
// returns the base name of its target. The boolean is false (and the name
// empty) when the link cannot be read.
func sliceVMHostDeviceDriver(devicePath string) (string, bool) {
	link := filepath.Join(devicePath, "driver")
	if target, err := sliceVMReadlink(link); err == nil {
		return filepath.Base(target), true
	}
	return "", false
}

// validateSliceVMRuntimeDevices confirms that the OVS bridge exists and that
// every slot's NVMe device is a block device that is not mounted. The first
// failing check is returned.
func validateSliceVMRuntimeDevices(ctx context.Context, in sliceVMProvisionInput) error {
	if bridgeErr := runSliceVMCommand(ctx, "ovs-vsctl", "br-exists", in.OVSBridge); bridgeErr != nil {
		return fmt.Errorf("OVS bridge %q is not ready: %w", in.OVSBridge, bridgeErr)
	}
	for i := range in.Slots {
		device := in.Slots[i].NVMeDevice
		err := validateSliceVMBlockDevice(device)
		if err == nil {
			err = ensureSliceVMBlockDeviceNotMounted(ctx, device)
		}
		if err != nil {
			return err
		}
	}
	return nil
}

// validateSliceVMBlockDevice stats device and rejects anything whose mode is
// not a device node or is a character device.
func validateSliceVMBlockDevice(device string) error {
	info, statErr := sliceVMStat(device)
	if statErr != nil {
		return fmt.Errorf("stat nvme_device %s: %w", device, statErr)
	}
	isBlock := info != nil &&
		info.Mode()&os.ModeDevice != 0 &&
		info.Mode()&os.ModeCharDevice == 0
	if !isBlock {
		return fmt.Errorf("nvme_device %s is not a block device", device)
	}
	return nil
}

// ensureSliceVMBlockDeviceNotMounted fails when device, or anything lsblk
// lists beneath it, has a mount point.
//
// Mount points are gathered from "findmnt --source" and from
// "lsblk -o MOUNTPOINTS". A failure of either command is treated as "no mount
// points reported by that command". Both outputs are de-duplicated together so
// a mount point seen by both tools is listed once in the error message.
func ensureSliceVMBlockDeviceNotMounted(ctx context.Context, device string) error {
	findmntOut, err := sliceVMCommandOutput(ctx, "findmnt", "--source", device, "--noheadings", "--output", "TARGET")
	if err != nil {
		findmntOut = ""
	}
	combined := findmntOut
	if lsblkOut, lsblkErr := sliceVMCommandOutput(ctx, "lsblk", "-nr", "-o", "MOUNTPOINTS", device); lsblkErr == nil {
		// findmnt results stay first so the reported order is unchanged.
		combined += "\n" + lsblkOut
	}
	targets := sliceVMMountTargets(combined)
	if len(targets) == 0 {
		return nil
	}
	return fmt.Errorf("nvme_device %s or a child partition is mounted at %s", device, strings.Join(targets, ", "))
}

// sliceVMMountTargets splits command output into lines and returns the
// distinct, non-blank, whitespace-trimmed entries in first-seen order. The
// result is never nil.
func sliceVMMountTargets(out string) []string {
	rows := strings.Split(out, "\n")
	unique := make([]string, 0, len(rows))
	known := make(map[string]struct{}, len(rows))
	for i := range rows {
		mount := strings.TrimSpace(rows[i])
		if mount == "" {
			continue
		}
		if _, dup := known[mount]; !dup {
			known[mount] = struct{}{}
			unique = append(unique, mount)
		}
	}
	return unique
}

// downloadSliceVMImage fetches imageURL with an HTTP GET bound to ctx and
// installs the body at destPath.
//
// The body is streamed into a temporary file in destPath's directory (created
// with mode 0700 if missing), set to mode 0644, and renamed over destPath, so
// a partial download never appears at destPath. Responses other than 200 OK
// and bodies larger than 64 GiB are rejected. A body of exactly 64 GiB is
// accepted.
func downloadSliceVMImage(ctx context.Context, imageURL, destPath string) error {
	const maxImageBytes int64 = 64 << 30

	destDir := filepath.Dir(destPath)
	if err := os.MkdirAll(destDir, 0o700); err != nil {
		return fmt.Errorf("create image directory: %w", err)
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, imageURL, nil)
	if err != nil {
		return fmt.Errorf("create image download request: %w", err)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return fmt.Errorf("download image_url: %w", err)
	}
	defer func() { _ = resp.Body.Close() }()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("download image_url: status %d", resp.StatusCode)
	}
	tmp, err := os.CreateTemp(destDir, ".gpuaas-image-*.tmp")
	if err != nil {
		return fmt.Errorf("create image temp file: %w", err)
	}
	tmpPath := tmp.Name()
	// After a successful rename this removal is a harmless no-op.
	defer func() { _ = os.Remove(tmpPath) }()

	// Allow one byte past the cap: the reader only reaches zero when the body
	// is strictly larger than maxImageBytes, so an exact-size image passes.
	limited := &io.LimitedReader{R: resp.Body, N: maxImageBytes + 1}
	if _, err := io.Copy(tmp, limited); err != nil {
		_ = tmp.Close()
		return fmt.Errorf("write image temp file: %w", err)
	}
	if limited.N <= 0 {
		_ = tmp.Close()
		return fmt.Errorf("image_url exceeds 64 GiB limit")
	}
	if err := tmp.Close(); err != nil {
		return fmt.Errorf("close image temp file: %w", err)
	}
	if err := os.Chmod(tmpPath, 0o644); err != nil {
		return fmt.Errorf("chmod image temp file: %w", err)
	}
	if err := os.Rename(tmpPath, destPath); err != nil {
		return fmt.Errorf("install downloaded image: %w", err)
	}
	return nil
}

// runSliceVMRelease tears down the slice VM described by in and returns a
// summary map (vm_name, released, hard_stopped, wiped, slot_count,
// leases_released).
//
// Sequence: request a graceful shutdown, poll the domain state every two
// seconds for up to GracefulTimeoutSeconds, force "virsh destroy" if the
// domain still reports running, undefine the domain, re-bind the slot devices
// to vfio, remove the cloud-init directory and DHCP reservation, optionally
// wipe each slot's NVMe device, and finally release scheduler leases and the
// guest telemetry registration. Errors are returned only for the forced stop,
// the vfio re-bind, and the wipe; the other steps are best-effort.
func runSliceVMRelease(ctx context.Context, in sliceVMReleaseInput) (map[string]any, error) {
	timeout := time.Duration(in.GracefulTimeoutSeconds) * time.Second
	// Best-effort graceful shutdown; the result is judged by polling below.
	_ = runSliceVMCommand(ctx, "virsh", "shutdown", in.VMName)
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		// A failed domstate call yields an empty state, which ends the wait.
		state, _ := sliceVMCommandOutput(ctx, "virsh", "domstate", in.VMName)
		if !strings.Contains(strings.ToLower(state), "running") {
			break
		}
		time.Sleep(2 * time.Second)
	}
	// Re-check once after the wait to decide whether a hard stop is needed.
	state, _ := sliceVMCommandOutput(ctx, "virsh", "domstate", in.VMName)
	hardStopped := false
	if strings.Contains(strings.ToLower(state), "running") {
		if err := runSliceVMCommand(ctx, "virsh", "destroy", in.VMName); err != nil {
			return nil, err
		}
		hardStopped = true
	}
	// Retry the undefine with --nvram when the plain form fails; the retry's
	// own result is ignored.
	if err := runSliceVMCommand(ctx, "virsh", "undefine", in.VMName); err != nil {
		_ = runSliceVMCommand(ctx, "virsh", "undefine", in.VMName, "--nvram")
	}
	if err := sliceVMEnsureHostDevicesBoundToVFIO(ctx, in.Slots); err != nil {
		return nil, fmt.Errorf("restore slice host devices to vfio after release: %w", err)
	}
	// Cleanup of generated files and the DHCP reservation is best-effort.
	_ = sliceVMRemoveAll(in.CloudInitDir)
	_ = removeSliceVMDHCPReservation(ctx, in.VMName)
	wiped := false
	if in.Wipe {
		for _, slot := range in.Slots {
			if err := wipeSliceVMBlockDevice(ctx, slot.NVMeDevice); err != nil {
				return nil, err
			}
		}
		wiped = true
	}
	leasesReleased := releaseSliceVMNodeSchedulerLeases(in.AllocationID, in.Slots)
	_ = newGuestTelemetryStore().unregister(in.AllocationID)
	return map[string]any{
		"vm_name":         in.VMName,
		"released":        true,
		"hard_stopped":    hardStopped,
		"wiped":           wiped,
		"slot_count":      len(in.Slots),
		"leases_released": leasesReleased,
	}, nil
}

// verifySliceVMImageDigest streams the file at path through SHA-256 and
// compares the hex digest with expected.
//
// The comparison ignores letter case, so a digest supplied in upper-case hex
// is accepted. It returns an error when the file cannot be opened or read, or
// when the digests differ.
func verifySliceVMImageDigest(path, expected string) error {
	f, err := sliceVMOpen(path)
	if err != nil {
		return fmt.Errorf("open image_path: %w", err)
	}
	defer func() { _ = f.Close() }()
	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		return fmt.Errorf("hash image_path: %w", err)
	}
	// hex.EncodeToString always emits lower-case digits; fold case so the
	// caller's spelling of the digest does not matter.
	got := hex.EncodeToString(h.Sum(nil))
	if !strings.EqualFold(got, expected) {
		return fmt.Errorf("image_sha256 mismatch")
	}
	return nil
}

// buildSliceVMVirtInstallArgs assembles the virt-install argument list for the
// slice VM.
//
// vCPU and memory totals are summed over all slots and fall back to 12 vCPUs /
// 65536 MiB when the sum is not positive. The first slot supplies the boot
// disk, the NIC MAC address, and the optional NUMA node; every further slot
// contributes an extra raw disk. Each slot's PCI address, and its fabric
// device when set, is passed through as a host device. Host-device arguments
// come last. in.Slots must not be empty.
func buildSliceVMVirtInstallArgs(in sliceVMProvisionInput, isoPath string) []string {
	const rawDiskOptions = ",format=raw,bus=scsi,target.rotation_rate=1,driver.cache=none,driver.io=native,driver.discard=unmap,driver.detect_zeroes=unmap"
	diskArg := func(device string) string {
		return "--disk=path=" + device + rawDiskOptions
	}

	var vcpus, memoryMIB int
	passthrough := make([]string, 0, len(in.Slots)*2)
	for i := range in.Slots {
		slot := &in.Slots[i]
		vcpus += slot.VCPUCount
		memoryMIB += slot.MemoryMIB
		passthrough = append(passthrough, "--host-device="+libvirtPCINodeDevice(slot.PCIAddress))
		if slot.FabricDevice != "" {
			passthrough = append(passthrough, "--host-device="+libvirtPCINodeDevice(slot.FabricDevice))
		}
	}
	if vcpus <= 0 {
		vcpus = 12
	}
	if memoryMIB <= 0 {
		memoryMIB = 65536
	}

	bootSlot := in.Slots[0]
	args := []string{
		"--name=" + in.VMName,
		"--memory=" + strconv.Itoa(memoryMIB),
		"--vcpus=" + strconv.Itoa(vcpus),
		"--cpu=host-passthrough,cache.mode=passthrough",
		"--os-variant=ubuntu24.04",
		"--controller=scsi,model=virtio-scsi",
		diskArg(bootSlot.NVMeDevice),
		"--disk=path=" + isoPath + ",device=cdrom",
		"--network=bridge=" + in.OVSBridge + ",virtualport_type=openvswitch,mac=" + bootSlot.MACAddress,
		"--graphics=none",
		"--noautoconsole",
		"--boot=uefi,loader_secure=no",
		"--tpm=none",
		"--import",
	}
	for _, extra := range in.Slots[1:] {
		args = append(args, diskArg(extra.NVMeDevice))
	}
	if node := bootSlot.NUMANode; node != nil && *node >= 0 {
		args = append(args, "--numatune="+strconv.Itoa(*node))
	}
	return append(args, passthrough...)
}

// registerSliceVMGuestTelemetry generates a fresh push token, registers the
// allocation (with VM name, boot slot private IP, and token) in the guest
// telemetry store, and returns the configuration to hand to the guest.
func registerSliceVMGuestTelemetry(in sliceVMProvisionInput, bootSlot sliceVMSlot) (sliceVMGuestTelemetryConfig, error) {
	var none sliceVMGuestTelemetryConfig

	pushToken, tokenErr := randomSliceVMGuestTelemetryToken()
	if tokenErr != nil {
		return none, tokenErr
	}
	store := newGuestTelemetryStore()
	if regErr := store.register(in.AllocationID, in.VMName, bootSlot.PrivateIP, pushToken); regErr != nil {
		return none, regErr
	}
	cfg := sliceVMGuestTelemetryConfig{
		AllocationID: in.AllocationID,
		PushURL:      sliceVMGuestTelemetryPushURL(bootSlot.PrivateIP),
		PushToken:    pushToken,
	}
	return cfg, nil
}

// randomSliceVMGuestTelemetryToken returns 24 bytes from crypto/rand encoded
// as unpadded URL-safe base64 (32 characters).
func randomSliceVMGuestTelemetryToken() (string, error) {
	const tokenBytes = 24
	entropy := make([]byte, tokenBytes)
	if _, err := rand.Read(entropy); err != nil {
		return "", fmt.Errorf("generate slice guest telemetry token: %w", err)
	}
	return base64.RawURLEncoding.EncodeToString(entropy), nil
}

// sliceVMGuestTelemetryPushURL builds the URL guests push telemetry to.
//
// The host is the gateway address derived from privateIP by
// sliceVMGatewayCIDR (falling back to 10.100.0.1). The port is taken from
// GPUAAS_TELEMETRY_ADDR when that parses as host:port with a positive port,
// and defaults to 9110 otherwise.
func sliceVMGuestTelemetryPushURL(privateIP string) string {
	const (
		defaultPort    = 9110
		defaultGateway = "10.100.0.1"
	)
	port := defaultPort
	listenAddr := strings.TrimSpace(os.Getenv("GPUAAS_TELEMETRY_ADDR"))
	if listenAddr != "" {
		_, rawPort, splitErr := net.SplitHostPort(listenAddr)
		if splitErr == nil {
			if n, convErr := strconv.Atoi(rawPort); convErr == nil && n > 0 {
				port = n
			}
		}
	}
	host := strings.TrimSuffix(sliceVMGatewayCIDR(privateIP), "/24")
	if host == "" {
		host = defaultGateway
	}
	return "http://" + host + ":" + strconv.Itoa(port) + "/internal/v1/guest-telemetry"
}

// renderSliceVMUserData renders the cloud-init "#cloud-config" user-data
// document for a slice VM.
//
// The document creates the login user with the given SSH keys (password login
// disabled), writes journald/logrotate configuration plus the telemetry probe
// and metrics helper scripts, grows the root partition, and runs a sequence of
// bootstrap commands that ends by touching sliceVMGuestReadyMarker.
//
// driverStrategy is normalized with normalizeSliceVMDriverStrategy; only when
// the result is "cloud-init" does the guest refresh the package index and
// install the NVIDIA/RDMA packages at boot. When telemetry is non-nil the
// metrics environment file, systemd service, and timer are written and
// enabled as well.
//
// NOTE(review): username is written unquoted into the users entry and embedded
// in a runcmd string; this presumably relies on the caller validating it —
// confirm.
func renderSliceVMUserData(username string, keys []string, driverStrategy string, telemetry *sliceVMGuestTelemetryConfig) string {
	var b strings.Builder
	normalizedDriverStrategy := normalizeSliceVMDriverStrategy(driverStrategy)
	// Login user: passwordless sudo, locked password, key-only SSH access.
	b.WriteString("#cloud-config\n")
	b.WriteString("users:\n")
	b.WriteString("  - name: " + username + "\n")
	b.WriteString("    sudo: ALL=(ALL) NOPASSWD:ALL\n")
	b.WriteString("    groups: [docker]\n")
	b.WriteString("    shell: /bin/bash\n")
	b.WriteString("    lock_passwd: true\n")
	b.WriteString("    ssh_authorized_keys:\n")
	for _, key := range keys {
		b.WriteString("      - " + yamlQuote(key) + "\n")
	}
	b.WriteString("ssh_pwauth: false\n")
	// The package index is refreshed only when drivers are installed at boot.
	if normalizedDriverStrategy == "cloud-init" {
		b.WriteString("package_update: true\n")
	} else {
		b.WriteString("package_update: false\n")
	}
	b.WriteString("package_upgrade: false\n")
	b.WriteString("manage_etc_hosts: true\n")
	// Files written into the guest: log retention settings and helper scripts.
	b.WriteString("write_files:\n")
	writeSliceVMCloudInitFile(&b, "/etc/systemd/journald.conf.d/gpuaas.conf", "0644", `[Journal]
SystemMaxUse=1G
RuntimeMaxUse=256M
MaxRetentionSec=1day
`)
	writeSliceVMCloudInitFile(&b, "/etc/logrotate.d/rsyslog", "0644", `/var/log/syslog
/var/log/mail.log
/var/log/kern.log
/var/log/auth.log
/var/log/user.log
/var/log/cron.log
{
	rotate 1
	daily
	missingok
	notifempty
	compress
	delaycompress
	sharedscripts
	postrotate
		/usr/lib/rsyslog/rsyslog-rotate
	endscript
	}
	`)
	writeSliceVMCloudInitFile(&b, "/etc/logrotate.d/gpuaas", "0644", `/var/log/gpuaas*.log
/var/log/gpuaas/*.log
{
	rotate 1
	daily
	missingok
	notifempty
	compress
	delaycompress
	copytruncate
}
`)
	writeSliceVMCloudInitFile(&b, sliceVMGuestTelemetryProbePath, "0755", telemetryProbeScript)
	writeSliceVMCloudInitFile(&b, sliceVMGuestMetricsHelperPath, "0755", metricsHelperScript)
	if telemetry != nil {
		// The environment file carries the push token, hence mode 0600.
		writeSliceVMCloudInitFile(&b, sliceVMGuestMetricsEnvPath, "0600", strings.Join([]string{
			"GPUAAS_METRICS_ALLOCATION_ID=" + telemetry.AllocationID,
			"GPUAAS_METRICS_PUSH_URL=" + telemetry.PushURL,
			"GPUAAS_METRICS_PUSH_TOKEN=" + telemetry.PushToken,
			"GPUAAS_METRICS_HELPER_PROBE_PATH=" + sliceVMGuestTelemetryProbePath,
			"GPUAAS_METRICS_LAST_SAMPLE_PATH=/var/lib/gpuaas/metrics/latest.json",
		}, "\n")+"\n")
		writeSliceVMCloudInitFile(&b, "/etc/systemd/system/gpuaas-metrics-helper.service", "0644", `[Unit]
Description=GPUaaS guest metrics helper
After=network-online.target
Wants=network-online.target

[Service]
Type=oneshot
EnvironmentFile=`+sliceVMGuestMetricsEnvPath+`
ExecStart=`+sliceVMGuestMetricsHelperPath+` push

[Install]
WantedBy=multi-user.target
`)
		writeSliceVMCloudInitFile(&b, "/etc/systemd/system/gpuaas-metrics-helper.timer", "0644", `[Unit]
Description=GPUaaS guest metrics helper timer

[Timer]
OnBootSec=20s
OnUnitActiveSec=30s
Unit=gpuaas-metrics-helper.service

[Install]
WantedBy=timers.target
`)
	}
	b.WriteString("growpart:\n")
	b.WriteString("  mode: auto\n")
	b.WriteString("  devices: ['/']\n")
	// Boot-time commands, emitted in execution order.
	b.WriteString("runcmd:\n")
	b.WriteString("  - [ sh, -lc, \"mkdir -p /var/lib/gpuaas\" ]\n")
	b.WriteString("  - [ sh, -lc, \"mkdir -p /etc/systemd/journald.conf.d && systemctl restart systemd-journald || true\" ]\n")
	b.WriteString("  - [ sh, -lc, \"grep -q \\\"$(hostname)\\\" /etc/hosts || printf '127.0.1.1 %s\\\\n' \\\"$(hostname)\\\" >> /etc/hosts\" ]\n")
	if normalizedDriverStrategy == "cloud-init" {
		b.WriteString("  - [ sh, -lc, \"DEBIAN_FRONTEND=noninteractive apt-get install -y pciutils rdma-core ibverbs-utils infiniband-diags linux-headers-$(uname -r) linux-modules-extra-$(uname -r) nvidia-driver-570-server nvidia-utils-570-server\" ]\n")
	}
	// Unlike the "|| true" entries, modprobe nvidia and nvidia-smi -L have no
	// fallback.
	b.WriteString("  - [ sh, -lc, \"modprobe nvidia\" ]\n")
	b.WriteString("  - [ sh, -lc, \"systemctl enable --now nvidia-persistenced || true; nvidia-smi -pm 1 || true\" ]\n")
	b.WriteString("  - [ sh, -lc, \"nvidia-smi -L\" ]\n")
	b.WriteString("  - [ sh, -lc, \"if command -v nvidia-ctk >/dev/null 2>&1; then nvidia-ctk runtime configure --runtime=docker || true; systemctl restart docker || true; fi\" ]\n")
	b.WriteString("  - [ sh, -lc, \"getent group docker >/dev/null 2>&1 && usermod -aG docker " + shellQuote(username) + " || true\" ]\n")
	b.WriteString("  - [ sh, -lc, \"modprobe mlx5_ib || true\" ]\n")
	b.WriteString("  - [ sh, -lc, \"for dev in /sys/class/net/*; do name=$(basename \\\"$dev\\\"); [ \\\"$name\\\" = lo ] && continue; ethtool -i \\\"$name\\\" 2>/dev/null | grep -q '^driver: mlx5_core' && ip link set \\\"$name\\\" up || true; done\" ]\n")
	b.WriteString("  - [ sh, -lc, \"ibv_devinfo -l || true\" ]\n")
	if telemetry != nil {
		b.WriteString("  - [ sh, -lc, \"systemctl daemon-reload && systemctl enable --now gpuaas-metrics-helper.timer && systemctl start gpuaas-metrics-helper.service\" ]\n")
	}
	// The ready marker is the last command written.
	b.WriteString("  - [ sh, -lc, \"touch " + sliceVMGuestReadyMarker + "\" ]\n")
	return b.String()
}

// writeSliceVMCloudInitFile appends one cloud-init write_files entry to b:
// quoted path and permissions, owner root:root, and content as a literal block
// scalar. CRLF line endings in content are normalized to LF, and every line is
// indented by six spaces.
func writeSliceVMCloudInitFile(b *strings.Builder, path, permissions, content string) {
	header := "  - path: " + yamlQuote(path) + "\n" +
		"    permissions: " + yamlQuote(permissions) + "\n" +
		"    owner: root:root\n" +
		"    content: |\n"
	b.WriteString(header)

	normalized := strings.ReplaceAll(content, "\r\n", "\n")
	for _, row := range strings.Split(normalized, "\n") {
		b.WriteString("      ")
		b.WriteString(row)
		b.WriteByte('\n')
	}
}

// renderSliceVMMetaData renders the cloud-init meta-data document: the
// allocation ID as instance-id and the VM name as local-hostname, both quoted.
func renderSliceVMMetaData(allocationID, vmName string) string {
	var doc strings.Builder
	doc.WriteString("instance-id: ")
	doc.WriteString(yamlQuote(allocationID))
	doc.WriteString("\nlocal-hostname: ")
	doc.WriteString(yamlQuote(vmName))
	doc.WriteString("\n")
	return doc.String()
}

// yamlQuote renders value as a YAML double-quoted scalar.
//
// Backslashes and double quotes are escaped. Line feeds and carriage returns
// become \n and \r, and any other control byte (below 0x20, or 0x7f) becomes a
// \xNN escape, so the scalar always stays on one line and decodes back to the
// original text. Tabs and all bytes >= 0x80 are copied through unchanged, so
// values without control characters produce the same output as before.
func yamlQuote(value string) string {
	const hexDigits = "0123456789abcdef"
	var b strings.Builder
	b.Grow(len(value) + 2)
	b.WriteByte('"')
	// Iterate bytes, not runes, so non-UTF-8 input is passed through intact.
	for i := 0; i < len(value); i++ {
		switch c := value[i]; {
		case c == '\\':
			b.WriteString(`\\`)
		case c == '"':
			b.WriteString(`\"`)
		case c == '\n':
			b.WriteString(`\n`)
		case c == '\r':
			b.WriteString(`\r`)
		case c == '\t':
			// Tabs are legal inside double-quoted scalars; keep them literal.
			b.WriteByte(c)
		case c < 0x20 || c == 0x7f:
			b.WriteString(`\x`)
			b.WriteByte(hexDigits[c>>4])
			b.WriteByte(hexDigits[c&0x0f])
		default:
			b.WriteByte(c)
		}
	}
	b.WriteByte('"')
	return b.String()
}

// shellQuote wraps value in single quotes for POSIX shells, rewriting each
// embedded single quote as '\” (close quote, escaped quote, reopen quote).
func shellQuote(value string) string {
	pieces := strings.Split(value, "'")
	return "'" + strings.Join(pieces, `'\''`) + "'"
}

// libvirtPCINodeDevice converts a PCI address such as "0000:3B:00.0" into the
// libvirt node device name "pci_0000_3b_00_0": the address is trimmed,
// lower-cased, and has ':' and '.' replaced by '_'.
func libvirtPCINodeDevice(pci string) string {
	normalized := strings.ToLower(strings.TrimSpace(pci))
	normalized = strings.ReplaceAll(normalized, ":", "_")
	normalized = strings.ReplaceAll(normalized, ".", "_")
	return "pci_" + normalized
}

// runSliceVMCommand runs name with args and discards its output on success.
//
// On failure the error is prefixed with the command name. If ctx has ended,
// the context error is wrapped; otherwise the message is the trimmed captured
// output, or the run error's text when nothing was captured.
func runSliceVMCommand(ctx context.Context, name string, args ...string) error {
	output, runErr := runSliceVMCommandCaptured(ctx, name, args...)
	if runErr == nil {
		return nil
	}
	if cause := ctx.Err(); cause != nil {
		return fmt.Errorf("%s: %w", name, cause)
	}
	detail := strings.TrimSpace(string(output))
	if detail == "" {
		detail = runErr.Error()
	}
	return fmt.Errorf("%s: %s", name, detail)
}

// runSliceVMCommandCaptured runs name with args, sending stdout and stderr to
// one temporary file, and returns what was written to it.
//
// The temporary file is removed before returning. Error precedence is: the
// command's run error, then the error from closing the file, then the error
// from reading it back. For the first two the captured bytes are still
// returned.
func runSliceVMCommandCaptured(ctx context.Context, name string, args ...string) ([]byte, error) {
	logFile, createErr := os.CreateTemp("", "gpuaas-slice-command-*.log")
	if createErr != nil {
		return nil, createErr
	}
	logPath := logFile.Name()
	defer func() { _ = os.Remove(logPath) }()

	command := sliceVMCommandContext(ctx, name, args...)
	command.Stdout, command.Stderr = logFile, logFile
	runErr := command.Run()
	closeErr := logFile.Close()
	captured, readErr := os.ReadFile(logPath)

	switch {
	case runErr != nil:
		return captured, runErr
	case closeErr != nil:
		return captured, closeErr
	case readErr != nil:
		return nil, readErr
	}
	return captured, nil
}

// sliceVMCommandOutput runs name with args and returns its combined
// stdout/stderr with surrounding whitespace trimmed.
//
// On failure it returns an empty string and an error prefixed with the command
// name: the wrapped context error when ctx has ended, otherwise the trimmed
// output or, if there was none, the run error's text.
func sliceVMCommandOutput(ctx context.Context, name string, args ...string) (string, error) {
	combined, runErr := sliceVMCommandContext(ctx, name, args...).CombinedOutput()
	text := strings.TrimSpace(string(combined))
	if runErr == nil {
		return text, nil
	}
	if cause := ctx.Err(); cause != nil {
		return "", fmt.Errorf("%s: %w", name, cause)
	}
	if text == "" {
		text = runErr.Error()
	}
	return "", fmt.Errorf("%s: %s", name, text)
}

// ensureSliceVMDHCPReservation writes the dnsmasq reservation for the VM's
// boot slot (the first slot) and restarts dnsmasq.
//
// Before writing, the boot network is validated, the OVS bridge is prepared,
// and stale conflicting reservations are removed. If the restart fails, the
// new reservation file is deleted and dnsmasq is restarted once more on a
// background context before the error is returned.
func ensureSliceVMDHCPReservation(ctx context.Context, in sliceVMProvisionInput) error {
	if len(in.Slots) == 0 {
		return fmt.Errorf("slice VM has no slots")
	}
	boot := in.Slots[0]
	if err := validateSliceVMBootNetwork(boot); err != nil {
		return err
	}
	if err := ensureSliceVMBridgeReady(ctx, in.OVSBridge, boot.PrivateIP); err != nil {
		return err
	}

	reservationPath := sliceVMDNSMasqReservationPath(in.VMName)
	reservation := renderSliceVMDNSMasqReservation(in.VMName, boot)
	if err := removeStaleConflictingSliceVMDHCPReservations(ctx, in.VMName, boot); err != nil {
		return err
	}
	if err := writeSecureFile(reservationPath, reservation, 0o644); err != nil {
		return fmt.Errorf("write dnsmasq reservation: %w", err)
	}

	restartErr := runSliceVMCommand(ctx, "systemctl", "restart", "dnsmasq")
	if restartErr == nil {
		return nil
	}
	// Roll back: drop the reservation and restart dnsmasq without it. The
	// rollback uses a fresh context so it still runs if ctx has ended.
	_ = os.Remove(reservationPath)
	_ = runSliceVMCommand(context.Background(), "systemctl", "restart", "dnsmasq")
	return fmt.Errorf("reload dnsmasq reservation: %w", restartErr)
}

// removeStaleConflictingSliceVMDHCPReservations deletes gpuaas-*.conf
// reservation files, other than vmName's own, that conflict with slot's MAC or
// IP and whose VM is unknown to virsh.
//
// A conflicting reservation whose VM still answers "virsh dominfo" is an
// error. A missing config directory is not an error.
func removeStaleConflictingSliceVMDHCPReservations(ctx context.Context, vmName string, slot sliceVMSlot) error {
	entries, err := os.ReadDir(sliceVMDNSMasqConfigDir)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return nil
	case err != nil:
		return fmt.Errorf("list dnsmasq reservations: %w", err)
	}

	ownPath := filepath.Clean(sliceVMDNSMasqReservationPath(vmName))
	for _, entry := range entries {
		fileName := entry.Name()
		managed := strings.HasPrefix(fileName, "gpuaas-") && strings.HasSuffix(fileName, ".conf")
		if entry.IsDir() || !managed {
			continue
		}
		candidate := filepath.Join(sliceVMDNSMasqConfigDir, fileName)
		if filepath.Clean(candidate) == ownPath {
			continue
		}
		body, readErr := os.ReadFile(candidate)
		if readErr != nil {
			return fmt.Errorf("read dnsmasq reservation %s: %w", candidate, readErr)
		}
		if !sliceVMDHCPReservationConflicts(string(body), slot) {
			continue
		}
		// Refuse to touch a reservation whose VM virsh still knows about.
		if owner := sliceVMNameFromDNSMasqReservationPath(candidate); owner != "" {
			if _, infoErr := sliceVMCommandOutput(ctx, "virsh", "dominfo", owner); infoErr == nil {
				return fmt.Errorf("dnsmasq reservation conflict with active VM %s", owner)
			}
		}
		if rmErr := os.Remove(candidate); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
			return fmt.Errorf("remove stale dnsmasq reservation %s: %w", candidate, rmErr)
		}
	}
	return nil
}

// sliceVMDHCPReservationConflicts reports whether a dnsmasq reservation file's
// content mentions slot's MAC address (as "<mac>,") or private IP (as
// ",<ip>,"), ignoring letter case.
//
// A blank MAC address or IP never matches. Without that guard a blank MAC
// would reduce the search needle to "," and flag every reservation as a
// conflict.
func sliceVMDHCPReservationConflicts(content string, slot sliceVMSlot) bool {
	haystack := strings.ToLower(content)
	if mac := strings.ToLower(slot.MACAddress); strings.TrimSpace(mac) != "" && strings.Contains(haystack, mac+",") {
		return true
	}
	ip := strings.ToLower(slot.PrivateIP)
	return strings.TrimSpace(ip) != "" && strings.Contains(haystack, ","+ip+",")
}

// sliceVMNameFromDNSMasqReservationPath recovers the VM name from a
// reservation file path by taking the base name and stripping the ".conf"
// suffix and the "gpuaas-" prefix.
func sliceVMNameFromDNSMasqReservationPath(path string) string {
	stem := strings.TrimSuffix(filepath.Base(path), ".conf")
	return strings.TrimPrefix(stem, "gpuaas-")
}

// ensureSliceVMBridgeReady makes sure the OVS bridge exists, carries the
// gateway address derived from privateIP, and is up.
//
// A blank bridge name defaults to "ovsbr0"; the name must match
// sliceVMBridgePattern. When sliceVMGatewayCIDR yields no gateway for
// privateIP, the address step is skipped.
func ensureSliceVMBridgeReady(ctx context.Context, bridge, privateIP string) error {
	bridge = strings.TrimSpace(bridge)
	if bridge == "" {
		bridge = "ovsbr0"
	}
	if !sliceVMBridgePattern.MatchString(bridge) {
		return fmt.Errorf("invalid ovs_bridge")
	}
	// --may-exist makes the add a no-op for an already existing bridge.
	if err := runSliceVMCommand(ctx, "ovs-vsctl", "--may-exist", "add-br", bridge); err != nil {
		return fmt.Errorf("ensure OVS bridge: %w", err)
	}
	gatewayCIDR := sliceVMGatewayCIDR(privateIP)
	if gatewayCIDR != "" {
		current, err := sliceVMCommandOutput(ctx, "ip", "-4", "addr", "show", "dev", bridge)
		// Add the gateway address when the lookup failed or its output does not
		// contain the gateway IP.
		// NOTE(review): this is a substring match, so "10.0.0.1" would also be
		// found inside "10.0.0.10" — confirm that is acceptable.
		if err != nil || !strings.Contains(current, strings.TrimSuffix(gatewayCIDR, "/24")) {
			// An add failure is tolerated only when the lookup returned some
			// output and the error text mentions "File exists".
			if err := runSliceVMCommand(ctx, "ip", "addr", "add", gatewayCIDR, "dev", bridge); err != nil && (current == "" || !strings.Contains(err.Error(), "File exists")) {
				return fmt.Errorf("configure OVS bridge address: %w", err)
			}
		}
	}
	if err := runSliceVMCommand(ctx, "ip", "link", "set", bridge, "up"); err != nil {
		return fmt.Errorf("bring OVS bridge up: %w", err)
	}
	return nil
}

// sliceVMGatewayCIDR derives the gateway for privateIP by keeping the first
// three octets and returning "<a>.<b>.<c>.1/24".
//
// privateIP may be a bare IPv4 address or CIDR notation; any prefix length in
// the input is ignored. Blank, unparsable, and non-IPv4 input yields "".
func sliceVMGatewayCIDR(privateIP string) string {
	candidate := strings.TrimSpace(privateIP)
	if candidate == "" {
		return ""
	}
	var parsed net.IP
	if strings.Contains(candidate, "/") {
		addr, _, cidrErr := net.ParseCIDR(candidate)
		if cidrErr != nil {
			return ""
		}
		parsed = addr
	} else {
		parsed = net.ParseIP(candidate)
	}
	v4 := parsed.To4()
	if v4 == nil {
		return ""
	}
	return fmt.Sprintf("%d.%d.%d.1/24", v4[0], v4[1], v4[2])
}

// removeSliceVMDHCPReservation deletes vmName's dnsmasq reservation file (a
// missing file is fine) and, when systemctl is on PATH, restarts dnsmasq. The
// restart's result is ignored.
func removeSliceVMDHCPReservation(ctx context.Context, vmName string) error {
	rmErr := os.Remove(sliceVMDNSMasqReservationPath(vmName))
	if rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
		return fmt.Errorf("remove dnsmasq reservation: %w", rmErr)
	}
	if _, lookErr := exec.LookPath("systemctl"); lookErr != nil {
		return nil
	}
	_ = runSliceVMCommand(ctx, "systemctl", "restart", "dnsmasq")
	return nil
}

// sliceVMDNSMasqReservationPath returns the reservation file path for vmName:
// "gpuaas-<vmName>.conf" inside sliceVMDNSMasqConfigDir.
func sliceVMDNSMasqReservationPath(vmName string) string {
	fileName := "gpuaas-" + vmName + ".conf"
	return filepath.Join(sliceVMDNSMasqConfigDir, fileName)
}

// renderSliceVMDNSMasqReservation renders the dnsmasq config snippet for one
// VM: a "managed by" comment line followed by a dhcp-host entry built from the
// slot's MAC address, its private IP, the VM name, and "24h".
func renderSliceVMDNSMasqReservation(vmName string, slot sliceVMSlot) string {
	hostFields := []string{slot.MACAddress, slot.PrivateIP, vmName, "24h"}
	return "# managed by gpuaas-node-agent\n" +
		"dhcp-host=" + strings.Join(hostFields, ",") + "\n"
}

// waitForSliceVMSSH polls host:port over TCP until a connection succeeds, ctx
// ends, or timeout has elapsed.
//
// Each dial attempt is limited to two seconds and failed attempts are followed
// by a two second pause. It returns nil on the first successful connect,
// ctx.Err() when the context ends during a pause, and a timeout error
// otherwise.
func waitForSliceVMSSH(ctx context.Context, host string, port int, timeout time.Duration) error {
	target := net.JoinHostPort(host, strconv.Itoa(port))
	probe := net.Dialer{Timeout: 2 * time.Second}
	for stopAt := time.Now().Add(timeout); time.Now().Before(stopAt); {
		if conn, dialErr := probe.DialContext(ctx, "tcp", target); dialErr == nil {
			_ = conn.Close()
			return nil
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(2 * time.Second):
		}
	}
	return fmt.Errorf("ssh readiness timeout")
}

// waitForSliceVMGuestReady repeatedly runs a readiness check on the guest over
// SSH until it succeeds, ctx ends, or timeout has elapsed.
//
// The check requires sliceVMGuestReadyMarker to exist and "nvidia-smi -L" to
// succeed. The SSH identity comes from GPUAAS_SLICE_TERMINAL_SSH_KEY_PATH,
// falling back to defaultSliceVMTerminalSSHKeyPath; host key checking is
// disabled. Failed attempts are followed by a five second pause. On timeout
// the message of the last failed attempt, if any, is wrapped into the error.
func waitForSliceVMGuestReady(ctx context.Context, username, host string, timeout time.Duration) error {
	user := strings.TrimSpace(username)
	target := strings.TrimSpace(host)
	switch {
	case user == "":
		return fmt.Errorf("slice VM guest readiness missing username")
	case target == "":
		return fmt.Errorf("slice VM guest readiness missing host")
	}

	identity := strings.TrimSpace(os.Getenv("GPUAAS_SLICE_TERMINAL_SSH_KEY_PATH"))
	if identity == "" {
		identity = defaultSliceVMTerminalSSHKeyPath
	}
	remoteCheck := "test -f " + shellQuote(sliceVMGuestReadyMarker) + " && nvidia-smi -L >/tmp/gpuaas-nvidia-smi.log 2>&1"
	// The argument list is the same for every attempt, so build it once.
	sshArgs := []string{
		"-i", identity,
		"-o", "BatchMode=yes",
		"-o", "StrictHostKeyChecking=no",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "ConnectTimeout=5",
		user + "@" + target,
		remoteCheck,
	}

	var lastFailure error
	for stopAt := time.Now().Add(timeout); time.Now().Before(stopAt); {
		output, runErr := sliceVMCommandContext(ctx, "ssh", sshArgs...).CombinedOutput()
		if runErr == nil {
			return nil
		}
		if cause := ctx.Err(); cause != nil {
			return cause
		}
		detail := strings.TrimSpace(string(output))
		if detail == "" {
			detail = runErr.Error()
		}
		lastFailure = fmt.Errorf("%s", detail)
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(5 * time.Second):
		}
	}
	if lastFailure == nil {
		return fmt.Errorf("guest readiness timeout")
	}
	return fmt.Errorf("guest readiness timeout: %w", lastFailure)
}

// collectSliceVMPerformanceProbe runs a single diagnostic shell script on the
// guest over SSH and returns its key=value output as a map.
//
// The script reports hostname, vCPU count and memory; the exit code and
// elapsed milliseconds of `sudo -n true`, `nvidia-smi -L` and
// `sudo -n nvidia-smi -L`; GPU and HCA counts; RDMA device names; the root
// disk's device, rotational flag and model; Docker, Docker Compose and
// nvidia-ctk versions; and whether Docker lists an nvidia runtime. It starts
// with `set +e`, so individual probe failures do not abort the script.
//
// username and host are whitespace-trimmed and must be non-empty. The SSH
// identity file is read from GPUAAS_SLICE_TERMINAL_SSH_KEY_PATH, falling back
// to defaultSliceVMTerminalSSHKeyPath. A non-positive timeout defaults to 30
// seconds and bounds the whole SSH invocation.
//
// On success the combined SSH output is handed to
// parseSliceVMPerformanceProbeOutput. On failure the error wraps the context
// error when the probe context is done; otherwise it carries the trimmed
// command output, or the command error text when there was no output.
func collectSliceVMPerformanceProbe(ctx context.Context, username, host string, timeout time.Duration) (map[string]any, error) {
	username, host = strings.TrimSpace(username), strings.TrimSpace(host)
	switch {
	case username == "":
		return nil, fmt.Errorf("slice VM performance probe missing username")
	case host == "":
		return nil, fmt.Errorf("slice VM performance probe missing host")
	}

	identity := strings.TrimSpace(os.Getenv("GPUAAS_SLICE_TERMINAL_SSH_KEY_PATH"))
	if identity == "" {
		identity = defaultSliceVMTerminalSSHKeyPath
	}
	if timeout <= 0 {
		timeout = 30 * time.Second
	}
	probeCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	// Each entry is one line of the remote script; every probe prints key=value.
	scriptLines := []string{
		"set +e",
		"ms() { date +%s%3N; }",
		"time_cmd() { name=\"$1\"; shift; start=$(ms); \"$@\" >/tmp/gpuaas-${name}.out 2>/tmp/gpuaas-${name}.err; rc=$?; end=$(ms); printf '%s_rc=%s\\n' \"$name\" \"$rc\"; printf '%s_ms=%s\\n' \"$name\" \"$((end-start))\"; }",
		"root_source=$(findmnt -n -o SOURCE / 2>/dev/null)",
		"root_disk=$(lsblk -no PKNAME \"$root_source\" 2>/dev/null | awk 'NR==1 {print $1}')",
		"if [ -z \"$root_disk\" ]; then root_disk=${root_source#/dev/}; fi",
		"printf 'hostname=%s\\n' \"$(hostname)\"",
		"printf 'vcpus=%s\\n' \"$(nproc 2>/dev/null || printf 0)\"",
		"awk '/MemTotal/ {printf \"mem_mib=%d\\n\", int($2/1024)}' /proc/meminfo",
		"time_cmd sudo_true sudo -n true",
		"time_cmd nvidia_smi nvidia-smi -L",
		"time_cmd sudo_nvidia_smi sudo -n nvidia-smi -L",
		"printf 'gpu_count=%s\\n' \"$(nvidia-smi -L 2>/dev/null | grep -c '^GPU ')\"",
		"printf 'hca_count=%s\\n' \"$(ibv_devinfo -l 2>/dev/null | awk '/HCA found/ {print $1; found=1} END {if (!found) print 0}')\"",
		"printf 'rdma_devices=%s\\n' \"$(ibv_devinfo -l 2>/dev/null | awk 'NR > 1 && NF {gsub(/^[[:space:]]+/, \"\"); print}' | paste -sd, -)\"",
		"printf 'root_disk_device=%s\\n' \"$root_disk\"",
		"printf 'root_disk_rota=%s\\n' \"$(lsblk -dn -o ROTA \"/dev/$root_disk\" 2>/dev/null | awk 'NR==1 {print $1}')\"",
		"printf 'root_disk_model=%s\\n' \"$(lsblk -dn -o MODEL \"/dev/$root_disk\" 2>/dev/null | awk '{$1=$1; print}')\"",
		"printf 'docker_version=%s\\n' \"$(docker --version 2>/dev/null | awk '{print $3}' | tr -d ,)\"",
		"printf 'docker_compose_version=%s\\n' \"$(docker compose version --short 2>/dev/null)\"",
		"printf 'nvidia_ctk_version=%s\\n' \"$(nvidia-ctk --version 2>/dev/null | awk 'NR==1 {print $NF}')\"",
		"printf 'docker_nvidia_runtime=%s\\n' \"$(sudo -n docker info --format '{{json .Runtimes}}' 2>/dev/null | grep -q nvidia && printf true || printf false)\"",
	}
	sshArgs := []string{
		"-i", identity,
		"-o", "BatchMode=yes",
		"-o", "StrictHostKeyChecking=no",
		"-o", "UserKnownHostsFile=/dev/null",
		"-o", "ConnectTimeout=5",
		username + "@" + host,
		strings.Join(scriptLines, "\n"),
	}

	raw, runErr := sliceVMCommandContext(probeCtx, "ssh", sshArgs...).CombinedOutput()
	if runErr == nil {
		return parseSliceVMPerformanceProbeOutput(string(raw)), nil
	}
	// Prefer the context error so timeouts and cancellation stay recognisable.
	if cause := probeCtx.Err(); cause != nil {
		return nil, fmt.Errorf("slice VM performance probe: %w", cause)
	}
	detail := strings.TrimSpace(string(raw))
	if detail == "" {
		detail = runErr.Error()
	}
	return nil, fmt.Errorf("slice VM performance probe: %s", detail)
}

// parseSliceVMPerformanceProbeOutput turns newline-separated key=value text
// into a map.
//
// Each line is whitespace-trimmed and split at its first "=". Blank lines,
// lines without "=", and lines whose trimmed key is empty are ignored. A
// trimmed value that parses as a base-10 int64 is stored as int64; any other
// value (including the empty string) is stored as a string. When a key
// repeats, the last occurrence wins. The returned map is never nil.
func parseSliceVMPerformanceProbeOutput(out string) map[string]any {
	fields := map[string]any{}
	for rest := out; rest != ""; {
		var line string
		line, rest, _ = strings.Cut(rest, "\n")

		name, raw, found := strings.Cut(strings.TrimSpace(line), "=")
		name = strings.TrimSpace(name)
		if !found || name == "" {
			continue
		}
		raw = strings.TrimSpace(raw)

		// Default to the raw text; upgrade to int64 only on a clean parse.
		var entry any = raw
		if number, err := strconv.ParseInt(raw, 10, 64); err == nil {
			entry = number
		}
		fields[name] = entry
	}
	return fields
}

// wipeSliceVMBlockDevice erases a block device and verifies that no
// signatures remain on it.
//
// The steps run strictly in order and the first failure is returned as-is:
//  1. validateSliceVMBlockDevice(device)
//  2. ensureSliceVMBlockDeviceNotMounted(ctx, device)
//  3. `wipefs --all --force <device>`
//  4. `blkdiscard <device>`
//  5. `wipefs --noheadings <device>`, which must print nothing
//
// What counts as a valid or mounted device is decided by those helpers, which
// are defined elsewhere in the file. If the final wipefs listing is not empty
// after trimming whitespace, a "wipe verification failed" error naming the
// device is returned.
func wipeSliceVMBlockDevice(ctx context.Context, device string) error {
	// Safety checks come first so nothing destructive runs on a rejected device.
	steps := []func() error{
		func() error { return validateSliceVMBlockDevice(device) },
		func() error { return ensureSliceVMBlockDeviceNotMounted(ctx, device) },
		func() error { return runSliceVMCommand(ctx, "wipefs", "--all", "--force", device) },
		func() error { return runSliceVMCommand(ctx, "blkdiscard", device) },
	}
	for _, step := range steps {
		if err := step(); err != nil {
			return err
		}
	}

	// Any remaining output from wipefs means a signature survived the wipe.
	leftover, err := sliceVMCommandOutput(ctx, "wipefs", "--noheadings", device)
	if err != nil {
		return err
	}
	if strings.TrimSpace(leftover) == "" {
		return nil
	}
	return fmt.Errorf("wipe verification failed for %s", device)
}
