bhyve/vmx: inject faults into the guest for invalid XSETBV.

Adds the VM-entry interruption-info bits for hardware exceptions and
error-code delivery, adds vmx_inject_exception() with #GP/#UD wrappers,
makes vmx_emulate_xsetbv() inject a fault and return HANDLED where it
used to return UNHANDLED, and replaces literal 0/1 "handled" values with
UNHANDLED/HANDLED.

--- //depot/vendor/freebsd/src/sys/amd64/vmm/intel/vmcs.h
+++ //depot/user/jhb/bhyve/sys/amd64/vmm/intel/vmcs.h
@@ -345,6 +345,8 @@
 #define	VMCS_INTR_T_MASK	0x700		/* Interruption-info type */
 #define	VMCS_INTR_T_HWINTR	(0 << 8)
 #define	VMCS_INTR_T_NMI		(2 << 8)
+#define	VMCS_INTR_T_HWEXCEPTION	(3 << 8)
+#define	VMCS_INTR_DEL_ERRCODE	(1 << 11)
 
 /*
  * VMCS IDT-Vectoring information fields
--- //depot/vendor/freebsd/src/sys/amd64/vmm/intel/vmx.c
+++ //depot/user/jhb/bhyve/sys/amd64/vmm/intel/vmx.c
@@ -1062,6 +1062,48 @@
 			 VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
 
 static void
+vmx_inject_exception(struct vmx *vmx, int vcpu, struct vm_exit *vmexit,
+    int fault, int errvalid, int errcode)
+{
+	uint32_t info;
+
+	info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+	KASSERT((info & VMCS_INTR_VALID) == 0, ("vmx_inject_exception: invalid "
+	    "VM-entry interruption information %#x", info));
+
+	/*
+	 * Although INTR_T_HWEXCEPTION does not advance %rip, vmx_run()
+	 * always advances it, so we clear the instruction length to zero
+	 * explicitly.
+	 */
+	vmexit->inst_length = 0;
+	info = fault | VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID;
+	if (errvalid) {
+		info |= VMCS_INTR_DEL_ERRCODE;
+		vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, errcode);
+	}
+	vmcs_write(VMCS_ENTRY_INTR_INFO, info);
+
+	VCPU_CTR2(vmx->vm, vcpu, "Injecting fault %d (errcode %d)", fault,
+	    errcode);
+}
+
+/* All #GP faults VMM injects use an error code of 0. */
+static void
+vmx_inject_gp(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
+{
+
+	vmx_inject_exception(vmx, vcpu, vmexit, IDT_GP, 1, 0);
+}
+
+static void
+vmx_inject_ud(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
+{
+
+	vmx_inject_exception(vmx, vcpu, vmexit, IDT_UD, 0, 0);
+}
+
+static void
 vmx_inject_nmi(struct vmx *vmx, int vcpu)
 {
 	uint32_t gi, info;
@@ -1094,6 +1136,14 @@
 	uint64_t rflags;
 	uint32_t gi, info;
 
+	/*
+	 * If a hardware exception is pending, don't do anything.
+	 */
+	info = vmcs_read(VMCS_ENTRY_INTR_INFO);
+	if ((info & (VMCS_INTR_T_MASK | VMCS_INTR_VALID)) ==
+	    (VMCS_INTR_T_HWEXCEPTION | VMCS_INTR_VALID))
+		return;
+
 	if (vm_nmi_pending(vmx->vm, vcpu)) {
 		/*
 		 * If there are no conditions blocking NMI injection then
@@ -1228,7 +1278,7 @@
 }
 
 static int
-vmx_emulate_xsetbv(struct vmx *vmx, int vcpu)
+vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
 {
 	struct vmxctx *vmxctx;
 	uint64_t xcrval;
@@ -1237,20 +1287,40 @@
 	vmxctx = &vmx->ctx[vcpu];
 	limits = vmm_get_xsave_limits();
 
-	/* We only handle xcr0 if the host has XSAVE enabled. */
-	if (vmxctx->guest_rcx != 0 || !limits->xsave_enabled)
-		return (UNHANDLED);
+	/*
+	 * Note that the processor raises a #GP fault on its own if
+	 * xsetbv is executed for CPL != 0, so we do not have to
+	 * emulate that fault here.
+	 */
+
+	/* Only xcr0 is supported. */
+	if (vmxctx->guest_rcx != 0) {
+		vmx_inject_gp(vmx, vcpu, vmexit);
+		return (HANDLED);
+	}
+
+	/* We only handle xcr0 if both the host and guest have XSAVE enabled. */
+	if (!limits->xsave_enabled || !(vmcs_read(VMCS_GUEST_CR4) & CR4_XSAVE)) {
+		vmx_inject_ud(vmx, vcpu, vmexit);
+		return (HANDLED);
+	}
 
 	xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff);
-	if ((xcrval & ~limits->xcr0_allowed) != 0)
-		return (UNHANDLED);
+	if ((xcrval & ~limits->xcr0_allowed) != 0) {
+		vmx_inject_gp(vmx, vcpu, vmexit);
+		return (HANDLED);
+	}
 
-	if (!(xcrval & XFEATURE_ENABLED_X87))
-		return (UNHANDLED);
+	if (!(xcrval & XFEATURE_ENABLED_X87)) {
+		vmx_inject_gp(vmx, vcpu, vmexit);
+		return (HANDLED);
+	}
 
 	if ((xcrval & (XFEATURE_ENABLED_AVX | XFEATURE_ENABLED_SSE)) ==
-	    XFEATURE_ENABLED_AVX)
-		return (UNHANDLED);
+	    XFEATURE_ENABLED_AVX) {
+		vmx_inject_gp(vmx, vcpu, vmexit);
+		return (HANDLED);
+	}
 
 	/*
 	 * This runs "inside" vmrun() with the guest's FPU state, so
@@ -1448,7 +1518,7 @@
 	if (!virtual_interrupt_delivery)
 		return (UNHANDLED);
 
-	handled = 1;
+	handled = HANDLED;
 	offset = APIC_WRITE_OFFSET(qual);
 	switch (offset) {
 	case APIC_OFFSET_ID:
@@ -1470,7 +1540,7 @@
 		retu = false;
 		error = vlapic_icrlo_write_handler(vlapic, &retu);
 		if (error != 0 || retu)
-			handled = 0;
+			handled = UNHANDLED;
 		break;
 	case APIC_OFFSET_CMCI_LVT:
 	case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
@@ -1483,7 +1553,7 @@
 		vlapic_dcr_write_handler(vlapic);
 		break;
 	default:
-		handled = 0;
+		handled = UNHANDLED;
 		break;
 	}
 	return (handled);
@@ -1583,7 +1653,7 @@
 	CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0);
 	CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_NMI_EXITING) != 0);
 
-	handled = 0;
+	handled = UNHANDLED;
 	vmxctx = &vmx->ctx[vcpu];
 
 	qual = vmexit->u.vmx.exit_qualification;
@@ -1646,7 +1716,7 @@
 			vmexit->exitcode = VM_EXITCODE_RDMSR;
 			vmexit->u.msr.code = ecx;
 		} else if (!retu) {
-			handled = 1;
+			handled = HANDLED;
 		} else {
 			/* Return to userspace with a valid exitcode */
 			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
@@ -1666,7 +1736,7 @@
 			vmexit->u.msr.code = ecx;
 			vmexit->u.msr.wval = (uint64_t)edx << 32 | eax;
 		} else if (!retu) {
-			handled = 1;
+			handled = HANDLED;
 		} else {
 			/* Return to userspace with a valid exitcode */
 			KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
@@ -1809,7 +1886,7 @@
 		handled = vmx_handle_apic_write(vlapic, qual);
 		break;
 	case EXIT_REASON_XSETBV:
-		handled = vmx_emulate_xsetbv(vmx, vcpu);
+		handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit);
 		break;
 	default:
 		vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1);