mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 01:09:38 -05:00
x86:
* new selftests * fixes for migration with HyperV re-enlightenment enabled * fix RCU/SRCU usage * fixes for local_irq_restore misuse false positive -----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmBUpO8UHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroPj6Af+LSkDniR08Eh/x4GHdX+ZSA9EhNuP PMqL+nDYvLXqc0XaErbZQpQbSP4aK7Tjly0LguZmNkBk17pnbjLb5Vv9hqJ30pM/ pI8bGgdh+KDO9LClfrgsaYgC+B4R+fwqqTIvtBYMilVZ96JwixFiODB4ntRQmZgd xJS99jwjD8TO9pTYskKPf8y8yv5W9RH+wVQGXwc+T/sSzK/rcL4Jwt/ibO2FLcJK gBRXJDVjMIlpxPrqqoejVB2FHQQe36Bns85QU3dz0QuXfDuuEvbShY/f4R1z32fT RaccrvdMQtvgwS0l9Ij06PT0BdiG0EdZv/gOBUq5gVgx4XZyJTleJaVURw== =WZP4 -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull kvm fixes from Paolo Bonzini: "Fixes for kvm on x86: - new selftests - fixes for migration with HyperV re-enlightenment enabled - fix RCU/SRCU usage - fixes for local_irq_restore misuse false positive" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: documentation/kvm: additional explanations on KVM_SET_BOOT_CPU_ID x86/kvm: Fix broken irq restoration in kvm_wait KVM: X86: Fix missing local pCPU when executing wbinvd on all dirty pCPUs KVM: x86: Protect userspace MSR filter with SRCU, and set atomically-ish selftests: kvm: add set_boot_cpu_id test selftests: kvm: add _vm_ioctl selftests: kvm: add get_msr_index_features selftests: kvm: Add basic Hyper-V clocksources tests KVM: x86: hyper-v: Don't touch TSC page values when guest opted for re-enlightenment KVM: x86: hyper-v: Track Hyper-V TSC page status KVM: x86: hyper-v: Prevent using not-yet-updated TSC page by secondary CPUs KVM: x86: hyper-v: Limit guest to writing zero to HV_X64_MSR_TSC_EMULATION_STATUS KVM: x86/mmu: Store the address space ID in the TDP iterator KVM: x86/mmu: Factor out tdp_iter_return_to_root KVM: x86/mmu: Fix RCU usage when atomically zapping SPTEs KVM: x86/mmu: Fix RCU usage in handle_removed_tdp_mmu_page
This commit is contained in:
commit
ecd8ee7f9c
18 changed files with 808 additions and 121 deletions
|
@ -1495,7 +1495,8 @@ Fails if any VCPU has already been created.
|
|||
|
||||
Define which vcpu is the Bootstrap Processor (BSP). Values are the same
|
||||
as the vcpu id in KVM_CREATE_VCPU. If this ioctl is not called, the default
|
||||
is vcpu 0.
|
||||
is vcpu 0. This ioctl has to be called before vcpu creation,
|
||||
otherwise it will return EBUSY error.
|
||||
|
||||
|
||||
4.42 KVM_GET_XSAVE
|
||||
|
@ -4806,8 +4807,10 @@ If an MSR access is not permitted through the filtering, it generates a
|
|||
allows user space to deflect and potentially handle various MSR accesses
|
||||
into user space.
|
||||
|
||||
If a vCPU is in running state while this ioctl is invoked, the vCPU may
|
||||
experience inconsistent filtering behavior on MSR accesses.
|
||||
Note, invoking this ioctl with a vCPU is running is inherently racy. However,
|
||||
KVM does guarantee that vCPUs will see either the previous filter or the new
|
||||
filter, e.g. MSRs with identical settings in both the old and new filter will
|
||||
have deterministic behavior.
|
||||
|
||||
4.127 KVM_XEN_HVM_SET_ATTR
|
||||
--------------------------
|
||||
|
|
|
@ -884,12 +884,29 @@ struct kvm_hv_syndbg {
|
|||
u64 options;
|
||||
};
|
||||
|
||||
/* Current state of Hyper-V TSC page clocksource */
|
||||
enum hv_tsc_page_status {
|
||||
/* TSC page was not set up or disabled */
|
||||
HV_TSC_PAGE_UNSET = 0,
|
||||
/* TSC page MSR was written by the guest, update pending */
|
||||
HV_TSC_PAGE_GUEST_CHANGED,
|
||||
/* TSC page MSR was written by KVM userspace, update pending */
|
||||
HV_TSC_PAGE_HOST_CHANGED,
|
||||
/* TSC page was properly set up and is currently active */
|
||||
HV_TSC_PAGE_SET,
|
||||
/* TSC page is currently being updated and therefore is inactive */
|
||||
HV_TSC_PAGE_UPDATING,
|
||||
/* TSC page was set up with an inaccessible GPA */
|
||||
HV_TSC_PAGE_BROKEN,
|
||||
};
|
||||
|
||||
/* Hyper-V emulation context */
|
||||
struct kvm_hv {
|
||||
struct mutex hv_lock;
|
||||
u64 hv_guest_os_id;
|
||||
u64 hv_hypercall;
|
||||
u64 hv_tsc_page;
|
||||
enum hv_tsc_page_status hv_tsc_page_status;
|
||||
|
||||
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
|
||||
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
|
||||
|
@ -931,6 +948,12 @@ enum kvm_irqchip_mode {
|
|||
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
|
||||
};
|
||||
|
||||
struct kvm_x86_msr_filter {
|
||||
u8 count;
|
||||
bool default_allow:1;
|
||||
struct msr_bitmap_range ranges[16];
|
||||
};
|
||||
|
||||
#define APICV_INHIBIT_REASON_DISABLE 0
|
||||
#define APICV_INHIBIT_REASON_HYPERV 1
|
||||
#define APICV_INHIBIT_REASON_NESTED 2
|
||||
|
@ -1025,16 +1048,11 @@ struct kvm_arch {
|
|||
bool guest_can_read_msr_platform_info;
|
||||
bool exception_payload_enabled;
|
||||
|
||||
bool bus_lock_detection_enabled;
|
||||
|
||||
/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
|
||||
u32 user_space_msr_mask;
|
||||
|
||||
struct {
|
||||
u8 count;
|
||||
bool default_allow:1;
|
||||
struct msr_bitmap_range ranges[16];
|
||||
} msr_filter;
|
||||
|
||||
bool bus_lock_detection_enabled;
|
||||
struct kvm_x86_msr_filter __rcu *msr_filter;
|
||||
|
||||
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
|
||||
struct task_struct *nx_lpage_recovery_thread;
|
||||
|
|
|
@ -836,28 +836,25 @@ static void kvm_kick_cpu(int cpu)
|
|||
|
||||
static void kvm_wait(u8 *ptr, u8 val)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
if (in_nmi())
|
||||
return;
|
||||
|
||||
local_irq_save(flags);
|
||||
|
||||
if (READ_ONCE(*ptr) != val)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* halt until it's our turn and kicked. Note that we do safe halt
|
||||
* for irq enabled case to avoid hang when lock info is overwritten
|
||||
* in irq spinlock slowpath and no spurious interrupt occur to save us.
|
||||
*/
|
||||
if (arch_irqs_disabled_flags(flags))
|
||||
halt();
|
||||
else
|
||||
safe_halt();
|
||||
if (irqs_disabled()) {
|
||||
if (READ_ONCE(*ptr) == val)
|
||||
halt();
|
||||
} else {
|
||||
local_irq_disable();
|
||||
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
if (READ_ONCE(*ptr) == val)
|
||||
safe_halt();
|
||||
|
||||
local_irq_enable();
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
|
|
|
@ -520,10 +520,10 @@ static u64 get_time_ref_counter(struct kvm *kvm)
|
|||
u64 tsc;
|
||||
|
||||
/*
|
||||
* The guest has not set up the TSC page or the clock isn't
|
||||
* stable, fall back to get_kvmclock_ns.
|
||||
* Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
|
||||
* is broken, disabled or being updated.
|
||||
*/
|
||||
if (!hv->tsc_ref.tsc_sequence)
|
||||
if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
|
||||
return div_u64(get_kvmclock_ns(kvm), 100);
|
||||
|
||||
vcpu = kvm_get_vcpu(kvm, 0);
|
||||
|
@ -1077,6 +1077,21 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Don't touch TSC page values if the guest has opted for TSC emulation after
|
||||
* migration. KVM doesn't fully support reenlightenment notifications and TSC
|
||||
* access emulation and Hyper-V is known to expect the values in TSC page to
|
||||
* stay constant before TSC access emulation is disabled from guest side
|
||||
* (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
|
||||
* frequency and guest visible TSC value across migration (and prevent it when
|
||||
* TSC scaling is unsupported).
|
||||
*/
|
||||
static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
|
||||
{
|
||||
return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
|
||||
hv->hv_tsc_emulation_control;
|
||||
}
|
||||
|
||||
void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
||||
struct pvclock_vcpu_time_info *hv_clock)
|
||||
{
|
||||
|
@ -1087,7 +1102,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
|||
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
|
||||
BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
|
||||
|
||||
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
|
||||
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
|
||||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
|
||||
return;
|
||||
|
||||
mutex_lock(&hv->hv_lock);
|
||||
|
@ -1101,7 +1117,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
|||
*/
|
||||
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
|
||||
&tsc_seq, sizeof(tsc_seq))))
|
||||
goto out_err;
|
||||
|
||||
if (tsc_seq && tsc_page_update_unsafe(hv)) {
|
||||
if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
|
||||
goto out_err;
|
||||
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/*
|
||||
* While we're computing and writing the parameters, force the
|
||||
|
@ -1110,15 +1134,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
|||
hv->tsc_ref.tsc_sequence = 0;
|
||||
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
|
||||
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
|
||||
goto out_unlock;
|
||||
goto out_err;
|
||||
|
||||
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
|
||||
goto out_unlock;
|
||||
goto out_err;
|
||||
|
||||
/* Ensure sequence is zero before writing the rest of the struct. */
|
||||
smp_wmb();
|
||||
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
|
||||
goto out_unlock;
|
||||
goto out_err;
|
||||
|
||||
/*
|
||||
* Now switch to the TSC page mechanism by writing the sequence.
|
||||
|
@ -1131,8 +1155,45 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
|||
smp_wmb();
|
||||
|
||||
hv->tsc_ref.tsc_sequence = tsc_seq;
|
||||
kvm_write_guest(kvm, gfn_to_gpa(gfn),
|
||||
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
|
||||
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
|
||||
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
|
||||
goto out_err;
|
||||
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
|
||||
goto out_unlock;
|
||||
|
||||
out_err:
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
|
||||
out_unlock:
|
||||
mutex_unlock(&hv->hv_lock);
|
||||
}
|
||||
|
||||
void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_hv *hv = to_kvm_hv(kvm);
|
||||
u64 gfn;
|
||||
|
||||
if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
|
||||
hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
|
||||
tsc_page_update_unsafe(hv))
|
||||
return;
|
||||
|
||||
mutex_lock(&hv->hv_lock);
|
||||
|
||||
if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
|
||||
goto out_unlock;
|
||||
|
||||
/* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */
|
||||
if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET)
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING;
|
||||
|
||||
gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
|
||||
|
||||
hv->tsc_ref.tsc_sequence = 0;
|
||||
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
|
||||
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&hv->hv_lock);
|
||||
}
|
||||
|
@ -1193,8 +1254,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
|||
}
|
||||
case HV_X64_MSR_REFERENCE_TSC:
|
||||
hv->hv_tsc_page = data;
|
||||
if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
|
||||
if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
|
||||
if (!host)
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
|
||||
else
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
|
||||
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
|
||||
} else {
|
||||
hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
|
||||
}
|
||||
break;
|
||||
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
|
||||
return kvm_hv_msr_set_crash_data(kvm,
|
||||
|
@ -1229,6 +1297,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
|
|||
hv->hv_tsc_emulation_control = data;
|
||||
break;
|
||||
case HV_X64_MSR_TSC_EMULATION_STATUS:
|
||||
if (data && !host)
|
||||
return 1;
|
||||
|
||||
hv->hv_tsc_emulation_status = data;
|
||||
break;
|
||||
case HV_X64_MSR_TIME_REF_COUNT:
|
||||
|
|
|
@ -133,6 +133,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
|
|||
|
||||
void kvm_hv_setup_tsc_page(struct kvm *kvm,
|
||||
struct pvclock_vcpu_time_info *hv_clock);
|
||||
void kvm_hv_invalidate_tsc_page(struct kvm *kvm);
|
||||
|
||||
void kvm_hv_init_vm(struct kvm *kvm);
|
||||
void kvm_hv_destroy_vm(struct kvm *kvm);
|
||||
|
|
|
@ -78,6 +78,11 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
|
|||
return to_shadow_page(__pa(sptep));
|
||||
}
|
||||
|
||||
static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
|
||||
{
|
||||
return sp->role.smm ? 1 : 0;
|
||||
}
|
||||
|
||||
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
/*
|
||||
|
|
|
@ -20,6 +20,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
|
|||
return gfn & -KVM_PAGES_PER_HPAGE(level);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the TDP iterator to the root PT and allow it to continue its
|
||||
* traversal over the paging structure from there.
|
||||
*/
|
||||
void tdp_iter_restart(struct tdp_iter *iter)
|
||||
{
|
||||
iter->yielded_gfn = iter->next_last_level_gfn;
|
||||
iter->level = iter->root_level;
|
||||
|
||||
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
|
||||
tdp_iter_refresh_sptep(iter);
|
||||
|
||||
iter->valid = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sets a TDP iterator to walk a pre-order traversal of the paging structure
|
||||
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
|
||||
|
@ -31,16 +46,12 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
|
|||
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
|
||||
|
||||
iter->next_last_level_gfn = next_last_level_gfn;
|
||||
iter->yielded_gfn = iter->next_last_level_gfn;
|
||||
iter->root_level = root_level;
|
||||
iter->min_level = min_level;
|
||||
iter->level = root_level;
|
||||
iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
|
||||
iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt;
|
||||
iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt));
|
||||
|
||||
iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
|
||||
tdp_iter_refresh_sptep(iter);
|
||||
|
||||
iter->valid = true;
|
||||
tdp_iter_restart(iter);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -159,8 +170,3 @@ void tdp_iter_next(struct tdp_iter *iter)
|
|||
iter->valid = false;
|
||||
}
|
||||
|
||||
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
|
||||
{
|
||||
return iter->pt_path[iter->root_level - 1];
|
||||
}
|
||||
|
||||
|
|
|
@ -36,6 +36,8 @@ struct tdp_iter {
|
|||
int min_level;
|
||||
/* The iterator's current level within the paging structure */
|
||||
int level;
|
||||
/* The address space ID, i.e. SMM vs. regular. */
|
||||
int as_id;
|
||||
/* A snapshot of the value at sptep */
|
||||
u64 old_spte;
|
||||
/*
|
||||
|
@ -62,6 +64,6 @@ tdp_ptep_t spte_to_child_pt(u64 pte, int level);
|
|||
void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
|
||||
int min_level, gfn_t next_last_level_gfn);
|
||||
void tdp_iter_next(struct tdp_iter *iter);
|
||||
tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);
|
||||
void tdp_iter_restart(struct tdp_iter *iter);
|
||||
|
||||
#endif /* __KVM_X86_MMU_TDP_ITER_H */
|
||||
|
|
|
@ -203,11 +203,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
|
|||
u64 old_spte, u64 new_spte, int level,
|
||||
bool shared);
|
||||
|
||||
static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
|
||||
{
|
||||
return sp->role.smm ? 1 : 0;
|
||||
}
|
||||
|
||||
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
|
||||
{
|
||||
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
|
||||
|
@ -301,11 +296,16 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
|
|||
*
|
||||
* Given a page table that has been removed from the TDP paging structure,
|
||||
* iterates through the page table to clear SPTEs and free child page tables.
|
||||
*
|
||||
* Note that pt is passed in as a tdp_ptep_t, but it does not need RCU
|
||||
* protection. Since this thread removed it from the paging structure,
|
||||
* this thread will be responsible for ensuring the page is freed. Hence the
|
||||
* early rcu_dereferences in the function.
|
||||
*/
|
||||
static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
|
||||
static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
|
||||
bool shared)
|
||||
{
|
||||
struct kvm_mmu_page *sp = sptep_to_sp(pt);
|
||||
struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
|
||||
int level = sp->role.level;
|
||||
gfn_t base_gfn = sp->gfn;
|
||||
u64 old_child_spte;
|
||||
|
@ -318,7 +318,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
|
|||
tdp_mmu_unlink_page(kvm, sp, shared);
|
||||
|
||||
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
|
||||
sptep = pt + i;
|
||||
sptep = rcu_dereference(pt) + i;
|
||||
gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
|
||||
|
||||
if (shared) {
|
||||
|
@ -492,10 +492,6 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
|
|||
struct tdp_iter *iter,
|
||||
u64 new_spte)
|
||||
{
|
||||
u64 *root_pt = tdp_iter_root_pt(iter);
|
||||
struct kvm_mmu_page *root = sptep_to_sp(root_pt);
|
||||
int as_id = kvm_mmu_page_as_id(root);
|
||||
|
||||
lockdep_assert_held_read(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
|
@ -509,8 +505,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
|
|||
new_spte) != iter->old_spte)
|
||||
return false;
|
||||
|
||||
handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
|
||||
iter->level, true);
|
||||
handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
|
||||
new_spte, iter->level, true);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -538,7 +534,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
|
|||
* here since the SPTE is going from non-present
|
||||
* to non-present.
|
||||
*/
|
||||
WRITE_ONCE(*iter->sptep, 0);
|
||||
WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -564,10 +560,6 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
|
|||
u64 new_spte, bool record_acc_track,
|
||||
bool record_dirty_log)
|
||||
{
|
||||
tdp_ptep_t root_pt = tdp_iter_root_pt(iter);
|
||||
struct kvm_mmu_page *root = sptep_to_sp(root_pt);
|
||||
int as_id = kvm_mmu_page_as_id(root);
|
||||
|
||||
lockdep_assert_held_write(&kvm->mmu_lock);
|
||||
|
||||
/*
|
||||
|
@ -581,13 +573,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
|
|||
|
||||
WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
|
||||
|
||||
__handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
|
||||
iter->level, false);
|
||||
__handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
|
||||
new_spte, iter->level, false);
|
||||
if (record_acc_track)
|
||||
handle_changed_spte_acc_track(iter->old_spte, new_spte,
|
||||
iter->level);
|
||||
if (record_dirty_log)
|
||||
handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
|
||||
handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
|
||||
iter->old_spte, new_spte,
|
||||
iter->level);
|
||||
}
|
||||
|
@ -659,9 +651,7 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
|
|||
|
||||
WARN_ON(iter->gfn > iter->next_last_level_gfn);
|
||||
|
||||
tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
|
||||
iter->root_level, iter->min_level,
|
||||
iter->next_last_level_gfn);
|
||||
tdp_iter_restart(iter);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1526,35 +1526,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
|
|||
|
||||
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
|
||||
{
|
||||
struct kvm_x86_msr_filter *msr_filter;
|
||||
struct msr_bitmap_range *ranges;
|
||||
struct kvm *kvm = vcpu->kvm;
|
||||
struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
|
||||
u32 count = kvm->arch.msr_filter.count;
|
||||
u32 i;
|
||||
bool r = kvm->arch.msr_filter.default_allow;
|
||||
bool allowed;
|
||||
int idx;
|
||||
u32 i;
|
||||
|
||||
/* MSR filtering not set up or x2APIC enabled, allow everything */
|
||||
if (!count || (index >= 0x800 && index <= 0x8ff))
|
||||
/* x2APIC MSRs do not support filtering. */
|
||||
if (index >= 0x800 && index <= 0x8ff)
|
||||
return true;
|
||||
|
||||
/* Prevent collision with set_msr_filter */
|
||||
idx = srcu_read_lock(&kvm->srcu);
|
||||
|
||||
for (i = 0; i < count; i++) {
|
||||
msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
|
||||
if (!msr_filter) {
|
||||
allowed = true;
|
||||
goto out;
|
||||
}
|
||||
|
||||
allowed = msr_filter->default_allow;
|
||||
ranges = msr_filter->ranges;
|
||||
|
||||
for (i = 0; i < msr_filter->count; i++) {
|
||||
u32 start = ranges[i].base;
|
||||
u32 end = start + ranges[i].nmsrs;
|
||||
u32 flags = ranges[i].flags;
|
||||
unsigned long *bitmap = ranges[i].bitmap;
|
||||
|
||||
if ((index >= start) && (index < end) && (flags & type)) {
|
||||
r = !!test_bit(index - start, bitmap);
|
||||
allowed = !!test_bit(index - start, bitmap);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
srcu_read_unlock(&kvm->srcu, idx);
|
||||
|
||||
return r;
|
||||
return allowed;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_msr_allowed);
|
||||
|
||||
|
@ -2551,6 +2560,8 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
|
|||
struct kvm_vcpu *vcpu;
|
||||
struct kvm_arch *ka = &kvm->arch;
|
||||
|
||||
kvm_hv_invalidate_tsc_page(kvm);
|
||||
|
||||
spin_lock(&ka->pvclock_gtod_sync_lock);
|
||||
kvm_make_mclock_inprogress_request(kvm);
|
||||
/* no guest entries from this point */
|
||||
|
@ -5352,25 +5363,34 @@ split_irqchip_unlock:
|
|||
return r;
|
||||
}
|
||||
|
||||
static void kvm_clear_msr_filter(struct kvm *kvm)
|
||||
static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
|
||||
{
|
||||
u32 i;
|
||||
u32 count = kvm->arch.msr_filter.count;
|
||||
struct msr_bitmap_range ranges[16];
|
||||
struct kvm_x86_msr_filter *msr_filter;
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
kvm->arch.msr_filter.count = 0;
|
||||
memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
|
||||
mutex_unlock(&kvm->lock);
|
||||
synchronize_srcu(&kvm->srcu);
|
||||
msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
|
||||
if (!msr_filter)
|
||||
return NULL;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
kfree(ranges[i].bitmap);
|
||||
msr_filter->default_allow = default_allow;
|
||||
return msr_filter;
|
||||
}
|
||||
|
||||
static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
|
||||
static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
if (!msr_filter)
|
||||
return;
|
||||
|
||||
for (i = 0; i < msr_filter->count; i++)
|
||||
kfree(msr_filter->ranges[i].bitmap);
|
||||
|
||||
kfree(msr_filter);
|
||||
}
|
||||
|
||||
static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
|
||||
struct kvm_msr_filter_range *user_range)
|
||||
{
|
||||
struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
|
||||
struct msr_bitmap_range range;
|
||||
unsigned long *bitmap = NULL;
|
||||
size_t bitmap_size;
|
||||
|
@ -5404,11 +5424,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user
|
|||
goto err;
|
||||
}
|
||||
|
||||
/* Everything ok, add this range identifier to our global pool */
|
||||
ranges[kvm->arch.msr_filter.count] = range;
|
||||
/* Make sure we filled the array before we tell anyone to walk it */
|
||||
smp_wmb();
|
||||
kvm->arch.msr_filter.count++;
|
||||
/* Everything ok, add this range identifier. */
|
||||
msr_filter->ranges[msr_filter->count] = range;
|
||||
msr_filter->count++;
|
||||
|
||||
return 0;
|
||||
err:
|
||||
|
@ -5419,10 +5437,11 @@ err:
|
|||
static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
|
||||
{
|
||||
struct kvm_msr_filter __user *user_msr_filter = argp;
|
||||
struct kvm_x86_msr_filter *new_filter, *old_filter;
|
||||
struct kvm_msr_filter filter;
|
||||
bool default_allow;
|
||||
int r = 0;
|
||||
bool empty = true;
|
||||
int r = 0;
|
||||
u32 i;
|
||||
|
||||
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
|
||||
|
@ -5435,25 +5454,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
|
|||
if (empty && !default_allow)
|
||||
return -EINVAL;
|
||||
|
||||
kvm_clear_msr_filter(kvm);
|
||||
new_filter = kvm_alloc_msr_filter(default_allow);
|
||||
if (!new_filter)
|
||||
return -ENOMEM;
|
||||
|
||||
kvm->arch.msr_filter.default_allow = default_allow;
|
||||
|
||||
/*
|
||||
* Protect from concurrent calls to this function that could trigger
|
||||
* a TOCTOU violation on kvm->arch.msr_filter.count.
|
||||
*/
|
||||
mutex_lock(&kvm->lock);
|
||||
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
|
||||
r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
|
||||
if (r)
|
||||
break;
|
||||
r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
|
||||
if (r) {
|
||||
kvm_free_msr_filter(new_filter);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
mutex_lock(&kvm->lock);
|
||||
|
||||
/* The per-VM filter is protected by kvm->lock... */
|
||||
old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
|
||||
|
||||
rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
|
||||
synchronize_srcu(&kvm->srcu);
|
||||
|
||||
kvm_free_msr_filter(old_filter);
|
||||
|
||||
kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
|
||||
mutex_unlock(&kvm->lock);
|
||||
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
long kvm_arch_vm_ioctl(struct file *filp,
|
||||
|
@ -6603,7 +6629,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
|
|||
int cpu = get_cpu();
|
||||
|
||||
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
|
||||
smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
|
||||
on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
|
||||
wbinvd_ipi, NULL, 1);
|
||||
put_cpu();
|
||||
cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
|
||||
|
@ -10634,8 +10660,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
|
|||
|
||||
void kvm_arch_destroy_vm(struct kvm *kvm)
|
||||
{
|
||||
u32 i;
|
||||
|
||||
if (current->mm == kvm->mm) {
|
||||
/*
|
||||
* Free memory regions allocated on behalf of userspace,
|
||||
|
@ -10651,8 +10675,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
|
|||
mutex_unlock(&kvm->slots_lock);
|
||||
}
|
||||
static_call_cond(kvm_x86_vm_destroy)(kvm);
|
||||
for (i = 0; i < kvm->arch.msr_filter.count; i++)
|
||||
kfree(kvm->arch.msr_filter.ranges[i].bitmap);
|
||||
kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
|
||||
kvm_pic_destroy(kvm);
|
||||
kvm_ioapic_destroy(kvm);
|
||||
kvm_free_vcpus(kvm);
|
||||
|
|
3
tools/testing/selftests/kvm/.gitignore
vendored
3
tools/testing/selftests/kvm/.gitignore
vendored
|
@ -8,10 +8,13 @@
|
|||
/x86_64/debug_regs
|
||||
/x86_64/evmcs_test
|
||||
/x86_64/get_cpuid_test
|
||||
/x86_64/get_msr_index_features
|
||||
/x86_64/kvm_pv_test
|
||||
/x86_64/hyperv_clock
|
||||
/x86_64/hyperv_cpuid
|
||||
/x86_64/mmio_warning_test
|
||||
/x86_64/platform_info_test
|
||||
/x86_64/set_boot_cpu_id
|
||||
/x86_64/set_sregs_test
|
||||
/x86_64/smm_test
|
||||
/x86_64/state_test
|
||||
|
|
|
@ -39,12 +39,15 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
|
|||
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
|
||||
|
||||
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
|
||||
TEST_GEN_PROGS_x86_64 += x86_64/state_test
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "sparsebit.h"
|
||||
|
||||
#define KVM_DEV_PATH "/dev/kvm"
|
||||
#define KVM_MAX_VCPUS 512
|
||||
|
||||
/*
|
||||
|
@ -133,6 +134,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
|
|||
int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
|
||||
void *arg);
|
||||
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
|
||||
int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
|
||||
void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
|
||||
int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
|
||||
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
|
||||
|
|
|
@ -1697,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
|
|||
{
|
||||
int ret;
|
||||
|
||||
ret = ioctl(vm->fd, cmd, arg);
|
||||
ret = _vm_ioctl(vm, cmd, arg);
|
||||
TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
|
||||
cmd, ret, errno, strerror(errno));
|
||||
}
|
||||
|
||||
int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
|
||||
{
|
||||
return ioctl(vm->fd, cmd, arg);
|
||||
}
|
||||
|
||||
/*
|
||||
* KVM system ioctl
|
||||
*
|
||||
|
|
|
@ -10,8 +10,6 @@
|
|||
|
||||
#include "sparsebit.h"
|
||||
|
||||
#define KVM_DEV_PATH "/dev/kvm"
|
||||
|
||||
struct userspace_mem_region {
|
||||
struct kvm_userspace_memory_region region;
|
||||
struct sparsebit *unused_phy_pages;
|
||||
|
|
134
tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
Normal file
134
tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Test that KVM_GET_MSR_INDEX_LIST and
|
||||
* KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
|
||||
*
|
||||
* Copyright (C) 2020, Red Hat, Inc.
|
||||
*/
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
|
||||
static int kvm_num_index_msrs(int kvm_fd, int nmsrs)
|
||||
{
|
||||
struct kvm_msr_list *list;
|
||||
int r;
|
||||
|
||||
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
|
||||
list->nmsrs = nmsrs;
|
||||
r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
|
||||
TEST_ASSERT(r == -1 && errno == E2BIG,
|
||||
"Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
|
||||
r);
|
||||
|
||||
r = list->nmsrs;
|
||||
free(list);
|
||||
return r;
|
||||
}
|
||||
|
||||
static void test_get_msr_index(void)
|
||||
{
|
||||
int old_res, res, kvm_fd, r;
|
||||
struct kvm_msr_list *list;
|
||||
|
||||
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
|
||||
if (kvm_fd < 0)
|
||||
exit(KSFT_SKIP);
|
||||
|
||||
old_res = kvm_num_index_msrs(kvm_fd, 0);
|
||||
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
|
||||
|
||||
if (old_res != 1) {
|
||||
res = kvm_num_index_msrs(kvm_fd, 1);
|
||||
TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
|
||||
TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
|
||||
}
|
||||
|
||||
list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0]));
|
||||
list->nmsrs = old_res;
|
||||
r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
|
||||
|
||||
TEST_ASSERT(r == 0,
|
||||
"Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
|
||||
r);
|
||||
TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical");
|
||||
free(list);
|
||||
|
||||
close(kvm_fd);
|
||||
}
|
||||
|
||||
static int kvm_num_feature_msrs(int kvm_fd, int nmsrs)
|
||||
{
|
||||
struct kvm_msr_list *list;
|
||||
int r;
|
||||
|
||||
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
|
||||
list->nmsrs = nmsrs;
|
||||
r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
|
||||
TEST_ASSERT(r == -1 && errno == E2BIG,
|
||||
"Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i",
|
||||
r);
|
||||
|
||||
r = list->nmsrs;
|
||||
free(list);
|
||||
return r;
|
||||
}
|
||||
|
||||
struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs)
|
||||
{
|
||||
struct kvm_msr_list *list;
|
||||
int r;
|
||||
|
||||
list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
|
||||
list->nmsrs = nmsrs;
|
||||
r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
|
||||
|
||||
TEST_ASSERT(r == 0,
|
||||
"Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
|
||||
r);
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
static void test_get_msr_feature(void)
|
||||
{
|
||||
int res, old_res, i, kvm_fd;
|
||||
struct kvm_msr_list *feature_list;
|
||||
|
||||
kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
|
||||
if (kvm_fd < 0)
|
||||
exit(KSFT_SKIP);
|
||||
|
||||
old_res = kvm_num_feature_msrs(kvm_fd, 0);
|
||||
TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
|
||||
|
||||
if (old_res != 1) {
|
||||
res = kvm_num_feature_msrs(kvm_fd, 1);
|
||||
TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
|
||||
TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
|
||||
}
|
||||
|
||||
feature_list = kvm_get_msr_feature_list(kvm_fd, old_res);
|
||||
TEST_ASSERT(old_res == feature_list->nmsrs,
|
||||
"Unmatching number of msr indexes");
|
||||
|
||||
for (i = 0; i < feature_list->nmsrs; i++)
|
||||
kvm_get_feature_msr(feature_list->indices[i]);
|
||||
|
||||
free(feature_list);
|
||||
close(kvm_fd);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES))
|
||||
test_get_msr_feature();
|
||||
|
||||
test_get_msr_index();
|
||||
}
|
260
tools/testing/selftests/kvm/x86_64/hyperv_clock.c
Normal file
260
tools/testing/selftests/kvm/x86_64/hyperv_clock.c
Normal file
|
@ -0,0 +1,260 @@
|
|||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/*
|
||||
* Copyright (C) 2021, Red Hat, Inc.
|
||||
*
|
||||
* Tests for Hyper-V clocksources
|
||||
*/
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
|
||||
struct ms_hyperv_tsc_page {
|
||||
volatile u32 tsc_sequence;
|
||||
u32 reserved1;
|
||||
volatile u64 tsc_scale;
|
||||
volatile s64 tsc_offset;
|
||||
} __packed;
|
||||
|
||||
#define HV_X64_MSR_GUEST_OS_ID 0x40000000
|
||||
#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
|
||||
#define HV_X64_MSR_REFERENCE_TSC 0x40000021
|
||||
#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
|
||||
#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
|
||||
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
|
||||
|
||||
/* Simplified mul_u64_u64_shr() */
|
||||
static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
|
||||
{
|
||||
union {
|
||||
u64 ll;
|
||||
struct {
|
||||
u32 low, high;
|
||||
} l;
|
||||
} rm, rn, rh, a0, b0;
|
||||
u64 c;
|
||||
|
||||
a0.ll = a;
|
||||
b0.ll = b;
|
||||
|
||||
rm.ll = (u64)a0.l.low * b0.l.high;
|
||||
rn.ll = (u64)a0.l.high * b0.l.low;
|
||||
rh.ll = (u64)a0.l.high * b0.l.high;
|
||||
|
||||
rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
|
||||
rh.l.high = (c >> 32) + rh.l.high;
|
||||
|
||||
return rh.ll;
|
||||
}
|
||||
|
||||
static inline void nop_loop(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 1000000; i++)
|
||||
asm volatile("nop");
|
||||
}
|
||||
|
||||
static inline void check_tsc_msr_rdtsc(void)
|
||||
{
|
||||
u64 tsc_freq, r1, r2, t1, t2;
|
||||
s64 delta_ns;
|
||||
|
||||
tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
|
||||
GUEST_ASSERT(tsc_freq > 0);
|
||||
|
||||
/* First, check MSR-based clocksource */
|
||||
r1 = rdtsc();
|
||||
t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
|
||||
nop_loop();
|
||||
r2 = rdtsc();
|
||||
t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
|
||||
|
||||
GUEST_ASSERT(r2 > r1 && t2 > t1);
|
||||
|
||||
/* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
|
||||
delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
|
||||
if (delta_ns < 0)
|
||||
delta_ns = -delta_ns;
|
||||
|
||||
/* 1% tolerance */
|
||||
GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
|
||||
}
|
||||
|
||||
static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
|
||||
{
|
||||
u64 r1, r2, t1, t2;
|
||||
|
||||
/* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
|
||||
t1 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
|
||||
r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
|
||||
|
||||
/* 10 ms tolerance */
|
||||
GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
|
||||
nop_loop();
|
||||
|
||||
t2 = mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
|
||||
r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
|
||||
GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
|
||||
}
|
||||
|
||||
static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
|
||||
{
|
||||
u64 tsc_scale, tsc_offset;
|
||||
|
||||
/* Set Guest OS id to enable Hyper-V emulation */
|
||||
GUEST_SYNC(1);
|
||||
wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
|
||||
GUEST_SYNC(2);
|
||||
|
||||
check_tsc_msr_rdtsc();
|
||||
|
||||
GUEST_SYNC(3);
|
||||
|
||||
/* Set up TSC page is disabled state, check that it's clean */
|
||||
wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
|
||||
GUEST_ASSERT(tsc_page->tsc_sequence == 0);
|
||||
GUEST_ASSERT(tsc_page->tsc_scale == 0);
|
||||
GUEST_ASSERT(tsc_page->tsc_offset == 0);
|
||||
|
||||
GUEST_SYNC(4);
|
||||
|
||||
/* Set up TSC page is enabled state */
|
||||
wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
|
||||
GUEST_ASSERT(tsc_page->tsc_sequence != 0);
|
||||
|
||||
GUEST_SYNC(5);
|
||||
|
||||
check_tsc_msr_tsc_page(tsc_page);
|
||||
|
||||
GUEST_SYNC(6);
|
||||
|
||||
tsc_offset = tsc_page->tsc_offset;
|
||||
/* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
|
||||
GUEST_SYNC(7);
|
||||
GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
|
||||
|
||||
nop_loop();
|
||||
|
||||
/*
|
||||
* Enable Re-enlightenment and check that TSC page stays constant across
|
||||
* KVM_SET_CLOCK.
|
||||
*/
|
||||
wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
|
||||
wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
|
||||
tsc_offset = tsc_page->tsc_offset;
|
||||
tsc_scale = tsc_page->tsc_scale;
|
||||
GUEST_SYNC(8);
|
||||
GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
|
||||
GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
|
||||
|
||||
GUEST_SYNC(9);
|
||||
|
||||
check_tsc_msr_tsc_page(tsc_page);
|
||||
|
||||
/*
|
||||
* Disable re-enlightenment and TSC page, check that KVM doesn't update
|
||||
* it anymore.
|
||||
*/
|
||||
wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
|
||||
wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
|
||||
wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
|
||||
memset(tsc_page, 0, sizeof(*tsc_page));
|
||||
|
||||
GUEST_SYNC(10);
|
||||
GUEST_ASSERT(tsc_page->tsc_sequence == 0);
|
||||
GUEST_ASSERT(tsc_page->tsc_offset == 0);
|
||||
GUEST_ASSERT(tsc_page->tsc_scale == 0);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
#define VCPU_ID 0
|
||||
|
||||
static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
|
||||
{
|
||||
u64 tsc_freq, r1, r2, t1, t2;
|
||||
s64 delta_ns;
|
||||
|
||||
tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
|
||||
TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
|
||||
|
||||
/* First, check MSR-based clocksource */
|
||||
r1 = rdtsc();
|
||||
t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
|
||||
nop_loop();
|
||||
r2 = rdtsc();
|
||||
t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
|
||||
|
||||
TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
|
||||
|
||||
/* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
|
||||
delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
|
||||
if (delta_ns < 0)
|
||||
delta_ns = -delta_ns;
|
||||
|
||||
/* 1% tolerance */
|
||||
TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
|
||||
"Elapsed time does not match (MSR=%ld, TSC=%ld)",
|
||||
(t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
|
||||
}
|
||||
|
||||
int main(void)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
struct kvm_run *run;
|
||||
struct ucall uc;
|
||||
vm_vaddr_t tsc_page_gva;
|
||||
int stage;
|
||||
|
||||
vm = vm_create_default(VCPU_ID, 0, guest_main);
|
||||
run = vcpu_state(vm, VCPU_ID);
|
||||
|
||||
vcpu_set_hv_cpuid(vm, VCPU_ID);
|
||||
|
||||
tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
|
||||
memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
|
||||
TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
|
||||
"TSC page has to be page aligned\n");
|
||||
vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
|
||||
|
||||
host_check_tsc_msr_rdtsc(vm);
|
||||
|
||||
for (stage = 1;; stage++) {
|
||||
_vcpu_run(vm, VCPU_ID);
|
||||
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
|
||||
"Stage %d: unexpected exit reason: %u (%s),\n",
|
||||
stage, run->exit_reason,
|
||||
exit_reason_str(run->exit_reason));
|
||||
|
||||
switch (get_ucall(vm, VCPU_ID, &uc)) {
|
||||
case UCALL_ABORT:
|
||||
TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
|
||||
__FILE__, uc.args[1]);
|
||||
/* NOT REACHED */
|
||||
case UCALL_SYNC:
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
/* Keep in sync with guest_main() */
|
||||
TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
|
||||
stage);
|
||||
goto out;
|
||||
default:
|
||||
TEST_FAIL("Unknown ucall %lu", uc.cmd);
|
||||
}
|
||||
|
||||
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
|
||||
uc.args[1] == stage,
|
||||
"Stage %d: Unexpected register values vmexit, got %lx",
|
||||
stage, (ulong)uc.args[1]);
|
||||
|
||||
/* Reset kvmclock triggering TSC page update */
|
||||
if (stage == 7 || stage == 8 || stage == 10) {
|
||||
struct kvm_clock_data clock = {0};
|
||||
|
||||
vm_ioctl(vm, KVM_SET_CLOCK, &clock);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
kvm_vm_free(vm);
|
||||
}
|
166
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
Normal file
166
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
Normal file
|
@ -0,0 +1,166 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Test that KVM_SET_BOOT_CPU_ID works as intended
|
||||
*
|
||||
* Copyright (C) 2020, Red Hat, Inc.
|
||||
*/
|
||||
#define _GNU_SOURCE /* for program_invocation_name */
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
|
||||
#include "test_util.h"
|
||||
#include "kvm_util.h"
|
||||
#include "processor.h"
|
||||
|
||||
#define N_VCPU 2
|
||||
#define VCPU_ID0 0
|
||||
#define VCPU_ID1 1
|
||||
|
||||
static uint32_t get_bsp_flag(void)
|
||||
{
|
||||
return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
|
||||
}
|
||||
|
||||
static void guest_bsp_vcpu(void *arg)
|
||||
{
|
||||
GUEST_SYNC(1);
|
||||
|
||||
GUEST_ASSERT(get_bsp_flag() != 0);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void guest_not_bsp_vcpu(void *arg)
|
||||
{
|
||||
GUEST_SYNC(1);
|
||||
|
||||
GUEST_ASSERT(get_bsp_flag() == 0);
|
||||
|
||||
GUEST_DONE();
|
||||
}
|
||||
|
||||
static void test_set_boot_busy(struct kvm_vm *vm)
|
||||
{
|
||||
int res;
|
||||
|
||||
res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0);
|
||||
TEST_ASSERT(res == -1 && errno == EBUSY,
|
||||
"KVM_SET_BOOT_CPU_ID set while running vm");
|
||||
}
|
||||
|
||||
static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
|
||||
{
|
||||
struct ucall uc;
|
||||
int stage;
|
||||
|
||||
for (stage = 0; stage < 2; stage++) {
|
||||
|
||||
vcpu_run(vm, vcpuid);
|
||||
|
||||
switch (get_ucall(vm, vcpuid, &uc)) {
|
||||
case UCALL_SYNC:
|
||||
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
|
||||
uc.args[1] == stage + 1,
|
||||
"Stage %d: Unexpected register values vmexit, got %lx",
|
||||
stage + 1, (ulong)uc.args[1]);
|
||||
test_set_boot_busy(vm);
|
||||
break;
|
||||
case UCALL_DONE:
|
||||
TEST_ASSERT(stage == 1,
|
||||
"Expected GUEST_DONE in stage 2, got stage %d",
|
||||
stage);
|
||||
break;
|
||||
case UCALL_ABORT:
|
||||
TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx",
|
||||
(const char *)uc.args[0], __FILE__,
|
||||
uc.args[1], uc.args[2], uc.args[3]);
|
||||
default:
|
||||
TEST_ASSERT(false, "Unexpected exit: %s",
|
||||
exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static struct kvm_vm *create_vm(void)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2;
|
||||
uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU;
|
||||
uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
|
||||
|
||||
pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
|
||||
vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
|
||||
|
||||
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
|
||||
vm_create_irqchip(vm);
|
||||
|
||||
return vm;
|
||||
}
|
||||
|
||||
static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
|
||||
{
|
||||
if (bsp_code)
|
||||
vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
|
||||
else
|
||||
vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
|
||||
|
||||
vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
|
||||
}
|
||||
|
||||
static void run_vm_bsp(uint32_t bsp_vcpu)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1;
|
||||
|
||||
vm = create_vm();
|
||||
|
||||
if (is_bsp_vcpu1)
|
||||
vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
|
||||
|
||||
add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1);
|
||||
add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1);
|
||||
|
||||
run_vcpu(vm, VCPU_ID0);
|
||||
run_vcpu(vm, VCPU_ID1);
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
static void check_set_bsp_busy(void)
|
||||
{
|
||||
struct kvm_vm *vm;
|
||||
int res;
|
||||
|
||||
vm = create_vm();
|
||||
|
||||
add_x86_vcpu(vm, VCPU_ID0, true);
|
||||
add_x86_vcpu(vm, VCPU_ID1, false);
|
||||
|
||||
res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
|
||||
TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu");
|
||||
|
||||
run_vcpu(vm, VCPU_ID0);
|
||||
run_vcpu(vm, VCPU_ID1);
|
||||
|
||||
res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
|
||||
TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu");
|
||||
|
||||
kvm_vm_free(vm);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
|
||||
print_skip("set_boot_cpu_id not available");
|
||||
return 0;
|
||||
}
|
||||
|
||||
run_vm_bsp(VCPU_ID0);
|
||||
run_vm_bsp(VCPU_ID1);
|
||||
run_vm_bsp(VCPU_ID0);
|
||||
|
||||
check_set_bsp_busy();
|
||||
}
|
Loading…
Add table
Reference in a new issue