mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 01:09:38 -05:00
Bugfixes, including a TLB flush fix that affects processors
without nested page tables.
-----BEGIN PGP SIGNATURE-----
iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmDAVpQUHHBib256aW5p
QHJlZGhhdC5jb20ACgkQv/vSX3jHroNkOgf9F97eFxAdod3/wbW9EbsUPR5bMTLE
+R6Hmvw+yCm/W2cycVGdCSh1BEKNuZN/XfHln2cYVfVr6ndog58A4Y0urFAhTROv
IHs8TCA5biQitoZ716l88ExOitnqJiSmMhGex969+zm1Lb9MQo1KA/zxERlqCi3s
Pfcxb6I8VbD9LEb6NaQdDgQoslJo1tzhe9gGYAYrpMOZujpj1RPeIOZIfeII0MP/
g14/JSar8cXc9QJ6zbiKn8HhpmzGJnaIsyFFL2RMIBlKvxsnpOU6VmisLTL9407o
P246Vq59BM8pdRCVUW9W9hLr2ho8lmi+ZYXASCm+qfn8cLaHyRCqSK56ZQ==
=nW43
-----END PGP SIGNATURE-----
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
"Bugfixes, including a TLB flush fix that affects processors without
nested page tables"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
kvm: fix previous commit for 32-bit builds
kvm: avoid speculation-based attacks from out-of-range memslot accesses
KVM: x86: Unload MMU on guest TLB flush if TDP disabled to force MMU sync
KVM: x86: Ensure liveliness of nested VM-Enter fail tracepoint message
selftests: kvm: Add support for customized slot0 memory size
KVM: selftests: introduce P47V64 for s390x
KVM: x86: Ensure PV TLB flush tracepoint reflects KVM behavior
KVM: X86: MMU: Use the correct inherited permissions to get shadow page
KVM: LAPIC: Write 0 to TMICT should also cancel vmx-preemption timer
KVM: SVM: Fix SEV SEND_START session length & SEND_UPDATE_DATA query length after commit 238eca821c
This commit is contained in:
commit
2f673816b2
12 changed files with 105 additions and 39 deletions
|
@ -171,8 +171,8 @@ Shadow pages contain the following information:
|
|||
shadow pages) so role.quadrant takes values in the range 0..3. Each
|
||||
quadrant maps 1GB virtual address space.
|
||||
role.access:
|
||||
Inherited guest access permissions in the form uwx. Note execute
|
||||
permission is positive, not negative.
|
||||
Inherited guest access permissions from the parent ptes in the form uwx.
|
||||
Note execute permission is positive, not negative.
|
||||
role.invalid:
|
||||
The page is invalid and should not be used. It is a root page that is
|
||||
currently pinned (by a cpu hardware register pointing to it); once it is
|
||||
|
|
|
@ -1494,6 +1494,15 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
|
|||
|
||||
static void cancel_hv_timer(struct kvm_lapic *apic);
|
||||
|
||||
static void cancel_apic_timer(struct kvm_lapic *apic)
|
||||
{
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
preempt_disable();
|
||||
if (apic->lapic_timer.hv_timer_in_use)
|
||||
cancel_hv_timer(apic);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
static void apic_update_lvtt(struct kvm_lapic *apic)
|
||||
{
|
||||
u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
|
||||
|
@ -1502,11 +1511,7 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
|
|||
if (apic->lapic_timer.timer_mode != timer_mode) {
|
||||
if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
|
||||
APIC_LVT_TIMER_TSCDEADLINE)) {
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
preempt_disable();
|
||||
if (apic->lapic_timer.hv_timer_in_use)
|
||||
cancel_hv_timer(apic);
|
||||
preempt_enable();
|
||||
cancel_apic_timer(apic);
|
||||
kvm_lapic_set_reg(apic, APIC_TMICT, 0);
|
||||
apic->lapic_timer.period = 0;
|
||||
apic->lapic_timer.tscdeadline = 0;
|
||||
|
@ -2092,7 +2097,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
|
|||
if (apic_lvtt_tscdeadline(apic))
|
||||
break;
|
||||
|
||||
hrtimer_cancel(&apic->lapic_timer.timer);
|
||||
cancel_apic_timer(apic);
|
||||
kvm_lapic_set_reg(apic, APIC_TMICT, val);
|
||||
start_apic_timer(apic);
|
||||
break;
|
||||
|
|
|
@ -90,8 +90,8 @@ struct guest_walker {
|
|||
gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
|
||||
pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
|
||||
bool pte_writable[PT_MAX_FULL_LEVELS];
|
||||
unsigned pt_access;
|
||||
unsigned pte_access;
|
||||
unsigned int pt_access[PT_MAX_FULL_LEVELS];
|
||||
unsigned int pte_access;
|
||||
gfn_t gfn;
|
||||
struct x86_exception fault;
|
||||
};
|
||||
|
@ -418,13 +418,15 @@ retry_walk:
|
|||
}
|
||||
|
||||
walker->ptes[walker->level - 1] = pte;
|
||||
|
||||
/* Convert to ACC_*_MASK flags for struct guest_walker. */
|
||||
walker->pt_access[walker->level - 1] = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
|
||||
} while (!is_last_gpte(mmu, walker->level, pte));
|
||||
|
||||
pte_pkey = FNAME(gpte_pkeys)(vcpu, pte);
|
||||
accessed_dirty = have_ad ? pte_access & PT_GUEST_ACCESSED_MASK : 0;
|
||||
|
||||
/* Convert to ACC_*_MASK flags for struct guest_walker. */
|
||||
walker->pt_access = FNAME(gpte_access)(pt_access ^ walk_nx_mask);
|
||||
walker->pte_access = FNAME(gpte_access)(pte_access ^ walk_nx_mask);
|
||||
errcode = permission_fault(vcpu, mmu, walker->pte_access, pte_pkey, access);
|
||||
if (unlikely(errcode))
|
||||
|
@ -463,7 +465,8 @@ retry_walk:
|
|||
}
|
||||
|
||||
pgprintk("%s: pte %llx pte_access %x pt_access %x\n",
|
||||
__func__, (u64)pte, walker->pte_access, walker->pt_access);
|
||||
__func__, (u64)pte, walker->pte_access,
|
||||
walker->pt_access[walker->level - 1]);
|
||||
return 1;
|
||||
|
||||
error:
|
||||
|
@ -643,7 +646,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
|||
bool huge_page_disallowed = exec && nx_huge_page_workaround_enabled;
|
||||
struct kvm_mmu_page *sp = NULL;
|
||||
struct kvm_shadow_walk_iterator it;
|
||||
unsigned direct_access, access = gw->pt_access;
|
||||
unsigned int direct_access, access;
|
||||
int top_level, level, req_level, ret;
|
||||
gfn_t base_gfn = gw->gfn;
|
||||
|
||||
|
@ -675,6 +678,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gpa_t addr,
|
|||
sp = NULL;
|
||||
if (!is_shadow_present_pte(*it.sptep)) {
|
||||
table_gfn = gw->table_gfn[it.level - 2];
|
||||
access = gw->pt_access[it.level - 2];
|
||||
sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1,
|
||||
false, access);
|
||||
}
|
||||
|
|
|
@ -1103,10 +1103,9 @@ __sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
|
|||
struct sev_data_send_start data;
|
||||
int ret;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
data.handle = sev->handle;
|
||||
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
params->session_len = data.session_len;
|
||||
if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
|
||||
|
@ -1215,10 +1214,9 @@ __sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
|
|||
struct sev_data_send_update_data data;
|
||||
int ret;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
data.handle = sev->handle;
|
||||
ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
params->hdr_len = data.hdr_len;
|
||||
params->trans_len = data.trans_len;
|
||||
|
|
|
@ -1550,16 +1550,16 @@ TRACE_EVENT(kvm_nested_vmenter_failed,
|
|||
TP_ARGS(msg, err),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(const char *, msg)
|
||||
__string(msg, msg)
|
||||
__field(u32, err)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->msg = msg;
|
||||
__assign_str(msg, msg);
|
||||
__entry->err = err;
|
||||
),
|
||||
|
||||
TP_printk("%s%s", __entry->msg, !__entry->err ? "" :
|
||||
TP_printk("%s%s", __get_str(msg), !__entry->err ? "" :
|
||||
__print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS))
|
||||
);
|
||||
|
||||
|
|
|
@ -3072,6 +3072,19 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
|
|||
static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
++vcpu->stat.tlb_flush;
|
||||
|
||||
if (!tdp_enabled) {
|
||||
/*
|
||||
* A TLB flush on behalf of the guest is equivalent to
|
||||
* INVPCID(all), toggling CR4.PGE, etc., which requires
|
||||
* a forced sync of the shadow page tables. Unload the
|
||||
* entire MMU here and the subsequent load will sync the
|
||||
* shadow page tables, and also flush the TLB.
|
||||
*/
|
||||
kvm_mmu_unload(vcpu);
|
||||
return;
|
||||
}
|
||||
|
||||
static_call(kvm_x86_tlb_flush_guest)(vcpu);
|
||||
}
|
||||
|
||||
|
@ -3101,9 +3114,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
|
|||
* expensive IPIs.
|
||||
*/
|
||||
if (guest_pv_has(vcpu, KVM_FEATURE_PV_TLB_FLUSH)) {
|
||||
u8 st_preempted = xchg(&st->preempted, 0);
|
||||
|
||||
trace_kvm_pv_tlb_flush(vcpu->vcpu_id,
|
||||
st->preempted & KVM_VCPU_FLUSH_TLB);
|
||||
if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB)
|
||||
st_preempted & KVM_VCPU_FLUSH_TLB);
|
||||
if (st_preempted & KVM_VCPU_FLUSH_TLB)
|
||||
kvm_vcpu_flush_tlb_guest(vcpu);
|
||||
} else {
|
||||
st->preempted = 0;
|
||||
|
|
|
@ -1185,7 +1185,15 @@ __gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn)
|
|||
static inline unsigned long
|
||||
__gfn_to_hva_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
|
||||
{
|
||||
return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
|
||||
/*
|
||||
* The index was checked originally in search_memslots. To avoid
|
||||
* that a malicious guest builds a Spectre gadget out of e.g. page
|
||||
* table walks, do not let the processor speculate loads outside
|
||||
* the guest's registered memslots.
|
||||
*/
|
||||
unsigned long offset = gfn - slot->base_gfn;
|
||||
offset = array_index_nospec(offset, slot->npages);
|
||||
return slot->userspace_addr + offset * PAGE_SIZE;
|
||||
}
|
||||
|
||||
static inline int memslot_id(struct kvm *kvm, gfn_t gfn)
|
||||
|
|
|
@ -43,6 +43,7 @@ enum vm_guest_mode {
|
|||
VM_MODE_P40V48_4K,
|
||||
VM_MODE_P40V48_64K,
|
||||
VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
|
||||
VM_MODE_P47V64_4K,
|
||||
NUM_VM_MODES,
|
||||
};
|
||||
|
||||
|
@ -60,7 +61,7 @@ enum vm_guest_mode {
|
|||
|
||||
#elif defined(__s390x__)
|
||||
|
||||
#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
|
||||
#define VM_MODE_DEFAULT VM_MODE_P47V64_4K
|
||||
#define MIN_PAGE_SHIFT 12U
|
||||
#define ptes_per_page(page_size) ((page_size) / 16)
|
||||
|
||||
|
@ -285,10 +286,11 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
|
|||
uint32_t num_percpu_pages, void *guest_code,
|
||||
uint32_t vcpuids[]);
|
||||
|
||||
/* Like vm_create_default_with_vcpus, but accepts mode as a parameter */
|
||||
/* Like vm_create_default_with_vcpus, but accepts mode and slot0 memory as a parameter */
|
||||
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
|
||||
uint64_t extra_mem_pages, uint32_t num_percpu_pages,
|
||||
void *guest_code, uint32_t vcpuids[]);
|
||||
uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
|
||||
uint32_t num_percpu_pages, void *guest_code,
|
||||
uint32_t vcpuids[]);
|
||||
|
||||
/*
|
||||
* Adds a vCPU with reasonable defaults (e.g. a stack)
|
||||
|
|
|
@ -268,7 +268,7 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
|
|||
|
||||
/* Create a VM with enough guest pages */
|
||||
guest_num_pages = test_mem_size / guest_page_size;
|
||||
vm = vm_create_with_vcpus(mode, nr_vcpus,
|
||||
vm = vm_create_with_vcpus(mode, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
|
||||
guest_num_pages, 0, guest_code, NULL);
|
||||
|
||||
/* Align down GPA of the testing memslot */
|
||||
|
|
|
@ -175,6 +175,7 @@ const char *vm_guest_mode_string(uint32_t i)
|
|||
[VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
|
||||
[VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
|
||||
[VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
|
||||
[VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
|
||||
};
|
||||
_Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
|
||||
"Missing new mode strings?");
|
||||
|
@ -192,6 +193,7 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
|
|||
{ 40, 48, 0x1000, 12 },
|
||||
{ 40, 48, 0x10000, 16 },
|
||||
{ 0, 0, 0x1000, 12 },
|
||||
{ 47, 64, 0x1000, 12 },
|
||||
};
|
||||
_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
|
||||
"Missing new mode params?");
|
||||
|
@ -277,6 +279,9 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
|
|||
TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
|
||||
#endif
|
||||
break;
|
||||
case VM_MODE_P47V64_4K:
|
||||
vm->pgtable_levels = 5;
|
||||
break;
|
||||
default:
|
||||
TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
|
||||
}
|
||||
|
@ -308,21 +313,50 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
|
|||
return vm;
|
||||
}
|
||||
|
||||
/*
|
||||
* VM Create with customized parameters
|
||||
*
|
||||
* Input Args:
|
||||
* mode - VM Mode (e.g. VM_MODE_P52V48_4K)
|
||||
* nr_vcpus - VCPU count
|
||||
* slot0_mem_pages - Slot0 physical memory size
|
||||
* extra_mem_pages - Non-slot0 physical memory total size
|
||||
* num_percpu_pages - Per-cpu physical memory pages
|
||||
* guest_code - Guest entry point
|
||||
* vcpuids - VCPU IDs
|
||||
*
|
||||
* Output Args: None
|
||||
*
|
||||
* Return:
|
||||
* Pointer to opaque structure that describes the created VM.
|
||||
*
|
||||
* Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K),
|
||||
* with customized slot0 memory size, at least 512 pages currently.
|
||||
* extra_mem_pages is only used to calculate the maximum page table size,
|
||||
* no real memory allocation for non-slot0 memory in this function.
|
||||
*/
|
||||
struct kvm_vm *vm_create_with_vcpus(enum vm_guest_mode mode, uint32_t nr_vcpus,
|
||||
uint64_t extra_mem_pages, uint32_t num_percpu_pages,
|
||||
void *guest_code, uint32_t vcpuids[])
|
||||
uint64_t slot0_mem_pages, uint64_t extra_mem_pages,
|
||||
uint32_t num_percpu_pages, void *guest_code,
|
||||
uint32_t vcpuids[])
|
||||
{
|
||||
uint64_t vcpu_pages, extra_pg_pages, pages;
|
||||
struct kvm_vm *vm;
|
||||
int i;
|
||||
|
||||
/* Force slot0 memory size not small than DEFAULT_GUEST_PHY_PAGES */
|
||||
if (slot0_mem_pages < DEFAULT_GUEST_PHY_PAGES)
|
||||
slot0_mem_pages = DEFAULT_GUEST_PHY_PAGES;
|
||||
|
||||
/* The maximum page table size for a memory region will be when the
|
||||
* smallest pages are used. Considering each page contains x page
|
||||
* table descriptors, the total extra size for page tables (for extra
|
||||
* N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
|
||||
* than N/x*2.
|
||||
*/
|
||||
uint64_t vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
|
||||
uint64_t extra_pg_pages = (extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
|
||||
uint64_t pages = DEFAULT_GUEST_PHY_PAGES + extra_mem_pages + vcpu_pages + extra_pg_pages;
|
||||
struct kvm_vm *vm;
|
||||
int i;
|
||||
vcpu_pages = (DEFAULT_STACK_PGS + num_percpu_pages) * nr_vcpus;
|
||||
extra_pg_pages = (slot0_mem_pages + extra_mem_pages + vcpu_pages) / PTES_PER_MIN_PAGE * 2;
|
||||
pages = slot0_mem_pages + vcpu_pages + extra_pg_pages;
|
||||
|
||||
TEST_ASSERT(nr_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
|
||||
"nr_vcpus = %d too large for host, max-vcpus = %d",
|
||||
|
@ -354,8 +388,8 @@ struct kvm_vm *vm_create_default_with_vcpus(uint32_t nr_vcpus, uint64_t extra_me
|
|||
uint32_t num_percpu_pages, void *guest_code,
|
||||
uint32_t vcpuids[])
|
||||
{
|
||||
return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, extra_mem_pages,
|
||||
num_percpu_pages, guest_code, vcpuids);
|
||||
return vm_create_with_vcpus(VM_MODE_DEFAULT, nr_vcpus, DEFAULT_GUEST_PHY_PAGES,
|
||||
extra_mem_pages, num_percpu_pages, guest_code, vcpuids);
|
||||
}
|
||||
|
||||
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
|
||||
|
|
|
@ -69,7 +69,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int vcpus,
|
|||
TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
|
||||
"Guest memory size is not guest page size aligned.");
|
||||
|
||||
vm = vm_create_with_vcpus(mode, vcpus,
|
||||
vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
|
||||
(vcpus * vcpu_memory_bytes) / perf_test_args.guest_page_size,
|
||||
0, guest_code, NULL);
|
||||
|
||||
|
|
|
@ -267,7 +267,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
|
|||
data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
|
||||
TEST_ASSERT(data->hva_slots, "malloc() fail");
|
||||
|
||||
data->vm = vm_create_default(VCPU_ID, 1024, guest_code);
|
||||
data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
|
||||
|
||||
pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
|
||||
max_mem_slots - 1, data->pages_per_slot, rempages);
|
||||
|
|
Loading…
Add table
Reference in a new issue