mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-23 00:20:52 -05:00
* PPC secure guest support
* small x86 cleanup * fix for an x86-specific out-of-bounds write on a ioctl (not guest triggerable, data not attacker-controlled) -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQEcBAABAgAGBQJd551cAAoJEL/70l94x66D+JkH/R3eEOyvckPmYmzd0lnV8mQ/ 7e0n2G/aD+iLZkcCbUnMaImdmSJmoEEJCPjgPk/5nJ3zUi5b/ABWyidEM5uf19Hl rzKBg0DR7BiQptPnZv2JMwEVKu3JOTchMykqu9xXChQlICocZ0xjdOA6nQ19p0Lv FulDw5MUaWrXevIzCBskQ38zJejRQA6CpD1lQkHn7LKS9p3p+BsAOd/Ouy87RfWG b3ktECNbXyO6KStrrhgm+z8pviWY+kqYklyBlDOOwxWif0x8WvNDpQLoVo+ZuhLU Me8YJ1BN75vFlxzh6ZK5exBUnm9E3fGVKIaaF+dpuds2x+j4HnYl+lZCm89MdqY= =Q4v7 -----END PGP SIGNATURE----- Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm Pull more KVM updates from Paolo Bonzini: - PPC secure guest support - small x86 cleanup - fix for an x86-specific out-of-bounds write on a ioctl (not guest triggerable, data not attacker-controlled) * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: kvm: vmx: Stop wasting a page for guest_msrs KVM: x86: fix out-of-bounds write in KVM_GET_EMULATED_CPUID (CVE-2019-19332) Documentation: kvm: Fix mention to number of ioctls classes powerpc: Ultravisor: Add PPC_UV config option KVM: PPC: Book3S HV: Support reset of secure guest KVM: PPC: Book3S HV: Handle memory plug/unplug to secure VM KVM: PPC: Book3S HV: Radix changes for secure guest KVM: PPC: Book3S HV: Shared pages support for secure guests KVM: PPC: Book3S HV: Support for running secure guests mm: ksm: Export ksm_madvise() KVM x86: Move kvm cpuid support out of svm
This commit is contained in:
commit
aedc0650f9
19 changed files with 1156 additions and 20 deletions
|
@ -5,7 +5,7 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation
|
|||
----------------------
|
||||
|
||||
The kvm API is a set of ioctls that are issued to control various aspects
|
||||
of a virtual machine. The ioctls belong to three classes:
|
||||
of a virtual machine. The ioctls belong to the following classes:
|
||||
|
||||
- System ioctls: These query and set global attributes which affect the
|
||||
whole kvm subsystem. In addition a system ioctl is used to create
|
||||
|
@ -4149,6 +4149,24 @@ Valid values for 'action':
|
|||
#define KVM_PMU_EVENT_ALLOW 0
|
||||
#define KVM_PMU_EVENT_DENY 1
|
||||
|
||||
4.121 KVM_PPC_SVM_OFF
|
||||
|
||||
Capability: basic
|
||||
Architectures: powerpc
|
||||
Type: vm ioctl
|
||||
Parameters: none
|
||||
Returns: 0 on successful completion,
|
||||
Errors:
|
||||
EINVAL: if ultravisor failed to terminate the secure guest
|
||||
ENOMEM: if hypervisor failed to allocate new radix page tables for guest
|
||||
|
||||
This ioctl is used to turn off the secure mode of the guest or transition
|
||||
the guest from secure mode to normal mode. This is invoked when the guest
|
||||
is reset. This has no effect if called for a normal guest.
|
||||
|
||||
This ioctl issues an ultravisor call to terminate the secure guest,
|
||||
unpins the VPA pages and releases all the device pages that are used to
|
||||
track the secure pages by hypervisor.
|
||||
|
||||
5. The kvm_run structure
|
||||
------------------------
|
||||
|
|
|
@ -452,6 +452,23 @@ config PPC_TRANSACTIONAL_MEM
|
|||
help
|
||||
Support user-mode Transactional Memory on POWERPC.
|
||||
|
||||
config PPC_UV
|
||||
bool "Ultravisor support"
|
||||
depends on KVM_BOOK3S_HV_POSSIBLE
|
||||
select ZONE_DEVICE
|
||||
select DEV_PAGEMAP_OPS
|
||||
select DEVICE_PRIVATE
|
||||
select MEMORY_HOTPLUG
|
||||
select MEMORY_HOTREMOVE
|
||||
default n
|
||||
help
|
||||
This option paravirtualizes the kernel to run on POWER platforms that
|
||||
support the Protected Execution Facility (PEF). On such platforms,
|
||||
the ultravisor firmware runs at a privilege level above the
|
||||
hypervisor.
|
||||
|
||||
If unsure, say "N".
|
||||
|
||||
config LD_HEAD_STUB_CATCH
|
||||
bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
|
||||
depends on PPC64
|
||||
|
|
|
@ -342,6 +342,15 @@
|
|||
#define H_TLB_INVALIDATE 0xF808
|
||||
#define H_COPY_TOFROM_GUEST 0xF80C
|
||||
|
||||
/* Flags for H_SVM_PAGE_IN */
|
||||
#define H_PAGE_IN_SHARED 0x1
|
||||
|
||||
/* Platform-specific hcalls used by the Ultravisor */
|
||||
#define H_SVM_PAGE_IN 0xEF00
|
||||
#define H_SVM_PAGE_OUT 0xEF04
|
||||
#define H_SVM_INIT_START 0xEF08
|
||||
#define H_SVM_INIT_DONE 0xEF0C
|
||||
|
||||
/* Values for 2nd argument to H_SET_MODE */
|
||||
#define H_SET_MODE_RESOURCE_SET_CIABR 1
|
||||
#define H_SET_MODE_RESOURCE_SET_DAWR 2
|
||||
|
|
74
arch/powerpc/include/asm/kvm_book3s_uvmem.h
Normal file
74
arch/powerpc/include/asm/kvm_book3s_uvmem.h
Normal file
|
@ -0,0 +1,74 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __ASM_KVM_BOOK3S_UVMEM_H__
|
||||
#define __ASM_KVM_BOOK3S_UVMEM_H__
|
||||
|
||||
#ifdef CONFIG_PPC_UV
|
||||
int kvmppc_uvmem_init(void);
|
||||
void kvmppc_uvmem_free(void);
|
||||
int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot);
|
||||
void kvmppc_uvmem_slot_free(struct kvm *kvm,
|
||||
const struct kvm_memory_slot *slot);
|
||||
unsigned long kvmppc_h_svm_page_in(struct kvm *kvm,
|
||||
unsigned long gra,
|
||||
unsigned long flags,
|
||||
unsigned long page_shift);
|
||||
unsigned long kvmppc_h_svm_page_out(struct kvm *kvm,
|
||||
unsigned long gra,
|
||||
unsigned long flags,
|
||||
unsigned long page_shift);
|
||||
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm);
|
||||
unsigned long kvmppc_h_svm_init_done(struct kvm *kvm);
|
||||
int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn);
|
||||
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
|
||||
struct kvm *kvm);
|
||||
#else
|
||||
static inline int kvmppc_uvmem_init(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void kvmppc_uvmem_free(void) { }
|
||||
|
||||
static inline int
|
||||
kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void
|
||||
kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot) { }
|
||||
|
||||
static inline unsigned long
|
||||
kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gra,
|
||||
unsigned long flags, unsigned long page_shift)
|
||||
{
|
||||
return H_UNSUPPORTED;
|
||||
}
|
||||
|
||||
static inline unsigned long
|
||||
kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gra,
|
||||
unsigned long flags, unsigned long page_shift)
|
||||
{
|
||||
return H_UNSUPPORTED;
|
||||
}
|
||||
|
||||
static inline unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
|
||||
{
|
||||
return H_UNSUPPORTED;
|
||||
}
|
||||
|
||||
static inline unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
|
||||
{
|
||||
return H_UNSUPPORTED;
|
||||
}
|
||||
|
||||
static inline int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
|
||||
{
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static inline void
|
||||
kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
|
||||
struct kvm *kvm) { }
|
||||
#endif /* CONFIG_PPC_UV */
|
||||
#endif /* __ASM_KVM_BOOK3S_UVMEM_H__ */
|
|
@ -275,6 +275,10 @@ struct kvm_hpt_info {
|
|||
|
||||
struct kvm_resize_hpt;
|
||||
|
||||
/* Flag values for kvm_arch.secure_guest */
|
||||
#define KVMPPC_SECURE_INIT_START 0x1 /* H_SVM_INIT_START has been called */
|
||||
#define KVMPPC_SECURE_INIT_DONE 0x2 /* H_SVM_INIT_DONE completed */
|
||||
|
||||
struct kvm_arch {
|
||||
unsigned int lpid;
|
||||
unsigned int smt_mode; /* # vcpus per virtual core */
|
||||
|
@ -330,6 +334,8 @@ struct kvm_arch {
|
|||
#endif
|
||||
struct kvmppc_ops *kvm_ops;
|
||||
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
|
||||
struct mutex uvmem_lock;
|
||||
struct list_head uvmem_pfns;
|
||||
struct mutex mmu_setup_lock; /* nests inside vcpu mutexes */
|
||||
u64 l1_ptcr;
|
||||
int max_nested_lpid;
|
||||
|
|
|
@ -322,6 +322,7 @@ struct kvmppc_ops {
|
|||
int size);
|
||||
int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
|
||||
int size);
|
||||
int (*svm_off)(struct kvm *kvm);
|
||||
};
|
||||
|
||||
extern struct kvmppc_ops *kvmppc_hv_ops;
|
||||
|
|
|
@ -26,8 +26,14 @@
|
|||
#define UV_WRITE_PATE 0xF104
|
||||
#define UV_RETURN 0xF11C
|
||||
#define UV_ESM 0xF110
|
||||
#define UV_REGISTER_MEM_SLOT 0xF120
|
||||
#define UV_UNREGISTER_MEM_SLOT 0xF124
|
||||
#define UV_PAGE_IN 0xF128
|
||||
#define UV_PAGE_OUT 0xF12C
|
||||
#define UV_SHARE_PAGE 0xF130
|
||||
#define UV_UNSHARE_PAGE 0xF134
|
||||
#define UV_UNSHARE_ALL_PAGES 0xF140
|
||||
#define UV_PAGE_INVAL 0xF138
|
||||
#define UV_SVM_TERMINATE 0xF13C
|
||||
|
||||
#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
|
||||
|
|
|
@ -46,4 +46,40 @@ static inline int uv_unshare_all_pages(void)
|
|||
return ucall_norets(UV_UNSHARE_ALL_PAGES);
|
||||
}
|
||||
|
||||
/*
 * Issue the UV_PAGE_IN ultracall, forwarding all arguments unchanged, and
 * return its status.
 */
static inline int uv_page_in(u64 lpid, u64 src_ra, u64 dst_gpa, u64 flags,
			     u64 page_shift)
{
	return ucall_norets(UV_PAGE_IN,
			    lpid, src_ra, dst_gpa, flags, page_shift);
}
|
||||
|
||||
/*
 * Issue the UV_PAGE_OUT ultracall, forwarding all arguments unchanged, and
 * return its status.
 */
static inline int uv_page_out(u64 lpid, u64 dst_ra, u64 src_gpa, u64 flags,
			      u64 page_shift)
{
	return ucall_norets(UV_PAGE_OUT,
			    lpid, dst_ra, src_gpa, flags, page_shift);
}
|
||||
|
||||
/*
 * Issue the UV_REGISTER_MEM_SLOT ultracall for the range described by
 * start_gpa/size under the given slotid, and return its status.
 */
static inline int uv_register_mem_slot(u64 lpid, u64 start_gpa, u64 size,
				       u64 flags, u64 slotid)
{
	return ucall_norets(UV_REGISTER_MEM_SLOT,
			    lpid, start_gpa, size, flags, slotid);
}
|
||||
|
||||
/* Issue the UV_UNREGISTER_MEM_SLOT ultracall and return its status. */
static inline int uv_unregister_mem_slot(u64 lpid, u64 slotid)
{
	return ucall_norets(UV_UNREGISTER_MEM_SLOT,
			    lpid, slotid);
}
|
||||
|
||||
/* Issue the UV_PAGE_INVAL ultracall for one page and return its status. */
static inline int uv_page_inval(u64 lpid, u64 gpa, u64 page_shift)
{
	return ucall_norets(UV_PAGE_INVAL,
			    lpid, gpa, page_shift);
}
|
||||
|
||||
/* Issue the UV_SVM_TERMINATE ultracall for lpid and return its status. */
static inline int uv_svm_terminate(u64 lpid)
{
	return ucall_norets(UV_SVM_TERMINATE,
			    lpid);
}
|
||||
|
||||
#endif /* _ASM_POWERPC_ULTRAVISOR_H */
|
||||
|
|
|
@ -71,6 +71,9 @@ kvm-hv-y += \
|
|||
book3s_64_mmu_radix.o \
|
||||
book3s_hv_nested.o
|
||||
|
||||
kvm-hv-$(CONFIG_PPC_UV) += \
|
||||
book3s_hv_uvmem.o
|
||||
|
||||
kvm-hv-$(CONFIG_PPC_TRANSACTIONAL_MEM) += \
|
||||
book3s_hv_tm.o
|
||||
|
||||
|
|
|
@ -19,6 +19,8 @@
|
|||
#include <asm/pgtable.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pte-walk.h>
|
||||
#include <asm/ultravisor.h>
|
||||
#include <asm/kvm_book3s_uvmem.h>
|
||||
|
||||
/*
|
||||
* Supported radix tree geometry.
|
||||
|
@ -915,6 +917,9 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
|
|||
if (!(dsisr & DSISR_PRTABLE_FAULT))
|
||||
gpa |= ea & 0xfff;
|
||||
|
||||
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
|
||||
return kvmppc_send_page_to_uv(kvm, gfn);
|
||||
|
||||
/* Get the corresponding memslot */
|
||||
memslot = gfn_to_memslot(kvm, gfn);
|
||||
|
||||
|
@ -972,6 +977,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
unsigned long gpa = gfn << PAGE_SHIFT;
|
||||
unsigned int shift;
|
||||
|
||||
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
|
||||
uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
|
||||
return 0;
|
||||
}
|
||||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep))
|
||||
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
|
||||
|
@ -989,6 +999,9 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
int ref = 0;
|
||||
unsigned long old, *rmapp;
|
||||
|
||||
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
|
||||
return ref;
|
||||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_young(*ptep)) {
|
||||
old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0,
|
||||
|
@ -1013,6 +1026,9 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
|
|||
unsigned int shift;
|
||||
int ref = 0;
|
||||
|
||||
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
|
||||
return ref;
|
||||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_young(*ptep))
|
||||
ref = 1;
|
||||
|
@ -1030,6 +1046,9 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm,
|
|||
int ret = 0;
|
||||
unsigned long old, *rmapp;
|
||||
|
||||
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
|
||||
return ret;
|
||||
|
||||
ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift);
|
||||
if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) {
|
||||
ret = 1;
|
||||
|
@ -1082,6 +1101,12 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
|
|||
unsigned long gpa;
|
||||
unsigned int shift;
|
||||
|
||||
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START)
|
||||
kvmppc_uvmem_drop_pages(memslot, kvm);
|
||||
|
||||
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE)
|
||||
return;
|
||||
|
||||
gpa = memslot->base_gfn << PAGE_SHIFT;
|
||||
spin_lock(&kvm->mmu_lock);
|
||||
for (n = memslot->npages; n; --n) {
|
||||
|
|
|
@ -72,6 +72,9 @@
|
|||
#include <asm/xics.h>
|
||||
#include <asm/xive.h>
|
||||
#include <asm/hw_breakpoint.h>
|
||||
#include <asm/kvm_host.h>
|
||||
#include <asm/kvm_book3s_uvmem.h>
|
||||
#include <asm/ultravisor.h>
|
||||
|
||||
#include "book3s.h"
|
||||
|
||||
|
@ -1070,6 +1073,25 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
|
|||
kvmppc_get_gpr(vcpu, 5),
|
||||
kvmppc_get_gpr(vcpu, 6));
|
||||
break;
|
||||
case H_SVM_PAGE_IN:
|
||||
ret = kvmppc_h_svm_page_in(vcpu->kvm,
|
||||
kvmppc_get_gpr(vcpu, 4),
|
||||
kvmppc_get_gpr(vcpu, 5),
|
||||
kvmppc_get_gpr(vcpu, 6));
|
||||
break;
|
||||
case H_SVM_PAGE_OUT:
|
||||
ret = kvmppc_h_svm_page_out(vcpu->kvm,
|
||||
kvmppc_get_gpr(vcpu, 4),
|
||||
kvmppc_get_gpr(vcpu, 5),
|
||||
kvmppc_get_gpr(vcpu, 6));
|
||||
break;
|
||||
case H_SVM_INIT_START:
|
||||
ret = kvmppc_h_svm_init_start(vcpu->kvm);
|
||||
break;
|
||||
case H_SVM_INIT_DONE:
|
||||
ret = kvmppc_h_svm_init_done(vcpu->kvm);
|
||||
break;
|
||||
|
||||
default:
|
||||
return RESUME_HOST;
|
||||
}
|
||||
|
@ -4494,6 +4516,29 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
|
|||
if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) &&
|
||||
((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES))
|
||||
kvmppc_radix_flush_memslot(kvm, old);
|
||||
/*
|
||||
* If UV hasn't yet called H_SVM_INIT_START, don't register memslots.
|
||||
*/
|
||||
if (!kvm->arch.secure_guest)
|
||||
return;
|
||||
|
||||
switch (change) {
|
||||
case KVM_MR_CREATE:
|
||||
if (kvmppc_uvmem_slot_init(kvm, new))
|
||||
return;
|
||||
uv_register_mem_slot(kvm->arch.lpid,
|
||||
new->base_gfn << PAGE_SHIFT,
|
||||
new->npages * PAGE_SIZE,
|
||||
0, new->id);
|
||||
break;
|
||||
case KVM_MR_DELETE:
|
||||
uv_unregister_mem_slot(kvm->arch.lpid, old->id);
|
||||
kvmppc_uvmem_slot_free(kvm, old);
|
||||
break;
|
||||
default:
|
||||
/* TODO: Handle KVM_MR_MOVE */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -4767,6 +4812,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
|
|||
char buf[32];
|
||||
int ret;
|
||||
|
||||
mutex_init(&kvm->arch.uvmem_lock);
|
||||
INIT_LIST_HEAD(&kvm->arch.uvmem_pfns);
|
||||
mutex_init(&kvm->arch.mmu_setup_lock);
|
||||
|
||||
/* Allocate the guest's logical partition ID */
|
||||
|
@ -4936,8 +4983,10 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
|
|||
if (nesting_enabled(kvm))
|
||||
kvmhv_release_all_nested(kvm);
|
||||
kvm->arch.process_table = 0;
|
||||
uv_svm_terminate(kvm->arch.lpid);
|
||||
kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
|
||||
}
|
||||
|
||||
kvmppc_free_lpid(kvm->arch.lpid);
|
||||
|
||||
kvmppc_free_pimap(kvm);
|
||||
|
@ -5377,6 +5426,94 @@ static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr,
|
|||
return rc;
|
||||
}
|
||||
|
||||
static void unpin_vpa_reset(struct kvm *kvm, struct kvmppc_vpa *vpa)
|
||||
{
|
||||
unpin_vpa(kvm, vpa);
|
||||
vpa->gpa = 0;
|
||||
vpa->pinned_addr = NULL;
|
||||
vpa->dirty = false;
|
||||
vpa->update_pending = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* IOCTL handler to turn off secure mode of guest
|
||||
*
|
||||
* - Release all device pages
|
||||
* - Issue ucall to terminate the guest on the UV side
|
||||
* - Unpin the VPA pages.
|
||||
* - Reinit the partition scoped page tables
|
||||
*/
|
||||
static int kvmhv_svm_off(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_vcpu *vcpu;
|
||||
int mmu_was_ready;
|
||||
int srcu_idx;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
|
||||
return ret;
|
||||
|
||||
mutex_lock(&kvm->arch.mmu_setup_lock);
|
||||
mmu_was_ready = kvm->arch.mmu_ready;
|
||||
if (kvm->arch.mmu_ready) {
|
||||
kvm->arch.mmu_ready = 0;
|
||||
/* order mmu_ready vs. vcpus_running */
|
||||
smp_mb();
|
||||
if (atomic_read(&kvm->arch.vcpus_running)) {
|
||||
kvm->arch.mmu_ready = 1;
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
|
||||
struct kvm_memory_slot *memslot;
|
||||
struct kvm_memslots *slots = __kvm_memslots(kvm, i);
|
||||
|
||||
if (!slots)
|
||||
continue;
|
||||
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
kvmppc_uvmem_drop_pages(memslot, kvm);
|
||||
uv_unregister_mem_slot(kvm->arch.lpid, memslot->id);
|
||||
}
|
||||
}
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
|
||||
ret = uv_svm_terminate(kvm->arch.lpid);
|
||||
if (ret != U_SUCCESS) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* When secure guest is reset, all the guest pages are sent
|
||||
* to UV via UV_PAGE_IN before the non-boot vcpus get a
|
||||
* chance to run and unpin their VPA pages. Unpinning of all
|
||||
* VPA pages is done here explicitly so that VPA pages
|
||||
* can be migrated to the secure side.
|
||||
*
|
||||
* This is required to for the secure SMP guest to reboot
|
||||
* correctly.
|
||||
*/
|
||||
kvm_for_each_vcpu(i, vcpu, kvm) {
|
||||
spin_lock(&vcpu->arch.vpa_update_lock);
|
||||
unpin_vpa_reset(kvm, &vcpu->arch.dtl);
|
||||
unpin_vpa_reset(kvm, &vcpu->arch.slb_shadow);
|
||||
unpin_vpa_reset(kvm, &vcpu->arch.vpa);
|
||||
spin_unlock(&vcpu->arch.vpa_update_lock);
|
||||
}
|
||||
|
||||
kvmppc_setup_partition_table(kvm);
|
||||
kvm->arch.secure_guest = 0;
|
||||
kvm->arch.mmu_ready = mmu_was_ready;
|
||||
out:
|
||||
mutex_unlock(&kvm->arch.mmu_setup_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct kvmppc_ops kvm_ops_hv = {
|
||||
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
|
||||
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
|
||||
|
@ -5420,6 +5557,7 @@ static struct kvmppc_ops kvm_ops_hv = {
|
|||
.enable_nested = kvmhv_enable_nested,
|
||||
.load_from_eaddr = kvmhv_load_from_eaddr,
|
||||
.store_to_eaddr = kvmhv_store_to_eaddr,
|
||||
.svm_off = kvmhv_svm_off,
|
||||
};
|
||||
|
||||
static int kvm_init_subcore_bitmap(void)
|
||||
|
@ -5528,11 +5666,16 @@ static int kvmppc_book3s_init_hv(void)
|
|||
no_mixing_hpt_and_radix = true;
|
||||
}
|
||||
|
||||
r = kvmppc_uvmem_init();
|
||||
if (r < 0)
|
||||
pr_err("KVM-HV: kvmppc_uvmem_init failed %d\n", r);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static void kvmppc_book3s_exit_hv(void)
|
||||
{
|
||||
kvmppc_uvmem_free();
|
||||
kvmppc_free_host_rm_ops();
|
||||
if (kvmppc_radix_possible())
|
||||
kvmppc_radix_exit();
|
||||
|
|
785
arch/powerpc/kvm/book3s_hv_uvmem.c
Normal file
785
arch/powerpc/kvm/book3s_hv_uvmem.c
Normal file
|
@ -0,0 +1,785 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Secure pages management: Migration of pages between normal and secure
|
||||
* memory of KVM guests.
|
||||
*
|
||||
* Copyright 2018 Bharata B Rao, IBM Corp. <bharata@linux.ibm.com>
|
||||
*/
|
||||
|
||||
/*
|
||||
* A pseries guest can be run as secure guest on Ultravisor-enabled
|
||||
* POWER platforms. On such platforms, this driver will be used to manage
|
||||
* the movement of guest pages between the normal memory managed by
|
||||
* hypervisor (HV) and secure memory managed by Ultravisor (UV).
|
||||
*
|
||||
* The page-in or page-out requests from UV will come to HV as hcalls and
|
||||
* HV will call back into UV via ultracalls to satisfy these page requests.
|
||||
*
|
||||
* Private ZONE_DEVICE memory equal to the amount of secure memory
|
||||
* available in the platform for running secure guests is hotplugged.
|
||||
* Whenever a page belonging to the guest becomes secure, a page from this
|
||||
* private device memory is used to represent and track that secure page
|
||||
* on the HV side. Some pages (like virtio buffers, VPA pages etc) are
|
||||
* shared between UV and HV. However such pages aren't represented by
|
||||
* device private memory and mappings to shared memory exist in both
|
||||
* UV and HV page tables.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Notes on locking
|
||||
*
|
||||
* kvm->arch.uvmem_lock is a per-guest lock that prevents concurrent
|
||||
* page-in and page-out requests for the same GPA. Concurrent accesses
|
||||
* can either come via UV (guest vCPUs requesting the same page)
|
||||
* or when HV and guest simultaneously access the same page.
|
||||
* This mutex serializes the migration of page from HV(normal) to
|
||||
* UV(secure) and vice versa. So the serialization points are around
|
||||
* migrate_vma routines and page-in/out routines.
|
||||
*
|
||||
* Per-guest mutex comes with a cost though. Mainly it serializes the
|
||||
* fault path as page-out can occur when HV faults on accessing secure
|
||||
* guest pages. Currently UV issues page-in requests for all the guest
|
||||
* PFNs one at a time during early boot (UV_ESM uvcall), so this is
|
||||
* not a cause for concern. Also currently the number of page-outs caused
|
||||
* by HV touching secure pages is very low. If and when UV supports
|
||||
* overcommitting, then we might see concurrent guest driven page-outs.
|
||||
*
|
||||
* Locking order
|
||||
*
|
||||
* 1. kvm->srcu - Protects KVM memslots
|
||||
* 2. kvm->mm->mmap_sem - find_vma, migrate_vma_pages and helpers, ksm_madvise
|
||||
* 3. kvm->arch.uvmem_lock - protects read/writes to uvmem slots thus acting
|
||||
* as sync-points for page-in/out
|
||||
*/
|
||||
|
||||
/*
|
||||
* Notes on page size
|
||||
*
|
||||
* Currently UV uses 2MB mappings internally, but will issue H_SVM_PAGE_IN
|
||||
* and H_SVM_PAGE_OUT hcalls in PAGE_SIZE(64K) granularity. HV tracks
|
||||
* secure GPAs at 64K page size and maintains one device PFN for each
|
||||
* 64K secure GPA. UV_PAGE_IN and UV_PAGE_OUT calls by HV are also issued
|
||||
* for 64K page at a time.
|
||||
*
|
||||
* HV faulting on secure pages: When HV touches any secure page, it
|
||||
* faults and issues a UV_PAGE_OUT request with 64K page size. Currently
|
||||
* UV splits and remaps the 2MB page if necessary and copies out the
|
||||
* required 64K page contents.
|
||||
*
|
||||
* Shared pages: Whenever guest shares a secure page, UV will split and
|
||||
* remap the 2MB page if required and issue H_SVM_PAGE_IN with 64K page size.
|
||||
*
|
||||
* HV invalidating a page: When a regular page belonging to secure
|
||||
* guest gets unmapped, HV informs UV with UV_PAGE_INVAL of 64K
|
||||
* page size. Using 64K page size is correct here because any non-secure
|
||||
* page will essentially be of 64K page size. Splitting by UV during sharing
|
||||
* and page-out ensures this.
|
||||
*
|
||||
* Page fault handling: When HV handles page fault of a page belonging
|
||||
* to secure guest, it sends that to UV with a 64K UV_PAGE_IN request.
|
||||
* Using 64K size is correct here too as UV would have split the 2MB page
|
||||
* into 64k mappings and would have done page-outs earlier.
|
||||
*
|
||||
* In summary, the current secure pages handling code in HV assumes
|
||||
* 64K page size and in fact fails any page-in/page-out requests of
|
||||
* non-64K size upfront. If and when UV starts supporting multiple
|
||||
* page-sizes, we need to break this assumption.
|
||||
*/
|
||||
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/ksm.h>
|
||||
#include <asm/ultravisor.h>
|
||||
#include <asm/mman.h>
|
||||
#include <asm/kvm_ppc.h>
|
||||
|
||||
static struct dev_pagemap kvmppc_uvmem_pgmap;
|
||||
static unsigned long *kvmppc_uvmem_bitmap;
|
||||
static DEFINE_SPINLOCK(kvmppc_uvmem_bitmap_lock);
|
||||
|
||||
#define KVMPPC_UVMEM_PFN (1UL << 63)
|
||||
|
||||
/*
 * Per-memslot bookkeeping for secure pages: one pfns[] entry per guest
 * page of the memslot this was created for.
 */
struct kvmppc_uvmem_slot {
|
||||
struct list_head list; /* linked into kvm->arch.uvmem_pfns */
|
||||
unsigned long nr_pfns; /* number of pfns[] entries (memslot npages) */
|
||||
unsigned long base_pfn; /* first guest frame covered (memslot base_gfn) */
|
||||
unsigned long *pfns; /* device PFN | KVMPPC_UVMEM_PFN while tracked, else 0 */
|
||||
};
|
||||
|
||||
/*
 * Private data hung off a device page (page->zone_device_data) while it
 * stands in for a secure guest page.
 */
struct kvmppc_uvmem_page_pvt {
|
||||
struct kvm *kvm; /* guest the page belongs to */
|
||||
unsigned long gpa; /* guest physical address the page represents */
|
||||
bool skip_page_out; /* set by kvmppc_uvmem_drop_pages() to skip UV_PAGE_OUT */
|
||||
};
|
||||
|
||||
int kvmppc_uvmem_slot_init(struct kvm *kvm, const struct kvm_memory_slot *slot)
|
||||
{
|
||||
struct kvmppc_uvmem_slot *p;
|
||||
|
||||
p = kzalloc(sizeof(*p), GFP_KERNEL);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
p->pfns = vzalloc(array_size(slot->npages, sizeof(*p->pfns)));
|
||||
if (!p->pfns) {
|
||||
kfree(p);
|
||||
return -ENOMEM;
|
||||
}
|
||||
p->nr_pfns = slot->npages;
|
||||
p->base_pfn = slot->base_gfn;
|
||||
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
list_add(&p->list, &kvm->arch.uvmem_pfns);
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* All device PFNs are already released by the time we come here.
|
||||
*/
|
||||
void kvmppc_uvmem_slot_free(struct kvm *kvm, const struct kvm_memory_slot *slot)
|
||||
{
|
||||
struct kvmppc_uvmem_slot *p, *next;
|
||||
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
list_for_each_entry_safe(p, next, &kvm->arch.uvmem_pfns, list) {
|
||||
if (p->base_pfn == slot->base_gfn) {
|
||||
vfree(p->pfns);
|
||||
list_del(&p->list);
|
||||
kfree(p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
}
|
||||
|
||||
static void kvmppc_uvmem_pfn_insert(unsigned long gfn, unsigned long uvmem_pfn,
|
||||
struct kvm *kvm)
|
||||
{
|
||||
struct kvmppc_uvmem_slot *p;
|
||||
|
||||
list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
|
||||
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
|
||||
unsigned long index = gfn - p->base_pfn;
|
||||
|
||||
p->pfns[index] = uvmem_pfn | KVMPPC_UVMEM_PFN;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void kvmppc_uvmem_pfn_remove(unsigned long gfn, struct kvm *kvm)
|
||||
{
|
||||
struct kvmppc_uvmem_slot *p;
|
||||
|
||||
list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
|
||||
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
|
||||
p->pfns[gfn - p->base_pfn] = 0;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool kvmppc_gfn_is_uvmem_pfn(unsigned long gfn, struct kvm *kvm,
|
||||
unsigned long *uvmem_pfn)
|
||||
{
|
||||
struct kvmppc_uvmem_slot *p;
|
||||
|
||||
list_for_each_entry(p, &kvm->arch.uvmem_pfns, list) {
|
||||
if (gfn >= p->base_pfn && gfn < p->base_pfn + p->nr_pfns) {
|
||||
unsigned long index = gfn - p->base_pfn;
|
||||
|
||||
if (p->pfns[index] & KVMPPC_UVMEM_PFN) {
|
||||
if (uvmem_pfn)
|
||||
*uvmem_pfn = p->pfns[index] &
|
||||
~KVMPPC_UVMEM_PFN;
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned long kvmppc_h_svm_init_start(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_memslots *slots;
|
||||
struct kvm_memory_slot *memslot;
|
||||
int ret = H_SUCCESS;
|
||||
int srcu_idx;
|
||||
|
||||
if (!kvmppc_uvmem_bitmap)
|
||||
return H_UNSUPPORTED;
|
||||
|
||||
/* Only radix guests can be secure guests */
|
||||
if (!kvm_is_radix(kvm))
|
||||
return H_UNSUPPORTED;
|
||||
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
slots = kvm_memslots(kvm);
|
||||
kvm_for_each_memslot(memslot, slots) {
|
||||
if (kvmppc_uvmem_slot_init(kvm, memslot)) {
|
||||
ret = H_PARAMETER;
|
||||
goto out;
|
||||
}
|
||||
ret = uv_register_mem_slot(kvm->arch.lpid,
|
||||
memslot->base_gfn << PAGE_SHIFT,
|
||||
memslot->npages * PAGE_SIZE,
|
||||
0, memslot->id);
|
||||
if (ret < 0) {
|
||||
kvmppc_uvmem_slot_free(kvm, memslot);
|
||||
ret = H_PARAMETER;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_START;
|
||||
out:
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned long kvmppc_h_svm_init_done(struct kvm *kvm)
|
||||
{
|
||||
if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
|
||||
return H_UNSUPPORTED;
|
||||
|
||||
kvm->arch.secure_guest |= KVMPPC_SECURE_INIT_DONE;
|
||||
pr_info("LPID %d went secure\n", kvm->arch.lpid);
|
||||
return H_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop device pages that we maintain for the secure guest
|
||||
*
|
||||
* We first mark the pages to be skipped from UV_PAGE_OUT when there
|
||||
* is HV side fault on these pages. Next we *get* these pages, forcing
|
||||
* fault on them, do fault time migration to replace the device PTEs in
|
||||
* QEMU page table with normal PTEs from newly allocated pages.
|
||||
*/
|
||||
void kvmppc_uvmem_drop_pages(const struct kvm_memory_slot *free,
|
||||
struct kvm *kvm)
|
||||
{
|
||||
int i;
|
||||
struct kvmppc_uvmem_page_pvt *pvt;
|
||||
unsigned long pfn, uvmem_pfn;
|
||||
unsigned long gfn = free->base_gfn;
|
||||
|
||||
for (i = free->npages; i; --i, ++gfn) {
|
||||
struct page *uvmem_page;
|
||||
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
if (!kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
uvmem_page = pfn_to_page(uvmem_pfn);
|
||||
pvt = uvmem_page->zone_device_data;
|
||||
pvt->skip_page_out = true;
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
|
||||
pfn = gfn_to_pfn(kvm, gfn);
|
||||
if (is_error_noslot_pfn(pfn))
|
||||
continue;
|
||||
kvm_release_pfn_clean(pfn);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a free device PFN from the pool
|
||||
*
|
||||
* Called when a normal page is moved to secure memory (UV_PAGE_IN). Device
|
||||
* PFN will be used to keep track of the secure page on HV side.
|
||||
*
|
||||
* Called with kvm->arch.uvmem_lock held
|
||||
*/
|
||||
static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
|
||||
{
|
||||
struct page *dpage = NULL;
|
||||
unsigned long bit, uvmem_pfn;
|
||||
struct kvmppc_uvmem_page_pvt *pvt;
|
||||
unsigned long pfn_last, pfn_first;
|
||||
|
||||
pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
|
||||
pfn_last = pfn_first +
|
||||
(resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);
|
||||
|
||||
spin_lock(&kvmppc_uvmem_bitmap_lock);
|
||||
bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
|
||||
pfn_last - pfn_first);
|
||||
if (bit >= (pfn_last - pfn_first))
|
||||
goto out;
|
||||
bitmap_set(kvmppc_uvmem_bitmap, bit, 1);
|
||||
spin_unlock(&kvmppc_uvmem_bitmap_lock);
|
||||
|
||||
pvt = kzalloc(sizeof(*pvt), GFP_KERNEL);
|
||||
if (!pvt)
|
||||
goto out_clear;
|
||||
|
||||
uvmem_pfn = bit + pfn_first;
|
||||
kvmppc_uvmem_pfn_insert(gpa >> PAGE_SHIFT, uvmem_pfn, kvm);
|
||||
|
||||
pvt->gpa = gpa;
|
||||
pvt->kvm = kvm;
|
||||
|
||||
dpage = pfn_to_page(uvmem_pfn);
|
||||
dpage->zone_device_data = pvt;
|
||||
get_page(dpage);
|
||||
lock_page(dpage);
|
||||
return dpage;
|
||||
out_clear:
|
||||
spin_lock(&kvmppc_uvmem_bitmap_lock);
|
||||
bitmap_clear(kvmppc_uvmem_bitmap, bit, 1);
|
||||
out:
|
||||
spin_unlock(&kvmppc_uvmem_bitmap_lock);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Alloc a PFN from private device memory pool and copy page from normal
|
||||
* memory to secure memory using UV_PAGE_IN uvcall.
|
||||
*/
|
||||
static int
|
||||
kvmppc_svm_page_in(struct vm_area_struct *vma, unsigned long start,
|
||||
unsigned long end, unsigned long gpa, struct kvm *kvm,
|
||||
unsigned long page_shift, bool *downgrade)
|
||||
{
|
||||
unsigned long src_pfn, dst_pfn = 0;
|
||||
struct migrate_vma mig;
|
||||
struct page *spage;
|
||||
unsigned long pfn;
|
||||
struct page *dpage;
|
||||
int ret = 0;
|
||||
|
||||
memset(&mig, 0, sizeof(mig));
|
||||
mig.vma = vma;
|
||||
mig.start = start;
|
||||
mig.end = end;
|
||||
mig.src = &src_pfn;
|
||||
mig.dst = &dst_pfn;
|
||||
|
||||
/*
|
||||
* We come here with mmap_sem write lock held just for
|
||||
* ksm_madvise(), otherwise we only need read mmap_sem.
|
||||
* Hence downgrade to read lock once ksm_madvise() is done.
|
||||
*/
|
||||
ret = ksm_madvise(vma, vma->vm_start, vma->vm_end,
|
||||
MADV_UNMERGEABLE, &vma->vm_flags);
|
||||
downgrade_write(&kvm->mm->mmap_sem);
|
||||
*downgrade = true;
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = migrate_vma_setup(&mig);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!(*mig.src & MIGRATE_PFN_MIGRATE)) {
|
||||
ret = -1;
|
||||
goto out_finalize;
|
||||
}
|
||||
|
||||
dpage = kvmppc_uvmem_get_page(gpa, kvm);
|
||||
if (!dpage) {
|
||||
ret = -1;
|
||||
goto out_finalize;
|
||||
}
|
||||
|
||||
pfn = *mig.src >> MIGRATE_PFN_SHIFT;
|
||||
spage = migrate_pfn_to_page(*mig.src);
|
||||
if (spage)
|
||||
uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0,
|
||||
page_shift);
|
||||
|
||||
*mig.dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
|
||||
migrate_vma_pages(&mig);
|
||||
out_finalize:
|
||||
migrate_vma_finalize(&mig);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Shares the page with HV, thus making it a normal page.
|
||||
*
|
||||
* - If the page is already secure, then provision a new page and share
|
||||
* - If the page is a normal page, share the existing page
|
||||
*
|
||||
* In the former case, uses dev_pagemap_ops.migrate_to_ram handler
|
||||
* to unmap the device page from QEMU's page tables.
|
||||
*/
|
||||
static unsigned long
|
||||
kvmppc_share_page(struct kvm *kvm, unsigned long gpa, unsigned long page_shift)
|
||||
{
|
||||
|
||||
int ret = H_PARAMETER;
|
||||
struct page *uvmem_page;
|
||||
struct kvmppc_uvmem_page_pvt *pvt;
|
||||
unsigned long pfn;
|
||||
unsigned long gfn = gpa >> page_shift;
|
||||
int srcu_idx;
|
||||
unsigned long uvmem_pfn;
|
||||
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
|
||||
uvmem_page = pfn_to_page(uvmem_pfn);
|
||||
pvt = uvmem_page->zone_device_data;
|
||||
pvt->skip_page_out = true;
|
||||
}
|
||||
|
||||
retry:
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
pfn = gfn_to_pfn(kvm, gfn);
|
||||
if (is_error_noslot_pfn(pfn))
|
||||
goto out;
|
||||
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, &uvmem_pfn)) {
|
||||
uvmem_page = pfn_to_page(uvmem_pfn);
|
||||
pvt = uvmem_page->zone_device_data;
|
||||
pvt->skip_page_out = true;
|
||||
kvm_release_pfn_clean(pfn);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (!uv_page_in(kvm->arch.lpid, pfn << page_shift, gpa, 0, page_shift))
|
||||
ret = H_SUCCESS;
|
||||
kvm_release_pfn_clean(pfn);
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
out:
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* H_SVM_PAGE_IN: Move page from normal memory to secure memory.
|
||||
*
|
||||
* H_PAGE_IN_SHARED flag makes the page shared which means that the same
|
||||
* memory in is visible from both UV and HV.
|
||||
*/
|
||||
unsigned long
|
||||
kvmppc_h_svm_page_in(struct kvm *kvm, unsigned long gpa,
|
||||
unsigned long flags, unsigned long page_shift)
|
||||
{
|
||||
bool downgrade = false;
|
||||
unsigned long start, end;
|
||||
struct vm_area_struct *vma;
|
||||
int srcu_idx;
|
||||
unsigned long gfn = gpa >> page_shift;
|
||||
int ret;
|
||||
|
||||
if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
|
||||
return H_UNSUPPORTED;
|
||||
|
||||
if (page_shift != PAGE_SHIFT)
|
||||
return H_P3;
|
||||
|
||||
if (flags & ~H_PAGE_IN_SHARED)
|
||||
return H_P2;
|
||||
|
||||
if (flags & H_PAGE_IN_SHARED)
|
||||
return kvmppc_share_page(kvm, gpa, page_shift);
|
||||
|
||||
ret = H_PARAMETER;
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
down_write(&kvm->mm->mmap_sem);
|
||||
|
||||
start = gfn_to_hva(kvm, gfn);
|
||||
if (kvm_is_error_hva(start))
|
||||
goto out;
|
||||
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
/* Fail the page-in request of an already paged-in page */
|
||||
if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
|
||||
goto out_unlock;
|
||||
|
||||
end = start + (1UL << page_shift);
|
||||
vma = find_vma_intersection(kvm->mm, start, end);
|
||||
if (!vma || vma->vm_start > start || vma->vm_end < end)
|
||||
goto out_unlock;
|
||||
|
||||
if (!kvmppc_svm_page_in(vma, start, end, gpa, kvm, page_shift,
|
||||
&downgrade))
|
||||
ret = H_SUCCESS;
|
||||
out_unlock:
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
out:
|
||||
if (downgrade)
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
else
|
||||
up_write(&kvm->mm->mmap_sem);
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Provision a new page on HV side and copy over the contents
|
||||
* from secure memory using UV_PAGE_OUT uvcall.
|
||||
*/
|
||||
static int
|
||||
kvmppc_svm_page_out(struct vm_area_struct *vma, unsigned long start,
|
||||
unsigned long end, unsigned long page_shift,
|
||||
struct kvm *kvm, unsigned long gpa)
|
||||
{
|
||||
unsigned long src_pfn, dst_pfn = 0;
|
||||
struct migrate_vma mig;
|
||||
struct page *dpage, *spage;
|
||||
struct kvmppc_uvmem_page_pvt *pvt;
|
||||
unsigned long pfn;
|
||||
int ret = U_SUCCESS;
|
||||
|
||||
memset(&mig, 0, sizeof(mig));
|
||||
mig.vma = vma;
|
||||
mig.start = start;
|
||||
mig.end = end;
|
||||
mig.src = &src_pfn;
|
||||
mig.dst = &dst_pfn;
|
||||
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
/* The requested page is already paged-out, nothing to do */
|
||||
if (!kvmppc_gfn_is_uvmem_pfn(gpa >> page_shift, kvm, NULL))
|
||||
goto out;
|
||||
|
||||
ret = migrate_vma_setup(&mig);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spage = migrate_pfn_to_page(*mig.src);
|
||||
if (!spage || !(*mig.src & MIGRATE_PFN_MIGRATE))
|
||||
goto out_finalize;
|
||||
|
||||
if (!is_zone_device_page(spage))
|
||||
goto out_finalize;
|
||||
|
||||
dpage = alloc_page_vma(GFP_HIGHUSER, vma, start);
|
||||
if (!dpage) {
|
||||
ret = -1;
|
||||
goto out_finalize;
|
||||
}
|
||||
|
||||
lock_page(dpage);
|
||||
pvt = spage->zone_device_data;
|
||||
pfn = page_to_pfn(dpage);
|
||||
|
||||
/*
|
||||
* This function is used in two cases:
|
||||
* - When HV touches a secure page, for which we do UV_PAGE_OUT
|
||||
* - When a secure page is converted to shared page, we *get*
|
||||
* the page to essentially unmap the device page. In this
|
||||
* case we skip page-out.
|
||||
*/
|
||||
if (!pvt->skip_page_out)
|
||||
ret = uv_page_out(kvm->arch.lpid, pfn << page_shift,
|
||||
gpa, 0, page_shift);
|
||||
|
||||
if (ret == U_SUCCESS)
|
||||
*mig.dst = migrate_pfn(pfn) | MIGRATE_PFN_LOCKED;
|
||||
else {
|
||||
unlock_page(dpage);
|
||||
__free_page(dpage);
|
||||
goto out_finalize;
|
||||
}
|
||||
|
||||
migrate_vma_pages(&mig);
|
||||
out_finalize:
|
||||
migrate_vma_finalize(&mig);
|
||||
out:
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fault handler callback that gets called when HV touches any page that
|
||||
* has been moved to secure memory, we ask UV to give back the page by
|
||||
* issuing UV_PAGE_OUT uvcall.
|
||||
*
|
||||
* This eventually results in dropping of device PFN and the newly
|
||||
* provisioned page/PFN gets populated in QEMU page tables.
|
||||
*/
|
||||
static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
|
||||
{
|
||||
struct kvmppc_uvmem_page_pvt *pvt = vmf->page->zone_device_data;
|
||||
|
||||
if (kvmppc_svm_page_out(vmf->vma, vmf->address,
|
||||
vmf->address + PAGE_SIZE, PAGE_SHIFT,
|
||||
pvt->kvm, pvt->gpa))
|
||||
return VM_FAULT_SIGBUS;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Release the device PFN back to the pool
|
||||
*
|
||||
* Gets called when secure page becomes a normal page during H_SVM_PAGE_OUT.
|
||||
* Gets called with kvm->arch.uvmem_lock held.
|
||||
*/
|
||||
static void kvmppc_uvmem_page_free(struct page *page)
|
||||
{
|
||||
unsigned long pfn = page_to_pfn(page) -
|
||||
(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
|
||||
struct kvmppc_uvmem_page_pvt *pvt;
|
||||
|
||||
spin_lock(&kvmppc_uvmem_bitmap_lock);
|
||||
bitmap_clear(kvmppc_uvmem_bitmap, pfn, 1);
|
||||
spin_unlock(&kvmppc_uvmem_bitmap_lock);
|
||||
|
||||
pvt = page->zone_device_data;
|
||||
page->zone_device_data = NULL;
|
||||
kvmppc_uvmem_pfn_remove(pvt->gpa >> PAGE_SHIFT, pvt->kvm);
|
||||
kfree(pvt);
|
||||
}
|
||||
|
||||
static const struct dev_pagemap_ops kvmppc_uvmem_ops = {
|
||||
.page_free = kvmppc_uvmem_page_free,
|
||||
.migrate_to_ram = kvmppc_uvmem_migrate_to_ram,
|
||||
};
|
||||
|
||||
/*
|
||||
* H_SVM_PAGE_OUT: Move page from secure memory to normal memory.
|
||||
*/
|
||||
unsigned long
|
||||
kvmppc_h_svm_page_out(struct kvm *kvm, unsigned long gpa,
|
||||
unsigned long flags, unsigned long page_shift)
|
||||
{
|
||||
unsigned long gfn = gpa >> page_shift;
|
||||
unsigned long start, end;
|
||||
struct vm_area_struct *vma;
|
||||
int srcu_idx;
|
||||
int ret;
|
||||
|
||||
if (!(kvm->arch.secure_guest & KVMPPC_SECURE_INIT_START))
|
||||
return H_UNSUPPORTED;
|
||||
|
||||
if (page_shift != PAGE_SHIFT)
|
||||
return H_P3;
|
||||
|
||||
if (flags)
|
||||
return H_P2;
|
||||
|
||||
ret = H_PARAMETER;
|
||||
srcu_idx = srcu_read_lock(&kvm->srcu);
|
||||
down_read(&kvm->mm->mmap_sem);
|
||||
start = gfn_to_hva(kvm, gfn);
|
||||
if (kvm_is_error_hva(start))
|
||||
goto out;
|
||||
|
||||
end = start + (1UL << page_shift);
|
||||
vma = find_vma_intersection(kvm->mm, start, end);
|
||||
if (!vma || vma->vm_start > start || vma->vm_end < end)
|
||||
goto out;
|
||||
|
||||
if (!kvmppc_svm_page_out(vma, start, end, page_shift, kvm, gpa))
|
||||
ret = H_SUCCESS;
|
||||
out:
|
||||
up_read(&kvm->mm->mmap_sem);
|
||||
srcu_read_unlock(&kvm->srcu, srcu_idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int kvmppc_send_page_to_uv(struct kvm *kvm, unsigned long gfn)
|
||||
{
|
||||
unsigned long pfn;
|
||||
int ret = U_SUCCESS;
|
||||
|
||||
pfn = gfn_to_pfn(kvm, gfn);
|
||||
if (is_error_noslot_pfn(pfn))
|
||||
return -EFAULT;
|
||||
|
||||
mutex_lock(&kvm->arch.uvmem_lock);
|
||||
if (kvmppc_gfn_is_uvmem_pfn(gfn, kvm, NULL))
|
||||
goto out;
|
||||
|
||||
ret = uv_page_in(kvm->arch.lpid, pfn << PAGE_SHIFT, gfn << PAGE_SHIFT,
|
||||
0, PAGE_SHIFT);
|
||||
out:
|
||||
kvm_release_pfn_clean(pfn);
|
||||
mutex_unlock(&kvm->arch.uvmem_lock);
|
||||
return (ret == U_SUCCESS) ? RESUME_GUEST : -EFAULT;
|
||||
}
|
||||
|
||||
static u64 kvmppc_get_secmem_size(void)
|
||||
{
|
||||
struct device_node *np;
|
||||
int i, len;
|
||||
const __be32 *prop;
|
||||
u64 size = 0;
|
||||
|
||||
np = of_find_compatible_node(NULL, NULL, "ibm,uv-firmware");
|
||||
if (!np)
|
||||
goto out;
|
||||
|
||||
prop = of_get_property(np, "secure-memory-ranges", &len);
|
||||
if (!prop)
|
||||
goto out_put;
|
||||
|
||||
for (i = 0; i < len / (sizeof(*prop) * 4); i++)
|
||||
size += of_read_number(prop + (i * 4) + 2, 2);
|
||||
|
||||
out_put:
|
||||
of_node_put(np);
|
||||
out:
|
||||
return size;
|
||||
}
|
||||
|
||||
int kvmppc_uvmem_init(void)
|
||||
{
|
||||
int ret = 0;
|
||||
unsigned long size;
|
||||
struct resource *res;
|
||||
void *addr;
|
||||
unsigned long pfn_last, pfn_first;
|
||||
|
||||
size = kvmppc_get_secmem_size();
|
||||
if (!size) {
|
||||
/*
|
||||
* Don't fail the initialization of kvm-hv module if
|
||||
* the platform doesn't export ibm,uv-firmware node.
|
||||
* Let normal guests run on such PEF-disabled platform.
|
||||
*/
|
||||
pr_info("KVMPPC-UVMEM: No support for secure guests\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
res = request_free_mem_region(&iomem_resource, size, "kvmppc_uvmem");
|
||||
if (IS_ERR(res)) {
|
||||
ret = PTR_ERR(res);
|
||||
goto out;
|
||||
}
|
||||
|
||||
kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
|
||||
kvmppc_uvmem_pgmap.res = *res;
|
||||
kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
|
||||
addr = memremap_pages(&kvmppc_uvmem_pgmap, NUMA_NO_NODE);
|
||||
if (IS_ERR(addr)) {
|
||||
ret = PTR_ERR(addr);
|
||||
goto out_free_region;
|
||||
}
|
||||
|
||||
pfn_first = res->start >> PAGE_SHIFT;
|
||||
pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
|
||||
kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
|
||||
sizeof(unsigned long), GFP_KERNEL);
|
||||
if (!kvmppc_uvmem_bitmap) {
|
||||
ret = -ENOMEM;
|
||||
goto out_unmap;
|
||||
}
|
||||
|
||||
pr_info("KVMPPC-UVMEM: Secure Memory size 0x%lx\n", size);
|
||||
return ret;
|
||||
out_unmap:
|
||||
memunmap_pages(&kvmppc_uvmem_pgmap);
|
||||
out_free_region:
|
||||
release_mem_region(res->start, size);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void kvmppc_uvmem_free(void)
|
||||
{
|
||||
memunmap_pages(&kvmppc_uvmem_pgmap);
|
||||
release_mem_region(kvmppc_uvmem_pgmap.res.start,
|
||||
resource_size(&kvmppc_uvmem_pgmap.res));
|
||||
kfree(kvmppc_uvmem_bitmap);
|
||||
}
|
|
@ -31,6 +31,8 @@
|
|||
#include <asm/hvcall.h>
|
||||
#include <asm/plpar_wrappers.h>
|
||||
#endif
|
||||
#include <asm/ultravisor.h>
|
||||
#include <asm/kvm_host.h>
|
||||
|
||||
#include "timing.h"
|
||||
#include "irq.h"
|
||||
|
@ -2413,6 +2415,16 @@ long kvm_arch_vm_ioctl(struct file *filp,
|
|||
r = -EFAULT;
|
||||
break;
|
||||
}
|
||||
case KVM_PPC_SVM_OFF: {
|
||||
struct kvm *kvm = filp->private_data;
|
||||
|
||||
r = 0;
|
||||
if (!kvm->arch.kvm_ops->svm_off)
|
||||
goto out;
|
||||
|
||||
r = kvm->arch.kvm_ops->svm_off(kvm);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
struct kvm *kvm = filp->private_data;
|
||||
r = kvm->arch.kvm_ops->arch_vm_ioctl(filp, ioctl, arg);
|
||||
|
|
|
@ -504,7 +504,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||
|
||||
r = -E2BIG;
|
||||
|
||||
if (*nent >= maxnent)
|
||||
if (WARN_ON(*nent >= maxnent))
|
||||
goto out;
|
||||
|
||||
do_host_cpuid(entry, function, 0);
|
||||
|
@ -778,6 +778,11 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||
case 0x8000001a:
|
||||
case 0x8000001e:
|
||||
break;
|
||||
/* Support memory encryption cpuid if host supports it */
|
||||
case 0x8000001F:
|
||||
if (!boot_cpu_has(X86_FEATURE_SEV))
|
||||
entry->eax = entry->ebx = entry->ecx = entry->edx = 0;
|
||||
break;
|
||||
/*Add support for Centaur's CPUID instruction*/
|
||||
case 0xC0000000:
|
||||
/*Just support up to 0xC0000004 now*/
|
||||
|
@ -810,6 +815,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
|
|||
static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
|
||||
int *nent, int maxnent, unsigned int type)
|
||||
{
|
||||
if (*nent >= maxnent)
|
||||
return -E2BIG;
|
||||
|
||||
if (type == KVM_GET_EMULATED_CPUID)
|
||||
return __do_cpuid_func_emulated(entry, func, nent, maxnent);
|
||||
|
||||
|
|
|
@ -5958,13 +5958,6 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
|
|||
if (npt_enabled)
|
||||
entry->edx |= F(NPT);
|
||||
|
||||
break;
|
||||
case 0x8000001F:
|
||||
/* Support memory encryption cpuid if host supports it */
|
||||
if (boot_cpu_has(X86_FEATURE_SEV))
|
||||
cpuid(0x8000001f, &entry->eax, &entry->ebx,
|
||||
&entry->ecx, &entry->edx);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -6666,7 +6666,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
|
|||
free_vpid(vmx->vpid);
|
||||
nested_vmx_free_vcpu(vcpu);
|
||||
free_loaded_vmcs(vmx->loaded_vmcs);
|
||||
kfree(vmx->guest_msrs);
|
||||
kvm_vcpu_uninit(vcpu);
|
||||
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.user_fpu);
|
||||
kmem_cache_free(x86_fpu_cache, vmx->vcpu.arch.guest_fpu);
|
||||
|
@ -6723,12 +6722,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
goto uninit_vcpu;
|
||||
}
|
||||
|
||||
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL_ACCOUNT);
|
||||
BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) * sizeof(vmx->guest_msrs[0])
|
||||
> PAGE_SIZE);
|
||||
|
||||
if (!vmx->guest_msrs)
|
||||
goto free_pml;
|
||||
BUILD_BUG_ON(ARRAY_SIZE(vmx_msr_index) != NR_SHARED_MSRS);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i) {
|
||||
u32 index = vmx_msr_index[i];
|
||||
|
@ -6760,7 +6754,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
|
||||
err = alloc_loaded_vmcs(&vmx->vmcs01);
|
||||
if (err < 0)
|
||||
goto free_msrs;
|
||||
goto free_pml;
|
||||
|
||||
msr_bitmap = vmx->vmcs01.msr_bitmap;
|
||||
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_TSC, MSR_TYPE_R);
|
||||
|
@ -6822,8 +6816,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|||
|
||||
free_vmcs:
|
||||
free_loaded_vmcs(vmx->loaded_vmcs);
|
||||
free_msrs:
|
||||
kfree(vmx->guest_msrs);
|
||||
free_pml:
|
||||
vmx_destroy_pml_buffer(vmx);
|
||||
uninit_vcpu:
|
||||
|
|
|
@ -22,6 +22,12 @@ extern u32 get_umwait_control_msr(void);
|
|||
|
||||
#define X2APIC_MSR(r) (APIC_BASE_MSR + ((r) >> 4))
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define NR_SHARED_MSRS 7
|
||||
#else
|
||||
#define NR_SHARED_MSRS 4
|
||||
#endif
|
||||
|
||||
#define NR_LOADSTORE_MSRS 8
|
||||
|
||||
struct vmx_msrs {
|
||||
|
@ -206,7 +212,7 @@ struct vcpu_vmx {
|
|||
u32 idt_vectoring_info;
|
||||
ulong rflags;
|
||||
|
||||
struct shared_msr_entry *guest_msrs;
|
||||
struct shared_msr_entry guest_msrs[NR_SHARED_MSRS];
|
||||
int nmsrs;
|
||||
int save_nmsrs;
|
||||
bool guest_msrs_ready;
|
||||
|
|
|
@ -1348,6 +1348,7 @@ struct kvm_s390_ucas_mapping {
|
|||
#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
|
||||
/* Available with KVM_CAP_PMU_EVENT_FILTER */
|
||||
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
|
||||
#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
|
||||
|
||||
/* ioctl for vm fd */
|
||||
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
|
||||
|
|
1
mm/ksm.c
1
mm/ksm.c
|
@ -2478,6 +2478,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
|
|||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ksm_madvise);
|
||||
|
||||
int __ksm_enter(struct mm_struct *mm)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue