1
0
Fork 0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-01-26 18:43:33 -05:00

KVM: introduce readonly memslot

In the current code, if we map a readonly memory region from the host into
the guest and the page is not currently mapped in the host, we get a fault
pfn; since async is not allowed in that case, the VM crashes.

We introduce a readonly memory region to map ROM/ROMD into the guest. Read
accesses to a readonly memslot proceed normally; write accesses to a readonly
memslot cause a KVM_EXIT_MMIO exit.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
This commit is contained in:
Xiao Guangrong 2012-08-21 11:02:51 +08:00 committed by Avi Kivity
parent 7068d09715
commit 4d8b81abc4
7 changed files with 102 additions and 28 deletions

View file

@ -857,7 +857,8 @@ struct kvm_userspace_memory_region {
}; };
/* for kvm_memory_region::flags */ /* for kvm_memory_region::flags */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
This ioctl allows the user to create or modify a guest physical memory This ioctl allows the user to create or modify a guest physical memory
slot. When changing an existing slot, it may be moved in the guest slot. When changing an existing slot, it may be moved in the guest
@ -873,9 +874,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
be identical. This allows large pages in the guest to be backed by large be identical. This allows large pages in the guest to be backed by large
pages in the host. pages in the host.
The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which The flags field supports two flags. The first is KVM_MEM_LOG_DIRTY_PAGES, which
instructs kvm to keep track of writes to memory within the slot. See instructs kvm to keep track of writes to memory within the slot. See
the KVM_GET_DIRTY_LOG ioctl. the KVM_GET_DIRTY_LOG ioctl. The other flag is KVM_MEM_READONLY, usable when the
KVM_CAP_READONLY_MEM capability is available; it indicates that the guest memory
is read-only, that is, the guest is only allowed to read it. Writes will be posted to
userspace as KVM_EXIT_MMIO exits.
When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory
region are automatically reflected into the guest. For example, an mmap() region are automatically reflected into the guest. For example, an mmap()

View file

@ -25,6 +25,7 @@
#define __KVM_HAVE_DEBUGREGS #define __KVM_HAVE_DEBUGREGS
#define __KVM_HAVE_XSAVE #define __KVM_HAVE_XSAVE
#define __KVM_HAVE_XCRS #define __KVM_HAVE_XCRS
#define __KVM_HAVE_READONLY_MEM
/* Architectural interrupt line count. */ /* Architectural interrupt line count. */
#define KVM_NR_INTERRUPTS 256 #define KVM_NR_INTERRUPTS 256

View file

@ -2647,6 +2647,15 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn) static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
{ {
/*
* Do not cache the mmio info caused by writing to a readonly gfn
* into the spte; otherwise a read access to the readonly gfn could
* also cause an mmio page fault and be treated as an mmio access.
* Return 1 to tell kvm to emulate it.
*/
if (pfn == KVM_PFN_ERR_RO_FAULT)
return 1;
if (pfn == KVM_PFN_ERR_HWPOISON) { if (pfn == KVM_PFN_ERR_HWPOISON) {
kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current); kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
return 0; return 0;

View file

@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_GET_TSC_KHZ:
case KVM_CAP_PCI_2_3: case KVM_CAP_PCI_2_3:
case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_KVMCLOCK_CTRL:
case KVM_CAP_READONLY_MEM:
r = 1; r = 1;
break; break;
case KVM_CAP_COALESCED_MMIO: case KVM_CAP_COALESCED_MMIO:

View file

@ -106,7 +106,8 @@ struct kvm_userspace_memory_region {
* other bits are reserved for kvm internal use which are defined in * other bits are reserved for kvm internal use which are defined in
* include/linux/kvm_host.h. * include/linux/kvm_host.h.
*/ */
#define KVM_MEM_LOG_DIRTY_PAGES 1UL #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
#define KVM_MEM_READONLY (1UL << 1)
/* for KVM_IRQ_LINE */ /* for KVM_IRQ_LINE */
struct kvm_irq_level { struct kvm_irq_level {
@ -621,6 +622,9 @@ struct kvm_ppc_smmu_info {
#define KVM_CAP_PPC_GET_SMMU_INFO 78 #define KVM_CAP_PPC_GET_SMMU_INFO 78
#define KVM_CAP_S390_COW 79 #define KVM_CAP_S390_COW 79
#define KVM_CAP_PPC_ALLOC_HTAB 80 #define KVM_CAP_PPC_ALLOC_HTAB 80
#ifdef __KVM_HAVE_READONLY_MEM
#define KVM_CAP_READONLY_MEM 81
#endif
#ifdef KVM_CAP_IRQ_ROUTING #ifdef KVM_CAP_IRQ_ROUTING

View file

@ -465,6 +465,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
void kvm_release_page_clean(struct page *page); void kvm_release_page_clean(struct page *page);
void kvm_release_page_dirty(struct page *page); void kvm_release_page_dirty(struct page *page);
void kvm_set_page_dirty(struct page *page); void kvm_set_page_dirty(struct page *page);
@ -792,12 +793,6 @@ hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
return slot->base_gfn + gfn_offset; return slot->base_gfn + gfn_offset;
} }
static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
}
static inline gpa_t gfn_to_gpa(gfn_t gfn) static inline gpa_t gfn_to_gpa(gfn_t gfn)
{ {
return (gpa_t)gfn << PAGE_SHIFT; return (gpa_t)gfn << PAGE_SHIFT;

View file

@ -680,7 +680,13 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
static int check_memory_region_flags(struct kvm_userspace_memory_region *mem) static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
{ {
if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES) u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
#ifdef KVM_CAP_READONLY_MEM
valid_flags |= KVM_MEM_READONLY;
#endif
if (mem->flags & ~valid_flags)
return -EINVAL; return -EINVAL;
return 0; return 0;
@ -973,18 +979,45 @@ out:
return size; return size;
} }
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, static bool memslot_is_readonly(struct kvm_memory_slot *slot)
gfn_t *nr_pages) {
return slot->flags & KVM_MEM_READONLY;
}
static unsigned long __gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
}
static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages, bool write)
{ {
if (!slot || slot->flags & KVM_MEMSLOT_INVALID) if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
return KVM_HVA_ERR_BAD; return KVM_HVA_ERR_BAD;
if (memslot_is_readonly(slot) && write)
return KVM_HVA_ERR_RO_BAD;
if (nr_pages) if (nr_pages)
*nr_pages = slot->npages - (gfn - slot->base_gfn); *nr_pages = slot->npages - (gfn - slot->base_gfn);
return gfn_to_hva_memslot(slot, gfn); return __gfn_to_hva_memslot(slot, gfn);
} }
static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
gfn_t *nr_pages)
{
return __gfn_to_hva_many(slot, gfn, nr_pages, true);
}
unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
gfn_t gfn)
{
return gfn_to_hva_many(slot, gfn, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn) unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
{ {
return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
@ -997,7 +1030,7 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
*/ */
static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn) static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
{ {
return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL); return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
} }
static int kvm_read_hva(void *data, void __user *hva, int len) static int kvm_read_hva(void *data, void __user *hva, int len)
@ -1106,6 +1139,17 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
return npages; return npages;
} }
static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
{
if (unlikely(!(vma->vm_flags & VM_READ)))
return false;
if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
return false;
return true;
}
/* /*
* Pin guest page in memory and return its pfn. * Pin guest page in memory and return its pfn.
* @addr: host virtual address which maps memory to the guest * @addr: host virtual address which maps memory to the guest
@ -1130,8 +1174,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
/* we can do it either atomically or asynchronously, not both */ /* we can do it either atomically or asynchronously, not both */
BUG_ON(atomic && async); BUG_ON(atomic && async);
BUG_ON(!write_fault && !writable);
if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn)) if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
return pfn; return pfn;
@ -1158,7 +1200,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
vma->vm_pgoff; vma->vm_pgoff;
BUG_ON(!kvm_is_mmio_pfn(pfn)); BUG_ON(!kvm_is_mmio_pfn(pfn));
} else { } else {
if (async && (vma->vm_flags & VM_WRITE)) if (async && vma_is_valid(vma, write_fault))
*async = true; *async = true;
pfn = KVM_PFN_ERR_FAULT; pfn = KVM_PFN_ERR_FAULT;
} }
@ -1167,19 +1209,40 @@ exit:
return pfn; return pfn;
} }
static pfn_t
__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
bool *async, bool write_fault, bool *writable)
{
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
if (addr == KVM_HVA_ERR_RO_BAD)
return KVM_PFN_ERR_RO_FAULT;
if (kvm_is_error_hva(addr))
return KVM_PFN_ERR_BAD;
/* Do not map writable pfn in the readonly memslot. */
if (writable && memslot_is_readonly(slot)) {
*writable = false;
writable = NULL;
}
return hva_to_pfn(addr, atomic, async, write_fault,
writable);
}
static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async, static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
bool write_fault, bool *writable) bool write_fault, bool *writable)
{ {
unsigned long addr; struct kvm_memory_slot *slot;
if (async) if (async)
*async = false; *async = false;
addr = gfn_to_hva(kvm, gfn); slot = gfn_to_memslot(kvm, gfn);
if (kvm_is_error_hva(addr))
return KVM_PFN_ERR_BAD;
return hva_to_pfn(addr, atomic, async, write_fault, writable); return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
writable);
} }
pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn) pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@ -1210,15 +1273,12 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn) pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{ {
unsigned long addr = gfn_to_hva_memslot(slot, gfn); return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
return hva_to_pfn(addr, false, NULL, true, NULL);
} }
pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn) pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{ {
unsigned long addr = gfn_to_hva_memslot(slot, gfn); return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
return hva_to_pfn(addr, true, NULL, true, NULL);
} }
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic); EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);