mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 17:23:25 -05:00
KVM: x86/mmu: fix potential races when walking host page table
KVM uses lookup_address_in_mm() to detect the hugepage size that the host uses to map a pfn. The function suffers from several issues: - no usage of READ_ONCE(*). This allows multiple dereference of the same page table entry. The TOCTOU problem because of that may cause KVM to incorrectly treat a newly generated leaf entry as a nonleaf one, and dereference the content by using its pfn value. - the information returned does not match what KVM needs; for non-present entries it returns the level at which the walk was terminated, as long as the entry is not 'none'. KVM needs level information of only 'present' entries, otherwise it may regard a non-present PXE entry as a present large page mapping. - the function is not safe for mappings that can be torn down, because it does not disable IRQs and because it returns a PTE pointer which is never safe to dereference after the function returns. So implement the logic for walking host page tables directly in KVM, and stop using lookup_address_in_mm(). Cc: Sean Christopherson <seanjc@google.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Mingwei Zhang <mizhang@google.com> Message-Id: <20220429031757.2042406-1-mizhang@google.com> [Inline in host_pfn_mapping_level, ensure no semantic change for its callers. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
d495f942f4
commit
44187235cb
1 changed files with 42 additions and 5 deletions
|
@ -2804,8 +2804,12 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
|
|||
const struct kvm_memory_slot *slot)
|
||||
{
|
||||
unsigned long hva;
|
||||
pte_t *pte;
|
||||
int level;
|
||||
unsigned long flags;
|
||||
int level = PG_LEVEL_4K;
|
||||
pgd_t pgd;
|
||||
p4d_t p4d;
|
||||
pud_t pud;
|
||||
pmd_t pmd;
|
||||
|
||||
if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn))
|
||||
return PG_LEVEL_4K;
|
||||
|
@ -2820,10 +2824,43 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
|
|||
*/
|
||||
hva = __gfn_to_hva_memslot(slot, gfn);
|
||||
|
||||
pte = lookup_address_in_mm(kvm->mm, hva, &level);
|
||||
if (unlikely(!pte))
|
||||
return PG_LEVEL_4K;
|
||||
/*
|
||||
* Lookup the mapping level in the current mm. The information
|
||||
* may become stale soon, but it is safe to use as long as
|
||||
* 1) mmu_notifier_retry was checked after taking mmu_lock, and
|
||||
* 2) mmu_lock is taken now.
|
||||
*
|
||||
* We still need to disable IRQs to prevent concurrent tear down
|
||||
* of page tables.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
|
||||
pgd = READ_ONCE(*pgd_offset(kvm->mm, hva));
|
||||
if (pgd_none(pgd))
|
||||
goto out;
|
||||
|
||||
p4d = READ_ONCE(*p4d_offset(&pgd, hva));
|
||||
if (p4d_none(p4d) || !p4d_present(p4d))
|
||||
goto out;
|
||||
|
||||
pud = READ_ONCE(*pud_offset(&p4d, hva));
|
||||
if (pud_none(pud) || !pud_present(pud))
|
||||
goto out;
|
||||
|
||||
if (pud_large(pud)) {
|
||||
level = PG_LEVEL_1G;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pmd = READ_ONCE(*pmd_offset(&pud, hva));
|
||||
if (pmd_none(pmd) || !pmd_present(pmd))
|
||||
goto out;
|
||||
|
||||
if (pmd_large(pmd))
|
||||
level = PG_LEVEL_2M;
|
||||
|
||||
out:
|
||||
local_irq_restore(flags);
|
||||
return level;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue