mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 17:23:25 -05:00
21ea9fb69e
In balloon_page_dequeue, pages_lock should cover the loop (ie, list_for_each_entry_safe). Otherwise, the cursor page could be isolated by compaction and then list_del by isolation could poison the page->lru.{prev,next} so the loop finally could access wrong address like this. This patch fixes the bug. general protection fault: 0000 [#1] SMP Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 2 PID: 82 Comm: vballoon Not tainted 4.4.0-rc5-mm1-access_bit+ #1906 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 task: ffff8800a7ff0000 ti: ffff8800a7fec000 task.ti: ffff8800a7fec000 RIP: 0010:[<ffffffff8115e754>] [<ffffffff8115e754>] balloon_page_dequeue+0x54/0x130 RSP: 0018:ffff8800a7fefdc0 EFLAGS: 00010246 RAX: ffff88013fff9a70 RBX: ffffea000056fe00 RCX: 0000000000002b7d RDX: ffff88013fff9a70 RSI: ffffea000056fe00 RDI: ffff88013fff9a68 RBP: ffff8800a7fefde8 R08: ffffea000056fda0 R09: 0000000000000000 R10: ffff8800a7fefd90 R11: 0000000000000001 R12: dead0000000000e0 R13: ffffea000056fe20 R14: ffff880138809070 R15: ffff880138809060 FS: 0000000000000000(0000) GS:ffff88013fc40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00007f229c10e000 CR3: 00000000b8b53000 CR4: 00000000000006a0 Stack: 0000000000000100 ffff880138809088 ffff880138809000 ffff880138809060 0000000000000046 ffff8800a7fefe28 ffffffff812c86d3 ffff880138809020 ffff880138809000 fffffffffff91900 0000000000000100 ffff880138809060 Call Trace: [<ffffffff812c86d3>] leak_balloon+0x93/0x1a0 [<ffffffff812c8bc7>] balloon+0x217/0x2a0 [<ffffffff8143739e>] ? __schedule+0x31e/0x8b0 [<ffffffff81078160>] ? abort_exclusive_wait+0xb0/0xb0 [<ffffffff812c89b0>] ? update_balloon_stats+0xf0/0xf0 [<ffffffff8105b6e9>] kthread+0xc9/0xe0 [<ffffffff8105b620>] ? kthread_park+0x60/0x60 [<ffffffff8143b4af>] ret_from_fork+0x3f/0x70 [<ffffffff8105b620>] ? 
kthread_park+0x60/0x60 Code: 8d 60 e0 0f 84 af 00 00 00 48 8b 43 20 a8 01 75 3b 48 89 d8 f0 0f ba 28 00 72 10 48 8b 03 f6 c4 08 75 2f 48 89 df e8 8c 83 f9 ff <49> 8b 44 24 20 4d 8d 6c 24 20 48 83 e8 20 4d 39 f5 74 7a 4c 89 RIP [<ffffffff8115e754>] balloon_page_dequeue+0x54/0x130 RSP <ffff8800a7fefdc0> ---[ end trace 43cf28060d708d5f ]--- Kernel panic - not syncing: Fatal exception Dumping ftrace buffer: (ftrace buffer empty) Kernel Offset: disabled Cc: <stable@vger.kernel.org> Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Rafael Aquini <aquini@redhat.com>
215 lines
6.6 KiB
C
215 lines
6.6 KiB
C
/*
|
|
* mm/balloon_compaction.c
|
|
*
|
|
* Common interface for making balloon pages movable by compaction.
|
|
*
|
|
* Copyright (C) 2012, Red Hat, Inc. Rafael Aquini <aquini@redhat.com>
|
|
*/
|
|
#include <linux/mm.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/export.h>
|
|
#include <linux/balloon_compaction.h>
|
|
|
|
/*
|
|
* balloon_page_enqueue - allocates a new page and inserts it into the balloon
|
|
* page list.
|
|
* @b_dev_info: balloon device descriptor where we will insert a new page to
|
|
*
|
|
* Driver must call it to properly allocate a new enlisted balloon page
|
|
* before definitively removing it from the guest system.
|
|
* This function returns the page address for the recently enqueued page or
|
|
* NULL in the case we fail to allocate a new page this turn.
|
|
*/
|
|
struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info)
|
|
{
|
|
unsigned long flags;
|
|
struct page *page = alloc_page(balloon_mapping_gfp_mask() |
|
|
__GFP_NOMEMALLOC | __GFP_NORETRY);
|
|
if (!page)
|
|
return NULL;
|
|
|
|
/*
|
|
* Block others from accessing the 'page' when we get around to
|
|
* establishing additional references. We should be the only one
|
|
* holding a reference to the 'page' at this point.
|
|
*/
|
|
BUG_ON(!trylock_page(page));
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
balloon_page_insert(b_dev_info, page);
|
|
__count_vm_event(BALLOON_INFLATE);
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
unlock_page(page);
|
|
return page;
|
|
}
|
|
EXPORT_SYMBOL_GPL(balloon_page_enqueue);
|
|
|
|
/*
|
|
* balloon_page_dequeue - removes a page from balloon's page list and returns
|
|
* its address to allow the driver to release the page.
|
|
* @b_dev_info: balloon device descriptor where we will grab a page from.
|
|
*
|
|
* Driver must call it to properly de-allocate a previous enlisted balloon page
|
|
* before definitively releasing it back to the guest system.
|
|
* This function returns the page address for the recently dequeued page or
|
|
* NULL in the case we find balloon's page list temporarily empty due to
|
|
* compaction isolated pages.
|
|
*/
|
|
struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
|
|
{
|
|
struct page *page, *tmp;
|
|
unsigned long flags;
|
|
bool dequeued_page;
|
|
|
|
dequeued_page = false;
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) {
|
|
/*
|
|
* Block others from accessing the 'page' while we get around
|
|
* establishing additional references and preparing the 'page'
|
|
* to be released by the balloon driver.
|
|
*/
|
|
if (trylock_page(page)) {
|
|
#ifdef CONFIG_BALLOON_COMPACTION
|
|
if (!PagePrivate(page)) {
|
|
/* raced with isolation */
|
|
unlock_page(page);
|
|
continue;
|
|
}
|
|
#endif
|
|
balloon_page_delete(page);
|
|
__count_vm_event(BALLOON_DEFLATE);
|
|
unlock_page(page);
|
|
dequeued_page = true;
|
|
break;
|
|
}
|
|
}
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
|
|
if (!dequeued_page) {
|
|
/*
|
|
* If we are unable to dequeue a balloon page because the page
|
|
* list is empty and there is no isolated pages, then something
|
|
* went out of track and some balloon pages are lost.
|
|
* BUG() here, otherwise the balloon driver may get stuck into
|
|
* an infinite loop while attempting to release all its pages.
|
|
*/
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
if (unlikely(list_empty(&b_dev_info->pages) &&
|
|
!b_dev_info->isolated_pages))
|
|
BUG();
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
page = NULL;
|
|
}
|
|
return page;
|
|
}
|
|
EXPORT_SYMBOL_GPL(balloon_page_dequeue);
|
|
|
|
#ifdef CONFIG_BALLOON_COMPACTION
|
|
|
|
static inline void __isolate_balloon_page(struct page *page)
|
|
{
|
|
struct balloon_dev_info *b_dev_info = balloon_page_device(page);
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
ClearPagePrivate(page);
|
|
list_del(&page->lru);
|
|
b_dev_info->isolated_pages++;
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
}
|
|
|
|
static inline void __putback_balloon_page(struct page *page)
|
|
{
|
|
struct balloon_dev_info *b_dev_info = balloon_page_device(page);
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
|
|
SetPagePrivate(page);
|
|
list_add(&page->lru, &b_dev_info->pages);
|
|
b_dev_info->isolated_pages--;
|
|
spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
|
|
}
|
|
|
|
/* __isolate_lru_page() counterpart for a ballooned page */
|
|
bool balloon_page_isolate(struct page *page)
|
|
{
|
|
/*
|
|
* Avoid burning cycles with pages that are yet under __free_pages(),
|
|
* or just got freed under us.
|
|
*
|
|
* In case we 'win' a race for a balloon page being freed under us and
|
|
* raise its refcount preventing __free_pages() from doing its job
|
|
* the put_page() at the end of this block will take care of
|
|
* release this page, thus avoiding a nasty leakage.
|
|
*/
|
|
if (likely(get_page_unless_zero(page))) {
|
|
/*
|
|
* As balloon pages are not isolated from LRU lists, concurrent
|
|
* compaction threads can race against page migration functions
|
|
* as well as race against the balloon driver releasing a page.
|
|
*
|
|
* In order to avoid having an already isolated balloon page
|
|
* being (wrongly) re-isolated while it is under migration,
|
|
* or to avoid attempting to isolate pages being released by
|
|
* the balloon driver, lets be sure we have the page lock
|
|
* before proceeding with the balloon page isolation steps.
|
|
*/
|
|
if (likely(trylock_page(page))) {
|
|
/*
|
|
* A ballooned page, by default, has PagePrivate set.
|
|
* Prevent concurrent compaction threads from isolating
|
|
* an already isolated balloon page by clearing it.
|
|
*/
|
|
if (balloon_page_movable(page)) {
|
|
__isolate_balloon_page(page);
|
|
unlock_page(page);
|
|
return true;
|
|
}
|
|
unlock_page(page);
|
|
}
|
|
put_page(page);
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* putback_lru_page() counterpart for a ballooned page */
|
|
void balloon_page_putback(struct page *page)
{
	/*
	 * 'lock_page()' stabilizes the page and prevents races against
	 * concurrent isolation threads attempting to re-isolate it.
	 */
	lock_page(page);

	if (WARN_ON(!__is_movable_balloon_page(page))) {
		/* Not something we can put back: warn and dump, nothing else. */
		dump_page(page, "not movable balloon page");
	} else {
		__putback_balloon_page(page);
		/* drop the extra ref count taken for page isolation */
		put_page(page);
	}

	unlock_page(page);
}
|
|
|
|
/* move_to_new_page() counterpart for a ballooned page */
|
|
int balloon_page_migrate(struct page *newpage,
|
|
struct page *page, enum migrate_mode mode)
|
|
{
|
|
struct balloon_dev_info *balloon = balloon_page_device(page);
|
|
int rc = -EAGAIN;
|
|
|
|
VM_BUG_ON_PAGE(!PageLocked(page), page);
|
|
VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
|
|
|
|
if (WARN_ON(!__is_movable_balloon_page(page))) {
|
|
dump_page(page, "not movable balloon page");
|
|
return rc;
|
|
}
|
|
|
|
if (balloon && balloon->migratepage)
|
|
rc = balloon->migratepage(balloon, newpage, page, mode);
|
|
|
|
return rc;
|
|
}
|
|
#endif /* CONFIG_BALLOON_COMPACTION */
|