mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-24 17:23:25 -05:00
mm: make madvise(MADV_WILLNEED) support swap file prefetch
Make madvise(MADV_WILLNEED) support swap file prefetch. If memory is swapout, this syscall can do swapin prefetch. It has no impact if the memory isn't swapout. [akpm@linux-foundation.org: fix CONFIG_SWAP=n build] [sasha.levin@oracle.com: fix BUG on madvise early failure] Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Sasha Levin <sasha.levin@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
a394cb8ee6
commit
1998cc0489
1 changed files with 101 additions and 4 deletions
105
mm/madvise.c
105
mm/madvise.c
|
@ -16,6 +16,9 @@
|
||||||
#include <linux/ksm.h>
|
#include <linux/ksm.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/file.h>
|
#include <linux/file.h>
|
||||||
|
#include <linux/blkdev.h>
|
||||||
|
#include <linux/swap.h>
|
||||||
|
#include <linux/swapops.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Any behaviour which results in changes to the vma->vm_flags needs to
|
* Any behaviour which results in changes to the vma->vm_flags needs to
|
||||||
|
@ -131,6 +134,84 @@ out:
|
||||||
return error;
|
return error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_SWAP
|
||||||
|
static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
|
||||||
|
unsigned long end, struct mm_walk *walk)
|
||||||
|
{
|
||||||
|
pte_t *orig_pte;
|
||||||
|
struct vm_area_struct *vma = walk->private;
|
||||||
|
unsigned long index;
|
||||||
|
|
||||||
|
if (pmd_none_or_trans_huge_or_clear_bad(pmd))
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for (index = start; index != end; index += PAGE_SIZE) {
|
||||||
|
pte_t pte;
|
||||||
|
swp_entry_t entry;
|
||||||
|
struct page *page;
|
||||||
|
spinlock_t *ptl;
|
||||||
|
|
||||||
|
orig_pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
|
||||||
|
pte = *(orig_pte + ((index - start) / PAGE_SIZE));
|
||||||
|
pte_unmap_unlock(orig_pte, ptl);
|
||||||
|
|
||||||
|
if (pte_present(pte) || pte_none(pte) || pte_file(pte))
|
||||||
|
continue;
|
||||||
|
entry = pte_to_swp_entry(pte);
|
||||||
|
if (unlikely(non_swap_entry(entry)))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
page = read_swap_cache_async(entry, GFP_HIGHUSER_MOVABLE,
|
||||||
|
vma, index);
|
||||||
|
if (page)
|
||||||
|
page_cache_release(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void force_swapin_readahead(struct vm_area_struct *vma,
|
||||||
|
unsigned long start, unsigned long end)
|
||||||
|
{
|
||||||
|
struct mm_walk walk = {
|
||||||
|
.mm = vma->vm_mm,
|
||||||
|
.pmd_entry = swapin_walk_pmd_entry,
|
||||||
|
.private = vma,
|
||||||
|
};
|
||||||
|
|
||||||
|
walk_page_range(start, end, &walk);
|
||||||
|
|
||||||
|
lru_add_drain(); /* Push any new pages onto the LRU now */
|
||||||
|
}
|
||||||
|
|
||||||
|
static void force_shm_swapin_readahead(struct vm_area_struct *vma,
|
||||||
|
unsigned long start, unsigned long end,
|
||||||
|
struct address_space *mapping)
|
||||||
|
{
|
||||||
|
pgoff_t index;
|
||||||
|
struct page *page;
|
||||||
|
swp_entry_t swap;
|
||||||
|
|
||||||
|
for (; start < end; start += PAGE_SIZE) {
|
||||||
|
index = ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
|
||||||
|
|
||||||
|
page = find_get_page(mapping, index);
|
||||||
|
if (!radix_tree_exceptional_entry(page)) {
|
||||||
|
if (page)
|
||||||
|
page_cache_release(page);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
swap = radix_to_swp_entry(page);
|
||||||
|
page = read_swap_cache_async(swap, GFP_HIGHUSER_MOVABLE,
|
||||||
|
NULL, 0);
|
||||||
|
if (page)
|
||||||
|
page_cache_release(page);
|
||||||
|
}
|
||||||
|
|
||||||
|
lru_add_drain(); /* Push any new pages onto the LRU now */
|
||||||
|
}
|
||||||
|
#endif /* CONFIG_SWAP */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Schedule all required I/O operations. Do not wait for completion.
|
* Schedule all required I/O operations. Do not wait for completion.
|
||||||
*/
|
*/
|
||||||
|
@ -140,6 +221,18 @@ static long madvise_willneed(struct vm_area_struct * vma,
|
||||||
{
|
{
|
||||||
struct file *file = vma->vm_file;
|
struct file *file = vma->vm_file;
|
||||||
|
|
||||||
|
#ifdef CONFIG_SWAP
|
||||||
|
if (!file || mapping_cap_swap_backed(file->f_mapping)) {
|
||||||
|
*prev = vma;
|
||||||
|
if (!file)
|
||||||
|
force_swapin_readahead(vma, start, end);
|
||||||
|
else
|
||||||
|
force_shm_swapin_readahead(vma, start, end,
|
||||||
|
file->f_mapping);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (!file)
|
if (!file)
|
||||||
return -EBADF;
|
return -EBADF;
|
||||||
|
|
||||||
|
@ -371,6 +464,7 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
|
||||||
int error = -EINVAL;
|
int error = -EINVAL;
|
||||||
int write;
|
int write;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
struct blk_plug plug;
|
||||||
|
|
||||||
#ifdef CONFIG_MEMORY_FAILURE
|
#ifdef CONFIG_MEMORY_FAILURE
|
||||||
if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
|
if (behavior == MADV_HWPOISON || behavior == MADV_SOFT_OFFLINE)
|
||||||
|
@ -410,18 +504,19 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
|
||||||
if (vma && start > vma->vm_start)
|
if (vma && start > vma->vm_start)
|
||||||
prev = vma;
|
prev = vma;
|
||||||
|
|
||||||
|
blk_start_plug(&plug);
|
||||||
for (;;) {
|
for (;;) {
|
||||||
/* Still start < end. */
|
/* Still start < end. */
|
||||||
error = -ENOMEM;
|
error = -ENOMEM;
|
||||||
if (!vma)
|
if (!vma)
|
||||||
goto out;
|
goto out_plug;
|
||||||
|
|
||||||
/* Here start < (end|vma->vm_end). */
|
/* Here start < (end|vma->vm_end). */
|
||||||
if (start < vma->vm_start) {
|
if (start < vma->vm_start) {
|
||||||
unmapped_error = -ENOMEM;
|
unmapped_error = -ENOMEM;
|
||||||
start = vma->vm_start;
|
start = vma->vm_start;
|
||||||
if (start >= end)
|
if (start >= end)
|
||||||
goto out;
|
goto out_plug;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Here vma->vm_start <= start < (end|vma->vm_end) */
|
/* Here vma->vm_start <= start < (end|vma->vm_end) */
|
||||||
|
@ -432,18 +527,20 @@ SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
|
||||||
/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
|
/* Here vma->vm_start <= start < tmp <= (end|vma->vm_end). */
|
||||||
error = madvise_vma(vma, &prev, start, tmp, behavior);
|
error = madvise_vma(vma, &prev, start, tmp, behavior);
|
||||||
if (error)
|
if (error)
|
||||||
goto out;
|
goto out_plug;
|
||||||
start = tmp;
|
start = tmp;
|
||||||
if (prev && start < prev->vm_end)
|
if (prev && start < prev->vm_end)
|
||||||
start = prev->vm_end;
|
start = prev->vm_end;
|
||||||
error = unmapped_error;
|
error = unmapped_error;
|
||||||
if (start >= end)
|
if (start >= end)
|
||||||
goto out;
|
goto out_plug;
|
||||||
if (prev)
|
if (prev)
|
||||||
vma = prev->vm_next;
|
vma = prev->vm_next;
|
||||||
else /* madvise_remove dropped mmap_sem */
|
else /* madvise_remove dropped mmap_sem */
|
||||||
vma = find_vma(current->mm, start);
|
vma = find_vma(current->mm, start);
|
||||||
}
|
}
|
||||||
|
out_plug:
|
||||||
|
blk_finish_plug(&plug);
|
||||||
out:
|
out:
|
||||||
if (write)
|
if (write)
|
||||||
up_write(¤t->mm->mmap_sem);
|
up_write(¤t->mm->mmap_sem);
|
||||||
|
|
Loading…
Add table
Reference in a new issue