1
0
Fork 0
mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2025-01-25 17:53:34 -05:00

execve: expand new process stack manually ahead of time

This is a small step towards a model where GUP itself would not expand
the stack, and any user that needs GUP to not just look up existing
mappings, but actually expand on them, would have to do so manually
beforehand, and with the mm lock held for writing.

It turns out that execve() already did almost exactly that, except it
didn't take the mm lock at all (it's single-threaded so no locking
technically needed, but it could cause lockdep errors).  And it only did
it for the CONFIG_STACK_GROWSUP case, since in that case GUP has
obviously never expanded the stack downwards.

So just make that CONFIG_STACK_GROWSUP case do the right thing with
locking, and enable it generally.  This will eventually help GUP, and in
the meantime avoids a special case and the lockdep issue.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Linus Torvalds 2023-06-19 11:34:15 -07:00
parent f440fa1ac9
commit f313c51d26

View file

@@ -200,34 +200,39 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write)
{
struct page *page;
struct vm_area_struct *vma = bprm->vma;
struct mm_struct *mm = bprm->mm;
int ret;
unsigned int gup_flags = 0;
#ifdef CONFIG_STACK_GROWSUP
if (write) {
/* We claim to hold the lock - nobody to race with */
ret = expand_downwards(bprm->vma, pos, true);
if (ret < 0)
/*
* Avoid relying on expanding the stack down in GUP (which
* does not work for STACK_GROWSUP anyway), and just do it
* by hand ahead of time.
*/
if (write && pos < vma->vm_start) {
mmap_write_lock(mm);
ret = expand_downwards(vma, pos, true);
if (unlikely(ret < 0)) {
mmap_write_unlock(mm);
return NULL;
}
#endif
if (write)
gup_flags |= FOLL_WRITE;
}
mmap_write_downgrade(mm);
} else
mmap_read_lock(mm);
/*
* We are doing an exec(). 'current' is the process
* doing the exec and bprm->mm is the new process's mm.
* doing the exec and 'mm' is the new process's mm.
*/
mmap_read_lock(bprm->mm);
ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
ret = get_user_pages_remote(mm, pos, 1,
write ? FOLL_WRITE : 0,
&page, NULL, NULL);
mmap_read_unlock(bprm->mm);
mmap_read_unlock(mm);
if (ret <= 0)
return NULL;
if (write)
acct_arg_size(bprm, vma_pages(bprm->vma));
acct_arg_size(bprm, vma_pages(vma));
return page;
}