Skip to content

Commit

Permalink
userfaultfd: non-cooperative: userfaultfd_remove revalidate vma in MA…
Browse files Browse the repository at this point in the history
…DV_DONTNEED

userfaultfd_remove() has to be execute before zapping the pagetables or
UFFDIO_COPY could keep filling pages after zap_page_range returned,
which would result in non zero data after a MADV_DONTNEED.

However userfaultfd_remove() may have to release the mmap_sem.  This was
handled correctly in MADV_REMOVE, but MADV_DONTNEED accessed a
potentially stale vma (the very vma passed to zap_page_range(vma, ...)).

The fix consists in revalidating the vma in case userfaultfd_remove()
had to release the mmap_sem.

This also optimizes away an unnecessary down_read/up_read in the
MADV_REMOVE case if UFFD_EVENT_FORK had to be delivered.

It all remains zero runtime cost in case CONFIG_USERFAULTFD=n as
userfaultfd_remove() will be defined as "true" at build time.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Andrea Arcangeli <[email protected]>
Acked-by: Mike Rapoport <[email protected]>
Cc: "Dr. David Alan Gilbert" <[email protected]>
Cc: Mike Kravetz <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Cc: Hillf Danton <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
aagit authored and torvalds committed Mar 10, 2017
1 parent 7eb76d4 commit 70ccb92
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 13 deletions.
9 changes: 3 additions & 6 deletions fs/userfaultfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -695,8 +695,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx,
userfaultfd_event_wait_completion(ctx, &ewq);
}

void userfaultfd_remove(struct vm_area_struct *vma,
struct vm_area_struct **prev,
bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
Expand All @@ -705,13 +704,11 @@ void userfaultfd_remove(struct vm_area_struct *vma,

ctx = vma->vm_userfaultfd_ctx.ctx;
if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE))
return;
return true;

userfaultfd_ctx_get(ctx);
up_read(&mm->mmap_sem);

*prev = NULL; /* We wait for ACK w/o the mmap semaphore */

msg_init(&ewq.msg);

ewq.msg.event = UFFD_EVENT_REMOVE;
Expand All @@ -720,7 +717,7 @@ void userfaultfd_remove(struct vm_area_struct *vma,

userfaultfd_event_wait_completion(ctx, &ewq);

down_read(&mm->mmap_sem);
return false;
}

static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
Expand Down
7 changes: 3 additions & 4 deletions include/linux/userfaultfd_k.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,7 @@ extern void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *,
unsigned long from, unsigned long to,
unsigned long len);

extern void userfaultfd_remove(struct vm_area_struct *vma,
struct vm_area_struct **prev,
extern bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end);

Expand Down Expand Up @@ -118,11 +117,11 @@ static inline void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *ctx,
{
}

static inline void userfaultfd_remove(struct vm_area_struct *vma,
struct vm_area_struct **prev,
static inline bool userfaultfd_remove(struct vm_area_struct *vma,
unsigned long start,
unsigned long end)
{
return true;
}

static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma,
Expand Down
44 changes: 41 additions & 3 deletions mm/madvise.c
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,43 @@ static long madvise_dontneed(struct vm_area_struct *vma,
if (!can_madv_dontneed_vma(vma))
return -EINVAL;

userfaultfd_remove(vma, prev, start, end);
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_sem has been dropped, prev is stale */

down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, start);
if (!vma)
return -ENOMEM;
if (start < vma->vm_start) {
/*
* This "vma" under revalidation is the one
* with the lowest vma->vm_start where start
* is also < vma->vm_end. If start <
* vma->vm_start it means an hole materialized
* in the user address space within the
* virtual range passed to MADV_DONTNEED.
*/
return -ENOMEM;
}
if (!can_madv_dontneed_vma(vma))
return -EINVAL;
if (end > vma->vm_end) {
/*
* Don't fail if end > vma->vm_end. If the old
* vma was splitted while the mmap_sem was
* released the effect of the concurrent
* operation may not cause MADV_DONTNEED to
* have an undefined result. There may be an
* adjacent next vma that we'll walk
* next. userfaultfd_remove() will generate an
* UFFD_EVENT_REMOVE repetition on the
* end-vma->vm_end range, but the manager can
* handle a repetition fine.
*/
end = vma->vm_end;
}
VM_WARN_ON(start >= end);
}
zap_page_range(vma, start, end - start);
return 0;
}
Expand Down Expand Up @@ -554,8 +590,10 @@ static long madvise_remove(struct vm_area_struct *vma,
* mmap_sem.
*/
get_file(f);
userfaultfd_remove(vma, prev, start, end);
up_read(&current->mm->mmap_sem);
if (userfaultfd_remove(vma, start, end)) {
/* mmap_sem was not released by userfaultfd_remove() */
up_read(&current->mm->mmap_sem);
}
error = vfs_fallocate(f,
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
offset, end - start);
Expand Down

0 comments on commit 70ccb92

Please sign in to comment.