nommu: remove uses of VMA linked list
Use the maple tree or VMA iterator instead.  This is faster and will allow
us to shrink the VMA.
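
As a rough sketch of the conversion pattern (not lifted from this patch;
"mm" stands for any mm_struct and frob() is a placeholder), a walk over
every VMA changes from the vm_next linked list to the VMA iterator used
in exit_mmap() below:

	VMA_ITERATOR(vmi, mm, 0);
	struct vm_area_struct *vma;

	/* before: follow the vm_next linked list */
	for (vma = mm->mmap; vma; vma = vma->vm_next)
		frob(vma);

	/* after: walk the maple tree through the VMA iterator */
	for_each_vma(vmi, vma)
		frob(vma);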

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Matthew Wilcox (Oracle) <[email protected]>
Signed-off-by: Liam R. Howlett <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Tested-by: Yu Zhao <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: David Howells <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: SeongJae Park <[email protected]>
Cc: Sven Schnelle <[email protected]>
Cc: Will Deacon <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Matthew Wilcox (Oracle) authored and akpm00 committed Sep 27, 2022
1 parent f683b9d commit 8220543
Showing 1 changed file with 109 additions and 37 deletions.
146 changes: 109 additions & 37 deletions mm/nommu.c
@@ -557,48 +557,67 @@ void vma_mas_remove(struct vm_area_struct *vma, struct ma_state *mas)
mas_store_prealloc(mas, NULL);
}

/*
* add a VMA into a process's mm_struct in the appropriate place in the list
* and tree and add to the address space's page tree also if not an anonymous
* page
* - should be called with mm->mmap_lock held writelocked
*/
static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
static void setup_vma_to_mm(struct vm_area_struct *vma, struct mm_struct *mm)
{
struct address_space *mapping;
struct vm_area_struct *prev;
MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);

BUG_ON(!vma->vm_region);

mm->map_count++;
vma->vm_mm = mm;

/* add the VMA to the mapping */
if (vma->vm_file) {
mapping = vma->vm_file->f_mapping;
struct address_space *mapping = vma->vm_file->f_mapping;

i_mmap_lock_write(mapping);
flush_dcache_mmap_lock(mapping);
vma_interval_tree_insert(vma, &mapping->i_mmap);
flush_dcache_mmap_unlock(mapping);
i_mmap_unlock_write(mapping);
}
}

prev = mas_prev(&mas, 0);
mas_reset(&mas);
/*
* mas_add_vma_to_mm() - Maple state variant of add_vma_to_mm().
* @mas: The maple state with preallocations.
* @mm: The mm_struct
* @vma: The vma to add
*
*/
static void mas_add_vma_to_mm(struct ma_state *mas, struct mm_struct *mm,
struct vm_area_struct *vma)
{
struct vm_area_struct *prev;

BUG_ON(!vma->vm_region);

setup_vma_to_mm(vma, mm);

prev = mas_prev(mas, 0);
mas_reset(mas);
/* add the VMA to the tree */
vma_mas_store(vma, &mas);
vma_mas_store(vma, mas);
__vma_link_list(mm, vma, prev);
}

/*
* delete a VMA from its owning mm_struct and address space
* add a VMA into a process's mm_struct in the appropriate place in the list
* and tree and add to the address space's page tree also if not an anonymous
* page
* - should be called with mm->mmap_lock held writelocked
*/
static void delete_vma_from_mm(struct vm_area_struct *vma)
static int add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
{
MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0);
MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);

if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
pr_warn("Allocation of vma tree for process %d failed\n",
current->pid);
return -ENOMEM;
}
mas_add_vma_to_mm(&mas, mm, vma);
return 0;
}

static void cleanup_vma_from_mm(struct vm_area_struct *vma)
{
vma->vm_mm->map_count--;
/* remove the VMA from the mapping */
if (vma->vm_file) {
@@ -611,10 +630,25 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
flush_dcache_mmap_unlock(mapping);
i_mmap_unlock_write(mapping);
}
}
/*
* delete a VMA from its owning mm_struct and address space
*/
static int delete_vma_from_mm(struct vm_area_struct *vma)
{
MA_STATE(mas, &vma->vm_mm->mm_mt, 0, 0);

if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
pr_warn("Allocation of vma tree for process %d failed\n",
current->pid);
return -ENOMEM;
}
cleanup_vma_from_mm(vma);

/* remove from the MM's tree and list */
vma_mas_remove(vma, &mas);
__vma_unlink_list(vma->vm_mm, vma);
return 0;
}

/*
@@ -1024,6 +1058,7 @@ unsigned long do_mmap(struct file *file,
vm_flags_t vm_flags;
unsigned long capabilities, result;
int ret;
MA_STATE(mas, &current->mm->mm_mt, 0, 0);

*populate = 0;

@@ -1042,6 +1077,7 @@
* now know into VMA flags */
vm_flags = determine_vm_flags(file, prot, flags, capabilities);


/* we're going to need to record the mapping */
region = kmem_cache_zalloc(vm_region_jar, GFP_KERNEL);
if (!region)
@@ -1051,6 +1087,9 @@
if (!vma)
goto error_getting_vma;

if (mas_preallocate(&mas, vma, GFP_KERNEL))
goto error_maple_preallocate;

region->vm_usage = 1;
region->vm_flags = vm_flags;
region->vm_pgoff = pgoff;
@@ -1191,7 +1230,7 @@ unsigned long do_mmap(struct file *file,
current->mm->total_vm += len >> PAGE_SHIFT;

share:
add_vma_to_mm(current->mm, vma);
mas_add_vma_to_mm(&mas, current->mm, vma);

/* we flush the region from the icache only when the first executable
* mapping of it is made */
@@ -1217,6 +1256,7 @@

sharing_violation:
up_write(&nommu_region_sem);
mas_destroy(&mas);
pr_warn("Attempt to share mismatched mappings\n");
ret = -EINVAL;
goto error;
@@ -1233,6 +1273,14 @@
len, current->pid);
show_free_areas(0, NULL);
return -ENOMEM;

error_maple_preallocate:
kmem_cache_free(vm_region_jar, region);
vm_area_free(vma);
pr_warn("Allocation of vma tree for process %d failed\n", current->pid);
show_free_areas(0, NULL);
return -ENOMEM;

}

unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
@@ -1298,6 +1346,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
struct vm_area_struct *new;
struct vm_region *region;
unsigned long npages;
MA_STATE(mas, &mm->mm_mt, vma->vm_start, vma->vm_end);

/* we're only permitted to split anonymous regions (these should have
* only a single usage on the region) */
@@ -1312,9 +1361,13 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
return -ENOMEM;

new = vm_area_dup(vma);
if (!new) {
kmem_cache_free(vm_region_jar, region);
return -ENOMEM;
if (!new)
goto err_vma_dup;

if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
pr_warn("Allocation of vma tree for process %d failed\n",
current->pid);
goto err_mas_preallocate;
}

/* most fields are the same, copy all, and then fixup */
@@ -1333,7 +1386,6 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);

delete_vma_from_mm(vma);
down_write(&nommu_region_sem);
delete_nommu_region(vma->vm_region);
if (new_below) {
@@ -1346,9 +1398,19 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
add_nommu_region(vma->vm_region);
add_nommu_region(new->vm_region);
up_write(&nommu_region_sem);
add_vma_to_mm(mm, vma);
add_vma_to_mm(mm, new);

setup_vma_to_mm(vma, mm);
setup_vma_to_mm(new, mm);
mas_set_range(&mas, vma->vm_start, vma->vm_end - 1);
mas_store(&mas, vma);
vma_mas_store(new, &mas);
return 0;

err_mas_preallocate:
vm_area_free(new);
err_vma_dup:
kmem_cache_free(vm_region_jar, region);
return -ENOMEM;
}

/*
@@ -1363,12 +1425,14 @@ static int shrink_vma(struct mm_struct *mm,

/* adjust the VMA's pointers, which may reposition it in the MM's tree
* and list */
delete_vma_from_mm(vma);
if (delete_vma_from_mm(vma))
return -ENOMEM;
if (from > vma->vm_start)
vma->vm_end = from;
else
vma->vm_start = to;
add_vma_to_mm(mm, vma);
if (add_vma_to_mm(mm, vma))
return -ENOMEM;

/* cut the backing region down to size */
region = vma->vm_region;
@@ -1396,9 +1460,10 @@
*/
int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf)
{
MA_STATE(mas, &mm->mm_mt, start, start);
struct vm_area_struct *vma;
unsigned long end;
int ret;
int ret = 0;

len = PAGE_ALIGN(len);
if (len == 0)
@@ -1407,7 +1472,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
end = start + len;

/* find the first potentially overlapping VMA */
vma = find_vma(mm, start);
vma = mas_find(&mas, end - 1);
if (!vma) {
static int limit;
if (limit < 5) {
@@ -1426,7 +1491,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
return -EINVAL;
if (end == vma->vm_end)
goto erase_whole_vma;
vma = vma->vm_next;
vma = mas_next(&mas, end - 1);
} while (vma);
return -EINVAL;
} else {
@@ -1448,9 +1513,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list
}

erase_whole_vma:
delete_vma_from_mm(vma);
if (delete_vma_from_mm(vma))
ret = -ENOMEM;
delete_vma(mm, vma);
return 0;
return ret;
}

int vm_munmap(unsigned long addr, size_t len)
@@ -1475,20 +1541,26 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
*/
void exit_mmap(struct mm_struct *mm)
{
VMA_ITERATOR(vmi, mm, 0);
struct vm_area_struct *vma;

if (!mm)
return;

mm->total_vm = 0;

while ((vma = mm->mmap)) {
mm->mmap = vma->vm_next;
delete_vma_from_mm(vma);
/*
* Lock the mm to avoid assert complaining even though this is the only
* user of the mm
*/
mmap_write_lock(mm);
for_each_vma(vmi, vma) {
cleanup_vma_from_mm(vma);
delete_vma(mm, vma);
cond_resched();
}
__mt_destroy(&mm->mm_mt);
mmap_write_unlock(mm);
}

int vm_brk(unsigned long addr, unsigned long len)
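
A note on the error handling in the diff above (with a hypothetical caller
for illustration): storing an entry in the maple tree may need to allocate
tree nodes, so add_vma_to_mm() and delete_vma_from_mm() now preallocate with
mas_preallocate() and return -ENOMEM on failure, before any VMA or tree state
has been touched.  A caller is then expected to check the result, roughly:

	if (add_vma_to_mm(mm, vma)) {
		/* nothing was inserted into the tree; clean up locally */
		vm_area_free(vma);
		return -ENOMEM;
	}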
