[PATCH] core remove PageReserved
Remove PageReserved() calls from core code by tightening VM_RESERVED
handling in mm/ to cover PageReserved functionality.

PageReserved special casing is removed from get_page and put_page.

All setting and clearing of PageReserved is retained, and it is now flagged
in the page_alloc checks to help ensure we don't introduce any refcount
based freeing of Reserved pages.
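
For reference, the page_alloc check in question amounts to treating
PG_reserved like the other must-never-be-set-at-free flags on the buddy
free path.  A simplified sketch along the lines of 2.6-era
mm/page_alloc.c (condensed here, not the verbatim hunk from this patch):

	static inline void free_pages_check(const char *function, struct page *page)
	{
		if (page_mapcount(page) ||
		    page->mapping != NULL ||
		    page_count(page) != 0 ||
		    (page->flags & (
				1 << PG_lru	|
				1 << PG_private	|
				1 << PG_locked	|
				1 << PG_active	|
				1 << PG_reclaim	|
				1 << PG_slab	|
				1 << PG_swapcache |
				1 << PG_writeback |
				1 << PG_reserved )))	/* new: refcount frees of Reserved pages are bad pages */
			bad_page(function, page);
		if (PageDirty(page))
			__ClearPageDirty(page);
	}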

MAP_PRIVATE, PROT_WRITE of VM_RESERVED regions is tentatively being
deprecated.  We never handled it completely correctly anyway, and it can be
reintroduced in future if required (Hugh has a proof of concept).

Once the PageReserved() calls are removed from kernel/power/swsusp.c and from
all arch/ and driver code, the SetPageReserved and ClearPageReserved calls
and the PG_reserved bit itself can be trivially removed.

The last real user of PageReserved is swsusp, which uses it to determine
whether a struct page points to valid memory or not.  This still needs to be
addressed (a generic page_is_ram() should work).
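
No generic page_is_ram() exists at this point; one plausible shape,
assuming a walk of the iomem_resource tree looking for "System RAM"
entries (the helper and its approach are illustrative only, not part of
this patch):

	/*
	 * Hypothetical generic page_is_ram(): a pfn is RAM iff the whole
	 * page falls inside a "System RAM" resource.  Sketch only.
	 */
	int page_is_ram(unsigned long pfn)
	{
		u64 addr = (u64)pfn << PAGE_SHIFT;
		struct resource *res;

		for (res = iomem_resource.child; res; res = res->sibling) {
			if (!res->name || strcmp(res->name, "System RAM"))
				continue;
			if (addr >= res->start && addr + PAGE_SIZE - 1 <= res->end)
				return 1;
		}
		return 0;
	}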

A last caveat: the ZERO_PAGE is now refcounted and managed with rmap (and
thus mapcounted and counted towards shared rss).  These writes to the struct
page could cause excessive cacheline bouncing on big systems.  There are a
number of ways this could be addressed if it turns out to be an issue.
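
Concretely, the read side of an anonymous fault now takes a real
reference on the zero page and gives it an rmap entry, roughly as below
(condensed from the do_anonymous_page() changes in mm/memory.c; the
helper name and the omitted locking/error handling are ours):

	/* Why ZERO_PAGE's struct page is now written on every read fault. */
	static void map_zero_page(struct mm_struct *mm, struct vm_area_struct *vma,
				  pte_t *page_table, unsigned long addr)
	{
		struct page *page = ZERO_PAGE(addr);
		pte_t entry;

		page_cache_get(page);		/* refcount write on a global page */
		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
		inc_mm_counter(mm, file_rss);	/* counted towards shared rss */
		page_add_file_rmap(page);	/* mapcount write on a global page */
		set_pte_at(mm, addr, page_table, entry);
	}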

Signed-off-by: Nick Piggin <[email protected]>

Refcount bug fix for filemap_xip.c

Signed-off-by: Carsten Otte <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Nick Piggin authored and Linus Torvalds committed Oct 30, 2005
1 parent f9c98d0 commit b581003
Showing 22 changed files with 218 additions and 134 deletions.
12 changes: 7 additions & 5 deletions arch/ppc64/kernel/vdso.c
@@ -176,13 +176,13 @@ static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
 		return NOPAGE_SIGBUS;
 
 	/*
-	 * Last page is systemcfg, special handling here, no get_page() a
-	 * this is a reserved page
+	 * Last page is systemcfg.
 	 */
 	if ((vma->vm_end - address) <= PAGE_SIZE)
-		return virt_to_page(systemcfg);
+		pg = virt_to_page(systemcfg);
+	else
+		pg = virt_to_page(vbase + offset);
 
-	pg = virt_to_page(vbase + offset);
 	get_page(pg);
 	DBG(" ->page count: %d\n", page_count(pg));

@@ -259,7 +259,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int executable_stack)
 	 * gettimeofday will be totally dead. It's fine to use that for setting
 	 * breakpoints in the vDSO code pages though
 	 */
-	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
+	vma->vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC | VM_RESERVED;
 	vma->vm_flags |= mm->def_flags;
 	vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
 	vma->vm_ops = &vdso_vmops;
@@ -603,6 +603,8 @@ void __init vdso_init(void)
 		ClearPageReserved(pg);
 		get_page(pg);
 	}
+
+	get_page(virt_to_page(systemcfg));
 }
 
 int in_gate_area_no_task(unsigned long addr)
3 changes: 3 additions & 0 deletions arch/sparc/mm/generic.c
@@ -73,6 +73,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	int space = GET_IOSPACE(pfn);
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+	/* See comment in mm/memory.c remap_pfn_range */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
 	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
3 changes: 3 additions & 0 deletions arch/sparc64/mm/generic.c
@@ -127,6 +127,9 @@ int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long from,
 	int space = GET_IOSPACE(pfn);
 	unsigned long offset = GET_PFN(pfn) << PAGE_SHIFT;
 
+	/* See comment in mm/memory.c remap_pfn_range */
+	vma->vm_flags |= VM_IO | VM_RESERVED;
+
 	prot = __pgprot(pg_iobits);
 	offset -= from;
 	dir = pgd_offset(mm, from);
12 changes: 8 additions & 4 deletions drivers/scsi/sg.c
@@ -1886,13 +1886,17 @@ st_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_pages,
 	int i;
 
 	for (i=0; i < nr_pages; i++) {
-		if (dirtied && !PageReserved(sgl[i].page))
-			SetPageDirty(sgl[i].page);
-		/* unlock_page(sgl[i].page); */
+		struct page *page = sgl[i].page;
+
+		/* XXX: just for debug. Remove when PageReserved is removed */
+		BUG_ON(PageReserved(page));
+		if (dirtied)
+			SetPageDirty(page);
+		/* unlock_page(page); */
 		/* FIXME: cache flush missing for rw==READ
 		 * FIXME: call the correct reference counting function
 		 */
-		page_cache_release(sgl[i].page);
+		page_cache_release(page);
 	}
 
 	return 0;
10 changes: 7 additions & 3 deletions drivers/scsi/st.c
@@ -4526,12 +4526,16 @@ static int sgl_unmap_user_pages(struct scatterlist *sgl, const unsigned int nr_p
 	int i;
 
 	for (i=0; i < nr_pages; i++) {
-		if (dirtied && !PageReserved(sgl[i].page))
-			SetPageDirty(sgl[i].page);
+		struct page *page = sgl[i].page;
+
+		/* XXX: just for debug. Remove when PageReserved is removed */
+		BUG_ON(PageReserved(page));
+		if (dirtied)
+			SetPageDirty(page);
 		/* FIXME: cache flush missing for rw==READ
 		 * FIXME: call the correct reference counting function
 		 */
-		page_cache_release(sgl[i].page);
+		page_cache_release(page);
 	}
 
 	return 0;
4 changes: 3 additions & 1 deletion fs/direct-io.c
@@ -162,14 +162,16 @@ static int dio_refill_pages(struct dio *dio)
 	up_read(&current->mm->mmap_sem);
 
 	if (ret < 0 && dio->blocks_available && (dio->rw == WRITE)) {
+		struct page *page = ZERO_PAGE(dio->curr_user_address);
 		/*
 		 * A memory fault, but the filesystem has some outstanding
 		 * mapped blocks.  We need to use those blocks up to avoid
 		 * leaking stale data in the file.
 		 */
 		if (dio->page_errors == 0)
 			dio->page_errors = ret;
-		dio->pages[0] = ZERO_PAGE(dio->curr_user_address);
+		page_cache_get(page);
+		dio->pages[0] = page;
 		dio->head = 0;
 		dio->tail = 1;
 		ret = 0;
5 changes: 3 additions & 2 deletions include/linux/mm.h
@@ -157,7 +157,7 @@ extern unsigned int kobjsize(const void *objp);
 
 #define VM_DONTCOPY	0x00020000	/* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
-#define VM_RESERVED	0x00080000	/* Don't unmap it from swap_out */
+#define VM_RESERVED	0x00080000	/* Pages managed in a special way */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
@@ -338,7 +338,7 @@ static inline void get_page(struct page *page)
 
 static inline void put_page(struct page *page)
 {
-	if (!PageReserved(page) && put_page_testzero(page))
+	if (put_page_testzero(page))
 		__page_cache_release(page);
 }

@@ -723,6 +723,7 @@ void install_arg_page(struct vm_area_struct *, struct page *, unsigned long);
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
 		int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
 
 int __set_page_dirty_buffers(struct page *page);
 int __set_page_dirty_nobuffers(struct page *page);
25 changes: 16 additions & 9 deletions kernel/power/swsusp.c
@@ -578,15 +578,23 @@ static int save_highmem_zone(struct zone *zone)
 			continue;
 		page = pfn_to_page(pfn);
 		/*
-		 * This condition results from rvmalloc() sans vmalloc_32()
-		 * and architectural memory reservations. This should be
-		 * corrected eventually when the cases giving rise to this
-		 * are better understood.
+		 * PageReserved results from rvmalloc() sans vmalloc_32()
+		 * and architectural memory reservations.
+		 *
+		 * rvmalloc should not cause this, because all implementations
+		 * appear to always be using vmalloc_32 on architectures with
+		 * highmem. This is a good thing, because we would like to save
+		 * rvmalloc pages.
+		 *
+		 * It appears to be triggered by pages which do not point to
+		 * valid memory (see arch/i386/mm/init.c:one_highpage_init(),
+		 * which sets PageReserved if the page does not point to valid
+		 * RAM.
+		 *
+		 * XXX: must remove usage of PageReserved!
 		 */
-		if (PageReserved(page)) {
-			printk("highmem reserved page?!\n");
+		if (PageReserved(page))
 			continue;
-		}
 		BUG_ON(PageNosave(page));
 		if (PageNosaveFree(page))
 			continue;
@@ -672,10 +680,9 @@ static int saveable(struct zone * zone, unsigned long * zone_pfn)
 		return 0;
 
 	page = pfn_to_page(pfn);
-	BUG_ON(PageReserved(page) && PageNosave(page));
 	if (PageNosave(page))
 		return 0;
-	if (PageReserved(page) && pfn_is_nosave(pfn)) {
+	if (pfn_is_nosave(pfn)) {
 		pr_debug("[nosave pfn 0x%lx]", pfn);
 		return 0;
 	}
1 change: 1 addition & 0 deletions mm/bootmem.c
@@ -305,6 +305,7 @@ static unsigned long __init free_all_bootmem_core(pg_data_t *pgdat)
 				if (j + 16 < BITS_PER_LONG)
 					prefetchw(page + j + 16);
 				__ClearPageReserved(page + j);
+				set_page_count(page + j, 0);
 			}
 			__free_pages(page, order);
 			i += BITS_PER_LONG;
11 changes: 8 additions & 3 deletions mm/filemap_xip.c
@@ -174,6 +174,7 @@ __xip_unmap (struct address_space * mapping,
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
+	struct page *page = ZERO_PAGE(address);
 
 	spin_lock(&mapping->i_mmap_lock);
 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
@@ -185,15 +186,17 @@ __xip_unmap (struct address_space * mapping,
 		 * We need the page_table_lock to protect us from page faults,
 		 * munmap, fork, etc...
 		 */
-		pte = page_check_address(ZERO_PAGE(address), mm,
-					 address);
+		pte = page_check_address(page, mm, address);
 		if (!IS_ERR(pte)) {
 			/* Nuke the page table entry. */
 			flush_cache_page(vma, address, pte_pfn(*pte));
 			pteval = ptep_clear_flush(vma, address, pte);
+			page_remove_rmap(page);
+			dec_mm_counter(mm, file_rss);
 			BUG_ON(pte_dirty(pteval));
 			pte_unmap(pte);
 			spin_unlock(&mm->page_table_lock);
+			page_cache_release(page);
 		}
 	}
 	spin_unlock(&mapping->i_mmap_lock);
@@ -228,7 +231,7 @@ xip_file_nopage(struct vm_area_struct * area,
 
 	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
 	if (!IS_ERR(page)) {
-		return page;
+		goto out;
 	}
 	if (PTR_ERR(page) != -ENODATA)
 		return NULL;
@@ -249,6 +252,8 @@ xip_file_nopage(struct vm_area_struct * area,
 		page = ZERO_PAGE(address);
 	}
 
+out:
+	page_cache_get(page);
 	return page;
 }

23 changes: 14 additions & 9 deletions mm/fremap.c
@@ -29,19 +29,20 @@ static inline void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 		return;
 	if (pte_present(pte)) {
 		unsigned long pfn = pte_pfn(pte);
+		struct page *page;
 
 		flush_cache_page(vma, addr, pfn);
 		pte = ptep_clear_flush(vma, addr, ptep);
-		if (pfn_valid(pfn)) {
-			struct page *page = pfn_to_page(pfn);
-			if (!PageReserved(page)) {
-				if (pte_dirty(pte))
-					set_page_dirty(page);
-				page_remove_rmap(page);
-				page_cache_release(page);
-				dec_mm_counter(mm, file_rss);
-			}
+		if (unlikely(!pfn_valid(pfn))) {
+			print_bad_pte(vma, pte, addr);
+			return;
 		}
+		page = pfn_to_page(pfn);
+		if (pte_dirty(pte))
+			set_page_dirty(page);
+		page_remove_rmap(page);
+		page_cache_release(page);
+		dec_mm_counter(mm, file_rss);
 	} else {
 		if (!pte_file(pte))
 			free_swap_and_cache(pte_to_swp_entry(pte));
@@ -65,6 +66,8 @@ int install_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	pgd_t *pgd;
 	pte_t pte_val;
 
+	BUG_ON(vma->vm_flags & VM_RESERVED);
+
 	pgd = pgd_offset(mm, addr);
 	spin_lock(&mm->page_table_lock);

@@ -125,6 +128,8 @@ int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
 	pgd_t *pgd;
 	pte_t pte_val;
 
+	BUG_ON(vma->vm_flags & VM_RESERVED);
+
 	pgd = pgd_offset(mm, addr);
 	spin_lock(&mm->page_table_lock);

2 changes: 1 addition & 1 deletion mm/madvise.c
@@ -126,7 +126,7 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 			     unsigned long start, unsigned long end)
 {
 	*prev = vma;
-	if ((vma->vm_flags & VM_LOCKED) || is_vm_hugetlb_page(vma))
+	if (vma->vm_flags & (VM_LOCKED|VM_HUGETLB|VM_RESERVED))
 		return -EINVAL;
 
 	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {