Skip to content

Commit

Permalink
[PATCH] Fix buddy list race that could lead to page lru list corruptions
Browse files Browse the repository at this point in the history
Rohit found an obscure bug causing buddy list corruption.

page_is_buddy is using a non-atomic test (PagePrivate && page_count == 0)
to determine whether or not a free page's buddy is itself free and in the
buddy lists.

Each of the conjuncts may be true at different times due to unrelated
conditions, so the non-atomic page_is_buddy test may find each conjunct to
be true even if they were not both true at the same time (ie. the page was
not on the buddy lists).

Signed-off-by: Martin Bligh <[email protected]>
Signed-off-by: Rohit Seth <[email protected]>
Signed-off-by: Nick Piggin <[email protected]>
Signed-off-by: KAMEZAWA Hiroyuki <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Nick Piggin authored and Linus Torvalds committed Apr 10, 2006
1 parent c3a9d65 commit 676165a
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 17 deletions.
5 changes: 2 additions & 3 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,9 @@ struct page {
unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache.
* When page is free, this
* swp_entry_t if PageSwapCache;
* indicates order in the buddy
* system.
* system if PG_buddy is set.
*/
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
Expand Down
8 changes: 7 additions & 1 deletion include/linux/page-flags.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@
#define PG_mappedtodisk 16 /* Has blocks allocated on-disk */
#define PG_reclaim 17 /* To be reclaimed asap */
#define PG_nosave_free 18 /* Free, should not be written */
#define PG_uncached 19 /* Page has been mapped as uncached */
#define PG_buddy 19 /* Page is free, on buddy lists */

#define PG_uncached 20 /* Page has been mapped as uncached */

/*
* Global page accounting. One instance per CPU. Only unsigned longs are
Expand Down Expand Up @@ -317,6 +319,10 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
#define SetPageNosaveFree(page) set_bit(PG_nosave_free, &(page)->flags)
#define ClearPageNosaveFree(page) clear_bit(PG_nosave_free, &(page)->flags)

#define PageBuddy(page) test_bit(PG_buddy, &(page)->flags)
#define __SetPageBuddy(page) __set_bit(PG_buddy, &(page)->flags)
#define __ClearPageBuddy(page) __clear_bit(PG_buddy, &(page)->flags)

#define PageMappedToDisk(page) test_bit(PG_mappedtodisk, &(page)->flags)
#define SetPageMappedToDisk(page) set_bit(PG_mappedtodisk, &(page)->flags)
#define ClearPageMappedToDisk(page) clear_bit(PG_mappedtodisk, &(page)->flags)
Expand Down
31 changes: 18 additions & 13 deletions mm/page_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ static void bad_page(struct page *page)
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback );
1 << PG_writeback |
1 << PG_buddy );
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
Expand Down Expand Up @@ -236,12 +237,12 @@ static inline unsigned long page_order(struct page *page) {

static inline void set_page_order(struct page *page, int order) {
set_page_private(page, order);
__SetPagePrivate(page);
__SetPageBuddy(page);
}

static inline void rmv_page_order(struct page *page)
{
__ClearPagePrivate(page);
__ClearPageBuddy(page);
set_page_private(page, 0);
}

Expand Down Expand Up @@ -280,11 +281,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
* This function checks whether a page is free && is the buddy
* we can do coalesce a page and its buddy if
* (a) the buddy is not in a hole &&
* (b) the buddy is free &&
* (c) the buddy is on the buddy system &&
* (d) a page and its buddy have the same order.
* for recording page's order, we use page_private(page) and PG_private.
* (b) the buddy is in the buddy system &&
* (c) a page and its buddy have the same order.
*
* For recording whether a page is in the buddy system, we use PG_buddy.
* Setting, clearing, and testing PG_buddy is serialized by zone->lock.
*
* For recording page's order, we use page_private(page).
*/
static inline int page_is_buddy(struct page *page, int order)
{
Expand All @@ -293,10 +296,10 @@ static inline int page_is_buddy(struct page *page, int order)
return 0;
#endif

if (PagePrivate(page) &&
(page_order(page) == order) &&
page_count(page) == 0)
if (PageBuddy(page) && page_order(page) == order) {
BUG_ON(page_count(page) != 0);
return 1;
}
return 0;
}

Expand All @@ -313,7 +316,7 @@ static inline int page_is_buddy(struct page *page, int order)
* as necessary, plus some accounting needed to play nicely with other
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
* free pages of length of (1 << order) and marked with PG_Private.Page's
* free pages of length of (1 << order) and marked with PG_buddy. Page's
* order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
Expand Down Expand Up @@ -376,7 +379,8 @@ static inline int free_pages_check(struct page *page)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
1 << PG_reserved ))))
1 << PG_reserved |
1 << PG_buddy ))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
Expand Down Expand Up @@ -524,7 +528,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
1 << PG_reserved ))))
1 << PG_reserved |
1 << PG_buddy ))))
bad_page(page);

/*
Expand Down

0 comments on commit 676165a

Please sign in to comment.