Skip to content

Commit

Permalink
mm, x86: add support for PUD-sized transparent hugepages
Browse files Browse the repository at this point in the history
The current transparent hugepage code only supports PMDs.  This patch
adds support for transparent use of PUDs with DAX.  It does not include
support for anonymous pages.  x86 support code also added.

Most of this patch simply parallels the work that was done for huge
PMDs.  The only major difference is how the new ->pud_entry method in
mm_walk works.  The ->pmd_entry method replaces the ->pte_entry method,
whereas the ->pud_entry method works along with either ->pmd_entry or
->pte_entry.  The pagewalk code takes care of locking the PUD before
calling ->pud_walk, so handlers do not need to worry whether the PUD is
stable.

[[email protected]: fix SMP x86 32bit build for native_pud_clear()]
  Link: http://lkml.kernel.org/r/148719066814.31111.3239231168815337012.stgit@djiang5-desk3.ch.intel.com
[[email protected]: native_pud_clear missing on i386 build]
  Link: http://lkml.kernel.org/r/148640375195.69754.3315433724330910314.stgit@djiang5-desk3.ch.intel.com
Link: http://lkml.kernel.org/r/148545059381.17912.8602162635537598445.stgit@djiang5-desk3.ch.intel.com
Signed-off-by: Matthew Wilcox <[email protected]>
Signed-off-by: Dave Jiang <[email protected]>
Tested-by: Alexander Kapshuk <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: Jan Kara <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Nilesh Choudhury <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: "H. Peter Anvin" <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Matthew Wilcox authored and torvalds committed Feb 25, 2017
1 parent a2d5816 commit a00cc7d
Show file tree
Hide file tree
Showing 21 changed files with 844 additions and 18 deletions.
3 changes: 3 additions & 0 deletions arch/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,9 @@ config HAVE_IRQ_TIME_ACCOUNTING
config HAVE_ARCH_TRANSPARENT_HUGEPAGE
bool

config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
bool

config HAVE_ARCH_HUGE_VMAP
bool

Expand Down
1 change: 1 addition & 0 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ config X86
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_WITHIN_STACK_FRAMES
select HAVE_CC_STACKPROTECTOR
Expand Down
11 changes: 11 additions & 0 deletions arch/x86/include/asm/paravirt.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,6 +475,17 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
native_pmd_val(pmd));
}

static inline void set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
if (sizeof(pudval_t) > sizeof(long))
/* 5 arg words */
pv_mmu_ops.set_pud_at(mm, addr, pudp, pud);
else
PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp,
native_pud_val(pud));
}

static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
{
pmdval_t val = native_pmd_val(pmd);
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/include/asm/paravirt_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,8 @@ struct pv_mmu_ops {
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t pmdval);
void (*set_pud_at)(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pudval);
void (*pte_update)(struct mm_struct *mm, unsigned long addr,
pte_t *ptep);

Expand Down
17 changes: 17 additions & 0 deletions arch/x86/include/asm/pgtable-2level.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
*pmdp = pmd;
}

static inline void native_set_pud(pud_t *pudp, pud_t pud)
{
}

static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
{
native_set_pte(ptep, pte);
Expand All @@ -31,6 +35,10 @@ static inline void native_pmd_clear(pmd_t *pmdp)
native_set_pmd(pmdp, __pmd(0));
}

static inline void native_pud_clear(pud_t *pudp)
{
}

static inline void native_pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *xp)
{
Expand All @@ -55,6 +63,15 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif

#ifdef CONFIG_SMP
static inline pud_t native_pudp_get_and_clear(pud_t *xp)
{
return __pud(xchg((pudval_t *)xp, 0));
}
#else
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif

/* Bit manipulation helper on pte/pgoff entry */
static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
unsigned long mask, unsigned int leftshift)
Expand Down
30 changes: 30 additions & 0 deletions arch/x86/include/asm/pgtable-3level.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,12 @@ static inline void native_pmd_clear(pmd_t *pmd)
*(tmp + 1) = 0;
}

#ifndef CONFIG_SMP
static inline void native_pud_clear(pud_t *pudp)
{
}
#endif

static inline void pud_clear(pud_t *pudp)
{
set_pud(pudp, __pud(0));
Expand Down Expand Up @@ -176,6 +182,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
#define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
#endif

#ifdef CONFIG_SMP
union split_pud {
struct {
u32 pud_low;
u32 pud_high;
};
pud_t pud;
};

static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
{
union split_pud res, *orig = (union split_pud *)pudp;

/* xchg acts as a barrier before setting of the high bits */
res.pud_low = xchg(&orig->pud_low, 0);
res.pud_high = orig->pud_high;
orig->pud_high = 0;

return res.pud;
}
#else
#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
#endif

/* Encode and de-code a swap entry */
#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
#define __swp_type(x) (((x).val) & 0x1f)
Expand Down
140 changes: 140 additions & 0 deletions arch/x86/include/asm/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page);
#define set_pte(ptep, pte) native_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte)
#define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd)
#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud)

#define set_pte_atomic(ptep, pte) \
native_set_pte_atomic(ptep, pte)
Expand Down Expand Up @@ -128,6 +129,16 @@ static inline int pmd_young(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_ACCESSED;
}

static inline int pud_dirty(pud_t pud)
{
return pud_flags(pud) & _PAGE_DIRTY;
}

static inline int pud_young(pud_t pud)
{
return pud_flags(pud) & _PAGE_ACCESSED;
}

static inline int pte_write(pte_t pte)
{
return pte_flags(pte) & _PAGE_RW;
Expand Down Expand Up @@ -181,6 +192,13 @@ static inline int pmd_trans_huge(pmd_t pmd)
return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline int pud_trans_huge(pud_t pud)
{
return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE;
}
#endif

#define has_transparent_hugepage has_transparent_hugepage
static inline int has_transparent_hugepage(void)
{
Expand All @@ -192,6 +210,18 @@ static inline int pmd_devmap(pmd_t pmd)
{
return !!(pmd_val(pmd) & _PAGE_DEVMAP);
}

#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
static inline int pud_devmap(pud_t pud)
{
return !!(pud_val(pud) & _PAGE_DEVMAP);
}
#else
static inline int pud_devmap(pud_t pud)
{
return 0;
}
#endif
#endif
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

Expand Down Expand Up @@ -333,6 +363,65 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE);
}

static inline pud_t pud_set_flags(pud_t pud, pudval_t set)
{
pudval_t v = native_pud_val(pud);

return __pud(v | set);
}

static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear)
{
pudval_t v = native_pud_val(pud);

return __pud(v & ~clear);
}

static inline pud_t pud_mkold(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_ACCESSED);
}

static inline pud_t pud_mkclean(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_DIRTY);
}

static inline pud_t pud_wrprotect(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_RW);
}

static inline pud_t pud_mkdirty(pud_t pud)
{
return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}

static inline pud_t pud_mkdevmap(pud_t pud)
{
return pud_set_flags(pud, _PAGE_DEVMAP);
}

static inline pud_t pud_mkhuge(pud_t pud)
{
return pud_set_flags(pud, _PAGE_PSE);
}

static inline pud_t pud_mkyoung(pud_t pud)
{
return pud_set_flags(pud, _PAGE_ACCESSED);
}

static inline pud_t pud_mkwrite(pud_t pud)
{
return pud_set_flags(pud, _PAGE_RW);
}

static inline pud_t pud_mknotpresent(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE);
}

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline int pte_soft_dirty(pte_t pte)
{
Expand All @@ -344,6 +433,11 @@ static inline int pmd_soft_dirty(pmd_t pmd)
return pmd_flags(pmd) & _PAGE_SOFT_DIRTY;
}

static inline int pud_soft_dirty(pud_t pud)
{
return pud_flags(pud) & _PAGE_SOFT_DIRTY;
}

static inline pte_t pte_mksoft_dirty(pte_t pte)
{
return pte_set_flags(pte, _PAGE_SOFT_DIRTY);
Expand All @@ -354,6 +448,11 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
}

static inline pud_t pud_mksoft_dirty(pud_t pud)
{
return pud_set_flags(pud, _PAGE_SOFT_DIRTY);
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
Expand All @@ -364,6 +463,11 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd)
return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
}

static inline pud_t pud_clear_soft_dirty(pud_t pud)
{
return pud_clear_flags(pud, _PAGE_SOFT_DIRTY);
}

#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

/*
Expand Down Expand Up @@ -392,6 +496,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
massage_pgprot(pgprot));
}

static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot)
{
return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) |
massage_pgprot(pgprot));
}

static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
pteval_t val = pte_val(pte);
Expand Down Expand Up @@ -771,6 +881,14 @@ static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp)
return res;
}

static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
{
pud_t res = *pudp;

native_pud_clear(pudp);
return res;
}

static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep , pte_t pte)
{
Expand All @@ -783,6 +901,12 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr,
native_set_pmd(pmdp, pmd);
}

static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr,
pud_t *pudp, pud_t pud)
{
native_set_pud(pudp, pud);
}

#ifndef CONFIG_PARAVIRT
/*
* Rules for using pte_update - it must be called after any PTE update which
Expand Down Expand Up @@ -861,10 +985,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm,
extern int pmdp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp,
pmd_t entry, int dirty);
extern int pudp_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pud_t *pudp,
pud_t entry, int dirty);

#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp);
extern int pudp_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pud_t *pudp);

#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
Expand All @@ -884,6 +1013,13 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long
return native_pmdp_get_and_clear(pmdp);
}

#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR
static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm,
unsigned long addr, pud_t *pudp)
{
return native_pudp_get_and_clear(pudp);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
static inline void pmdp_set_wrprotect(struct mm_struct *mm,
unsigned long addr, pmd_t *pmdp)
Expand Down Expand Up @@ -932,6 +1068,10 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmd)
{
}
static inline void update_mmu_cache_pud(struct vm_area_struct *vma,
unsigned long addr, pud_t *pud)
{
}

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/include/asm/pgtable_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,21 @@ static inline void native_pud_clear(pud_t *pud)
native_set_pud(pud, native_make_pud(0));
}

static inline pud_t native_pudp_get_and_clear(pud_t *xp)
{
#ifdef CONFIG_SMP
return native_make_pud(xchg(&xp->pud, 0));
#else
/* native_local_pudp_get_and_clear,
* but duplicated because of cyclic dependency
*/
pud_t ret = *xp;

native_pud_clear(xp);
return ret;
#endif
}

static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
*pgdp = pgd;
Expand Down
1 change: 1 addition & 0 deletions arch/x86/kernel/paravirt.c
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = {
.pmd_clear = native_pmd_clear,
#endif
.set_pud = native_set_pud,
.set_pud_at = native_set_pud_at,

.pmd_val = PTE_IDENT,
.make_pmd = PTE_IDENT,
Expand Down
Loading

0 comments on commit a00cc7d

Please sign in to comment.