Skip to content

Commit

Permalink
x86/mm: Fix PTI for i386 some more
Browse files Browse the repository at this point in the history
commit c48b5a4 upstream.

So it turns out that we have to do two passes of
pti_clone_entry_text(): once before initcalls, so that device and
late initcalls can use user-mode-helper / modprobe, and once after
free_initmem() / mark_readonly().

Now obviously mark_readonly() can cause PMD splits, and
pti_clone_pgtable() doesn't like that much.

Allow the late clone to split PMDs so that pagetables stay in sync.

[peterz: Changelog and comments]
Reported-by: Guenter Roeck <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Tested-by: Guenter Roeck <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Greg Kroah-Hartman <[email protected]>
  • Loading branch information
KAGA-KOKO authored and gregkh committed Sep 12, 2024
1 parent 7c890ef commit b4e9331
Showing 1 changed file with 29 additions and 16 deletions.
45 changes: 29 additions & 16 deletions arch/x86/mm/pti.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
*
* Returns a pointer to a PTE on success, or NULL on failure.
*/
static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
pmd_t *pmd;
Expand All @@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
if (!pmd)
return NULL;

/* We can't do anything sensible if we hit a large mapping. */
/* Large PMD mapping found */
if (pmd_leaf(*pmd)) {
WARN_ON(1);
return NULL;
/* Clear the PMD if we hit a large mapping from the first round */
if (late_text) {
set_pmd(pmd, __pmd(0));
} else {
WARN_ON_ONCE(1);
return NULL;
}
}

if (pmd_none(*pmd)) {
Expand Down Expand Up @@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(void)
if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
return;

target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false);
if (WARN_ON(!target_pte))
return;

Expand All @@ -301,7 +306,7 @@ enum pti_clone_level {

static void
pti_clone_pgtable(unsigned long start, unsigned long end,
enum pti_clone_level level)
enum pti_clone_level level, bool late_text)
{
unsigned long addr;

Expand Down Expand Up @@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end,
return;

/* Allocate PTE in the user page-table */
target_pte = pti_user_pagetable_walk_pte(addr);
target_pte = pti_user_pagetable_walk_pte(addr, late_text);
if (WARN_ON(!target_pte))
return;

Expand Down Expand Up @@ -452,7 +457,7 @@ static void __init pti_clone_user_shared(void)
phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
pte_t *target_pte;

target_pte = pti_user_pagetable_walk_pte(va);
target_pte = pti_user_pagetable_walk_pte(va, false);
if (WARN_ON(!target_pte))
return;

Expand All @@ -475,7 +480,7 @@ static void __init pti_clone_user_shared(void)
start = CPU_ENTRY_AREA_BASE;
end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES);

pti_clone_pgtable(start, end, PTI_CLONE_PMD);
pti_clone_pgtable(start, end, PTI_CLONE_PMD, false);
}
#endif /* CONFIG_X86_64 */

Expand All @@ -492,11 +497,11 @@ static void __init pti_setup_espfix64(void)
/*
* Clone the populated PMDs of the entry text and force it RO.
*/
static void pti_clone_entry_text(void)
static void pti_clone_entry_text(bool late)
{
pti_clone_pgtable((unsigned long) __entry_text_start,
(unsigned long) __entry_text_end,
PTI_LEVEL_KERNEL_IMAGE);
PTI_LEVEL_KERNEL_IMAGE, late);
}

/*
Expand Down Expand Up @@ -571,7 +576,7 @@ static void pti_clone_kernel_text(void)
* pti_set_kernel_image_nonglobal() did to clear the
* global bit.
*/
pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE);
pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false);

/*
* pti_clone_pgtable() will set the global bit in any PMDs
Expand Down Expand Up @@ -638,8 +643,15 @@ void __init pti_init(void)

/* Undo all global bits from the init pagetables in head_64.S: */
pti_set_kernel_image_nonglobal();

/* Replace some of the global bits just for shared entry text: */
pti_clone_entry_text();
/*
* This is very early in boot. Device and Late initcalls can do
* modprobe before free_initmem() and mark_readonly(). This
* pti_clone_entry_text() allows those user-mode-helpers to function,
* but notably the text is still RW.
*/
pti_clone_entry_text(false);
pti_setup_espfix64();
pti_setup_vsyscall();
}
Expand All @@ -656,10 +668,11 @@ void pti_finalize(void)
if (!boot_cpu_has(X86_FEATURE_PTI))
return;
/*
* We need to clone everything (again) that maps parts of the
* kernel image.
* This is after free_initmem() (all initcalls are done) and we've done
* mark_readonly(). Text is now NX which might've split some PMDs
* relative to the early clone.
*/
pti_clone_entry_text();
pti_clone_entry_text(true);
pti_clone_kernel_text();

debug_checkwx_user();
Expand Down

0 comments on commit b4e9331

Please sign in to comment.