forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This is more or less the same as the x86 page table dumper which was merged four years ago: 926e539 "x86: add code to dump the (kernel) page tables for visual inspection by kernel developers". We add a file at /sys/kernel/debug/kernel_page_tables for debugging purposes so it's quite easy to see the kernel page table layout and possible odd mappings: ---[ Identity Mapping ]--- 0x0000000000000000-0x0000000000100000 1M PTE RW ---[ Kernel Image Start ]--- 0x0000000000100000-0x0000000000800000 7M PMD RO 0x0000000000800000-0x00000000008a9000 676K PTE RO 0x00000000008a9000-0x0000000000900000 348K PTE RW 0x0000000000900000-0x0000000001500000 12M PMD RW ---[ Kernel Image End ]--- 0x0000000001500000-0x0000000280000000 10219M PMD RW 0x0000000280000000-0x000003d280000000 3904G PUD I ---[ vmemmap Area ]--- 0x000003d280000000-0x000003d288c00000 140M PTE RW 0x000003d288c00000-0x000003d300000000 1908M PMD I 0x000003d300000000-0x000003e000000000 52G PUD I ---[ vmalloc Area ]--- 0x000003e000000000-0x000003e000009000 36K PTE RW 0x000003e000009000-0x000003e0000ee000 916K PTE I 0x000003e0000ee000-0x000003e000146000 352K PTE RW 0x000003e000146000-0x000003e000200000 744K PTE I 0x000003e000200000-0x000003e080000000 2046M PMD I 0x000003e080000000-0x0000040000000000 126G PUD I This usually only makes sense for kernel developers. The output with CONFIG_DEBUG_PAGEALLOC is not very helpful, because of the huge number of mapped out pages, however I decided for the time being not to add a !DEBUG_PAGEALLOC dependency. Maybe it's helpful for somebody even with that option. Signed-off-by: Heiko Carstens <[email protected]> Signed-off-by: Martin Schwidefsky <[email protected]>
- Loading branch information
Showing
3 changed files
with
232 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
#include <linux/seq_file.h> | ||
#include <linux/debugfs.h> | ||
#include <linux/module.h> | ||
#include <linux/mm.h> | ||
#include <asm/sections.h> | ||
#include <asm/pgtable.h> | ||
|
||
static unsigned long max_addr; | ||
|
||
/*
 * Names a region of the kernel address space. The dumper prints
 * "---[ name ]---" once the walk reaches start_address.
 */
struct addr_marker {
	unsigned long start_address;	/* address at which the region begins */
	const char *name;		/* header text; NULL in the table terminator */
};
|
||
/*
 * Slot numbers for the address_markers[] table. They are used both as
 * designated initializer indices and to patch in start addresses that are
 * only known at init time.
 */
enum address_markers_idx {
	IDENTITY_NR = 0,
	KERNEL_START_NR,
	KERNEL_END_NR,
	VMEMMAP_NR,
	VMALLOC_NR,
};
|
||
static struct addr_marker address_markers[] = { | ||
[IDENTITY_NR] = {0, "Identity Mapping"}, | ||
[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"}, | ||
[KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"}, | ||
[VMEMMAP_NR] = {0, "vmemmap Area"}, | ||
[VMALLOC_NR] = {0, "vmalloc Area"}, | ||
{ -1, NULL } | ||
}; | ||
|
||
/*
 * Walker state shared between the page table walk functions and note_page().
 * It describes the run of equally mapped addresses that is currently being
 * accumulated.
 */
struct pg_state {
	int level;			/* table level of the run; 0 = nothing noted yet */
	unsigned int current_prot;	/* protection flags of the run */
	unsigned long start_address;	/* first address of the run */
	unsigned long current_address;	/* address the walker is looking at */
	const struct addr_marker *marker; /* region marker the walk is inside */
};
|
||
static void print_prot(struct seq_file *m, unsigned int pr, int level) | ||
{ | ||
static const char * const level_name[] = | ||
{ "ASCE", "PGD", "PUD", "PMD", "PTE" }; | ||
|
||
seq_printf(m, "%s ", level_name[level]); | ||
if (pr & _PAGE_INVALID) | ||
seq_printf(m, "I\n"); | ||
else | ||
seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW"); | ||
} | ||
|
||
static void note_page(struct seq_file *m, struct pg_state *st, | ||
unsigned int new_prot, int level) | ||
{ | ||
static const char units[] = "KMGTPE"; | ||
int width = sizeof(unsigned long) * 2; | ||
const char *unit = units; | ||
unsigned int prot, cur; | ||
unsigned long delta; | ||
|
||
/* | ||
* If we have a "break" in the series, we need to flush the state | ||
* that we have now. "break" is either changing perms, levels or | ||
* address space marker. | ||
*/ | ||
prot = new_prot; | ||
cur = st->current_prot; | ||
|
||
if (!st->level) { | ||
/* First entry */ | ||
st->current_prot = new_prot; | ||
st->level = level; | ||
st->marker = address_markers; | ||
seq_printf(m, "---[ %s ]---\n", st->marker->name); | ||
} else if (prot != cur || level != st->level || | ||
st->current_address >= st->marker[1].start_address) { | ||
/* Print the actual finished series */ | ||
seq_printf(m, "0x%0*lx-0x%0*lx", | ||
width, st->start_address, | ||
width, st->current_address); | ||
delta = (st->current_address - st->start_address) >> 10; | ||
while (!(delta & 0x3ff) && unit[1]) { | ||
delta >>= 10; | ||
unit++; | ||
} | ||
seq_printf(m, "%9lu%c ", delta, *unit); | ||
print_prot(m, st->current_prot, st->level); | ||
if (st->current_address >= st->marker[1].start_address) { | ||
st->marker++; | ||
seq_printf(m, "---[ %s ]---\n", st->marker->name); | ||
} | ||
st->start_address = st->current_address; | ||
st->current_prot = new_prot; | ||
st->level = level; | ||
} | ||
} | ||
|
||
/* | ||
* The actual page table walker functions. In order to keep the implementation | ||
* of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO | ||
* flags to note_page() if a region, segment or page table entry is invalid or | ||
* read-only. | ||
* After all it's just a hint that the current level being walked contains an | ||
* invalid or read-only entry. | ||
*/ | ||
static void walk_pte_level(struct seq_file *m, struct pg_state *st, | ||
pmd_t *pmd, unsigned long addr) | ||
{ | ||
unsigned int prot; | ||
pte_t *pte; | ||
int i; | ||
|
||
for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) { | ||
st->current_address = addr; | ||
pte = pte_offset_kernel(pmd, addr); | ||
prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID); | ||
note_page(m, st, prot, 4); | ||
addr += PAGE_SIZE; | ||
} | ||
} | ||
|
||
static void walk_pmd_level(struct seq_file *m, struct pg_state *st, | ||
pud_t *pud, unsigned long addr) | ||
{ | ||
unsigned int prot; | ||
pmd_t *pmd; | ||
int i; | ||
|
||
for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) { | ||
st->current_address = addr; | ||
pmd = pmd_offset(pud, addr); | ||
if (!pmd_none(*pmd)) { | ||
if (pmd_large(*pmd)) { | ||
prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO; | ||
note_page(m, st, prot, 3); | ||
} else | ||
walk_pte_level(m, st, pmd, addr); | ||
} else | ||
note_page(m, st, _PAGE_INVALID, 3); | ||
addr += PMD_SIZE; | ||
} | ||
} | ||
|
||
static void walk_pud_level(struct seq_file *m, struct pg_state *st, | ||
pgd_t *pgd, unsigned long addr) | ||
{ | ||
pud_t *pud; | ||
int i; | ||
|
||
for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) { | ||
st->current_address = addr; | ||
pud = pud_offset(pgd, addr); | ||
if (!pud_none(*pud)) | ||
walk_pmd_level(m, st, pud, addr); | ||
else | ||
note_page(m, st, _PAGE_INVALID, 2); | ||
addr += PUD_SIZE; | ||
} | ||
} | ||
|
||
static void walk_pgd_level(struct seq_file *m) | ||
{ | ||
unsigned long addr = 0; | ||
struct pg_state st; | ||
pgd_t *pgd; | ||
int i; | ||
|
||
memset(&st, 0, sizeof(st)); | ||
for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) { | ||
st.current_address = addr; | ||
pgd = pgd_offset_k(addr); | ||
if (!pgd_none(*pgd)) | ||
walk_pud_level(m, &st, pgd, addr); | ||
else | ||
note_page(m, &st, _PAGE_INVALID, 1); | ||
addr += PGDIR_SIZE; | ||
} | ||
/* Flush out the last page */ | ||
st.current_address = max_addr; | ||
note_page(m, &st, 0, 0); | ||
} | ||
|
||
/*
 * seq_file show callback: produce the complete page table dump in one go.
 * @v is unused. Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	walk_pgd_level(m);

	return 0;
}
|
||
static int ptdump_open(struct inode *inode, struct file *filp) | ||
{ | ||
return single_open(filp, ptdump_show, NULL); | ||
} | ||
|
||
static const struct file_operations ptdump_fops = { | ||
.open = ptdump_open, | ||
.read = seq_read, | ||
.llseek = seq_lseek, | ||
.release = single_release, | ||
}; | ||
|
||
static int pt_dump_init(void) | ||
{ | ||
/* | ||
* Figure out the maximum virtual address being accessible with the | ||
* kernel ASCE. We need this to keep the page table walker functions | ||
* from accessing non-existent entries. | ||
*/ | ||
#ifdef CONFIG_64BIT | ||
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2; | ||
max_addr = 1UL << (max_addr * 11 + 31); | ||
#else | ||
max_addr = 1UL << 31; | ||
#endif | ||
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; | ||
address_markers[VMALLOC_NR].start_address = VMALLOC_START; | ||
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); | ||
return 0; | ||
} | ||
device_initcall(pt_dump_init); |