Skip to content

Commit

Permalink
s390/mm: add page table dumper
Browse files Browse the repository at this point in the history
This is more or less the same as the x86 page table dumper which was
merged four years ago: 926e539 "x86: add code to dump the (kernel)
page tables for visual inspection by kernel developers".

We add a file at /sys/kernel/debug/kernel_page_tables for debugging
purposes so it's quite easy to see the kernel page table layout and
possible odd mappings:

---[ Identity Mapping ]---
0x0000000000000000-0x0000000000100000        1M PTE RW
---[ Kernel Image Start ]---
0x0000000000100000-0x0000000000800000        7M PMD RO
0x0000000000800000-0x00000000008a9000      676K PTE RO
0x00000000008a9000-0x0000000000900000      348K PTE RW
0x0000000000900000-0x0000000001500000       12M PMD RW
---[ Kernel Image End ]---
0x0000000001500000-0x0000000280000000    10219M PMD RW
0x0000000280000000-0x000003d280000000     3904G PUD I
---[ vmemmap Area ]---
0x000003d280000000-0x000003d288c00000      140M PTE RW
0x000003d288c00000-0x000003d300000000     1908M PMD I
0x000003d300000000-0x000003e000000000       52G PUD I
---[ vmalloc Area ]---
0x000003e000000000-0x000003e000009000       36K PTE RW
0x000003e000009000-0x000003e0000ee000      916K PTE I
0x000003e0000ee000-0x000003e000146000      352K PTE RW
0x000003e000146000-0x000003e000200000      744K PTE I
0x000003e000200000-0x000003e080000000     2046M PMD I
0x000003e080000000-0x0000040000000000      126G PUD I

This usually makes only sense for kernel developers. The output
with CONFIG_DEBUG_PAGEALLOC is not very helpful, because of the
huge number of mapped out pages, however I decided for the time
being to not add a !DEBUG_PAGEALLOC dependency.
Maybe it's helpful for somebody even with that option.

Signed-off-by: Heiko Carstens <[email protected]>
Signed-off-by: Martin Schwidefsky <[email protected]>
  • Loading branch information
heicarst authored and Martin Schwidefsky committed Oct 9, 2012
1 parent 51eee03 commit e76e82d
Show file tree
Hide file tree
Showing 3 changed files with 232 additions and 0 deletions.
12 changes: 12 additions & 0 deletions arch/s390/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,18 @@ config DEBUG_STRICT_USER_COPY_CHECKS

If unsure, or if you run an older (pre 4.4) gcc, say N.

config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
select DEBUG_FS
---help---
Say Y here if you want to show the kernel pagetable layout in a
debugfs file. This information is only useful for kernel developers
who are working in architecture specific areas of the kernel.
It is probably not a good idea to enable this feature in a production
kernel.
If in doubt, say "N"

config DEBUG_SET_MODULE_RONX
def_bool y
depends on MODULES
Expand Down
1 change: 1 addition & 0 deletions arch/s390/mm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o \
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_DEBUG_SET_MODULE_RONX) += pageattr.o
obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o
219 changes: 219 additions & 0 deletions arch/s390/mm/dump_pagetables.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/sections.h>
#include <asm/pgtable.h>

static unsigned long max_addr;

struct addr_marker {
unsigned long start_address;
const char *name;
};

enum address_markers_idx {
IDENTITY_NR = 0,
KERNEL_START_NR,
KERNEL_END_NR,
VMEMMAP_NR,
VMALLOC_NR,
};

static struct addr_marker address_markers[] = {
[IDENTITY_NR] = {0, "Identity Mapping"},
[KERNEL_START_NR] = {(unsigned long)&_stext, "Kernel Image Start"},
[KERNEL_END_NR] = {(unsigned long)&_end, "Kernel Image End"},
[VMEMMAP_NR] = {0, "vmemmap Area"},
[VMALLOC_NR] = {0, "vmalloc Area"},
{ -1, NULL }
};

struct pg_state {
int level;
unsigned int current_prot;
unsigned long start_address;
unsigned long current_address;
const struct addr_marker *marker;
};

static void print_prot(struct seq_file *m, unsigned int pr, int level)
{
static const char * const level_name[] =
{ "ASCE", "PGD", "PUD", "PMD", "PTE" };

seq_printf(m, "%s ", level_name[level]);
if (pr & _PAGE_INVALID)
seq_printf(m, "I\n");
else
seq_printf(m, "%s\n", pr & _PAGE_RO ? "RO" : "RW");
}

static void note_page(struct seq_file *m, struct pg_state *st,
unsigned int new_prot, int level)
{
static const char units[] = "KMGTPE";
int width = sizeof(unsigned long) * 2;
const char *unit = units;
unsigned int prot, cur;
unsigned long delta;

/*
* If we have a "break" in the series, we need to flush the state
* that we have now. "break" is either changing perms, levels or
* address space marker.
*/
prot = new_prot;
cur = st->current_prot;

if (!st->level) {
/* First entry */
st->current_prot = new_prot;
st->level = level;
st->marker = address_markers;
seq_printf(m, "---[ %s ]---\n", st->marker->name);
} else if (prot != cur || level != st->level ||
st->current_address >= st->marker[1].start_address) {
/* Print the actual finished series */
seq_printf(m, "0x%0*lx-0x%0*lx",
width, st->start_address,
width, st->current_address);
delta = (st->current_address - st->start_address) >> 10;
while (!(delta & 0x3ff) && unit[1]) {
delta >>= 10;
unit++;
}
seq_printf(m, "%9lu%c ", delta, *unit);
print_prot(m, st->current_prot, st->level);
if (st->current_address >= st->marker[1].start_address) {
st->marker++;
seq_printf(m, "---[ %s ]---\n", st->marker->name);
}
st->start_address = st->current_address;
st->current_prot = new_prot;
st->level = level;
}
}

/*
* The actual page table walker functions. In order to keep the implementation
* of print_prot() short, we only check and pass _PAGE_INVALID and _PAGE_RO
* flags to note_page() if a region, segment or page table entry is invalid or
* read-only.
* After all it's just a hint that the current level being walked contains an
* invalid or read-only entry.
*/
static void walk_pte_level(struct seq_file *m, struct pg_state *st,
pmd_t *pmd, unsigned long addr)
{
unsigned int prot;
pte_t *pte;
int i;

for (i = 0; i < PTRS_PER_PTE && addr < max_addr; i++) {
st->current_address = addr;
pte = pte_offset_kernel(pmd, addr);
prot = pte_val(*pte) & (_PAGE_RO | _PAGE_INVALID);
note_page(m, st, prot, 4);
addr += PAGE_SIZE;
}
}

static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
pud_t *pud, unsigned long addr)
{
unsigned int prot;
pmd_t *pmd;
int i;

for (i = 0; i < PTRS_PER_PMD && addr < max_addr; i++) {
st->current_address = addr;
pmd = pmd_offset(pud, addr);
if (!pmd_none(*pmd)) {
if (pmd_large(*pmd)) {
prot = pmd_val(*pmd) & _SEGMENT_ENTRY_RO;
note_page(m, st, prot, 3);
} else
walk_pte_level(m, st, pmd, addr);
} else
note_page(m, st, _PAGE_INVALID, 3);
addr += PMD_SIZE;
}
}

static void walk_pud_level(struct seq_file *m, struct pg_state *st,
pgd_t *pgd, unsigned long addr)
{
pud_t *pud;
int i;

for (i = 0; i < PTRS_PER_PUD && addr < max_addr; i++) {
st->current_address = addr;
pud = pud_offset(pgd, addr);
if (!pud_none(*pud))
walk_pmd_level(m, st, pud, addr);
else
note_page(m, st, _PAGE_INVALID, 2);
addr += PUD_SIZE;
}
}

static void walk_pgd_level(struct seq_file *m)
{
unsigned long addr = 0;
struct pg_state st;
pgd_t *pgd;
int i;

memset(&st, 0, sizeof(st));
for (i = 0; i < PTRS_PER_PGD && addr < max_addr; i++) {
st.current_address = addr;
pgd = pgd_offset_k(addr);
if (!pgd_none(*pgd))
walk_pud_level(m, &st, pgd, addr);
else
note_page(m, &st, _PAGE_INVALID, 1);
addr += PGDIR_SIZE;
}
/* Flush out the last page */
st.current_address = max_addr;
note_page(m, &st, 0, 0);
}

static int ptdump_show(struct seq_file *m, void *v)
{
walk_pgd_level(m);
return 0;
}

static int ptdump_open(struct inode *inode, struct file *filp)
{
return single_open(filp, ptdump_show, NULL);
}

static const struct file_operations ptdump_fops = {
.open = ptdump_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};

static int pt_dump_init(void)
{
/*
* Figure out the maximum virtual address being accessible with the
* kernel ASCE. We need this to keep the page table walker functions
* from accessing non-existent entries.
*/
#ifdef CONFIG_64BIT
max_addr = (S390_lowcore.kernel_asce & _REGION_ENTRY_TYPE_MASK) >> 2;
max_addr = 1UL << (max_addr * 11 + 31);
#else
max_addr = 1UL << 31;
#endif
address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap;
address_markers[VMALLOC_NR].start_address = VMALLOC_START;
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops);
return 0;
}
device_initcall(pt_dump_init);

0 comments on commit e76e82d

Please sign in to comment.