forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[PATCH] kdump: Documentation for Kdump
This patch contains the documentation for the kexec based crash dump tool. Quick kdump-howto ================================================================ 1) Download and build kexec-tools. 2) Download and build the latest kexec/kdump (-mm) kernel patchset. Two kernels need to be built in order to get this feature working. A) First kernel: a) Enable "kexec system call" feature: CONFIG_KEXEC=y b) Physical load address (use default): CONFIG_PHYSICAL_START=0x100000 c) Enable "sysfs file system support": CONFIG_SYSFS=y d) Boot into first kernel with the command line parameter "crashkernel=Y@X": For example: "crashkernel=64M@16M". B) Second kernel: a) Enable "kernel crash dumps" feature: CONFIG_CRASH_DUMP=y b) Physical load address, use same load address as X in "crashkernel" kernel parameter in d) above, e.g., 16 MB or 0x1000000. CONFIG_PHYSICAL_START=0x1000000 c) Enable "/proc/vmcore support" (Optional, in Pseudo filesystems). CONFIG_PROC_VMCORE=y 3) Boot into the first kernel. 4) Load the second kernel to be booted using: kexec -p <second-kernel> --crash-dump --args-linux --append="root=<root-dev> maxcpus=1 init 1" 5) System reboots into the second kernel when a panic occurs. A module can be written to force the panic, for testing purposes. 6) See Documentation/kdump.txt for how to read the first kernel's memory image and how to analyze it. Signed-off-by: Hariprasad Nellitheertha <[email protected]> Signed-off-by: Eric Biederman <[email protected]> Signed-off-by: Vivek Goyal <[email protected]> Signed-off-by: randy_dunlap <[email protected]> Signed-off-by: Maneesh Soni <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
- Loading branch information
Vivek Goyal
authored and
Linus Torvalds
committed
Jun 25, 2005
1 parent
a3ea8ac
commit b089f4a
Showing
3 changed files
with
316 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
# | ||
# This file contains a few gdb macros (user defined commands) to extract | ||
# useful information from kernel crashdump (kdump) like stack traces of | ||
# all the processes or a particular process and trapinfo. | ||
# | ||
# These macros can be used by copying this file into .gdbinit (placed in the | ||
# home directory or the current directory) or by invoking gdb with the | ||
# --command=<command-file-name> option. | ||
# | ||
# Credits: | ||
# Alexander Nyberg <[email protected]> | ||
# V Srivatsa <[email protected]> | ||
# Maneesh Soni <[email protected]> | ||
# | ||
|
||
# bttnobp - print a heuristic stack trace for every task and thread.
#
# Walks the task list starting after init_task (init_task itself is not
# printed) and, for each task, the threads linked through pids[1].pid_list.
# For each of them the stack is scanned one 4-byte word at a time, from the
# saved thread.esp up to the end of the 4096-byte-aligned region containing
# it.  Every word whose value lies strictly between _stext and _sinittext is
# resolved with 'info symbol'.  No frame-pointer chain is followed, so stale
# return addresses left on the stack may be listed as well.
#
# NOTE(review): the 4096-byte masking assumes the stack does not extend past
# the 4 KB-aligned region holding esp -- confirm against the kernel's
# configured stack size.
define bttnobp
	# Offsets of the embedded list links inside struct task_struct, used to
	# convert a list pointer back to its containing task_struct.
	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
	set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
	set $init_t=&init_task
	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
	while ($next_t != $init_t)
		set $next_t=(struct task_struct *)$next_t
		printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
		printf "===================\n"
		set var $stackp = $next_t.thread.esp
		set var $stack_top = ($stackp & ~4095) + 4096

		while ($stackp < $stack_top)
			if (*($stackp) > _stext && *($stackp) < _sinittext)
				info symbol *($stackp)
			end
			set $stackp += 4
		end

		# Visit the other threads linked to this task.
		set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
		while ($next_th != $next_t)
			set $next_th=(struct task_struct *)$next_th
			# Report and scan the thread itself ($next_th); $next_t still
			# refers to the outer-loop task and would only repeat its data.
			printf "\npid %d; comm %s:\n", $next_th.pid, $next_th.comm
			printf "===================\n"
			set var $stackp = $next_th.thread.esp
			set var $stack_top = ($stackp & ~4095) + 4096

			while ($stackp < $stack_top)
				if (*($stackp) > _stext && *($stackp) < _sinittext)
					info symbol *($stackp)
				end
				set $stackp += 4
			end
			set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
		end
		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
	end
end
document bttnobp
	dump all thread stack traces on a kernel compiled with !CONFIG_FRAME_POINTER
end
|
||
# btt - print a frame-pointer based stack trace for every task and thread.
#
# Walks the task list starting after init_task (init_task itself is not
# printed) and, for each task, the threads linked through pids[1].pid_list.
# For each of them the word stored at the saved thread.esp is taken as the
# first frame pointer.  While the frame pointer stays strictly inside the
# 4096-byte-aligned region containing esp, the word at frame pointer + 4 is
# resolved with 'info symbol' and the word at the frame pointer becomes the
# next frame pointer.
#
# NOTE(review): the 4096-byte masking assumes the stack does not extend past
# the 4 KB-aligned region holding esp -- confirm against the kernel's
# configured stack size.
define btt
	# Offsets of the embedded list links inside struct task_struct, used to
	# convert a list pointer back to its containing task_struct.
	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
	set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
	set $init_t=&init_task
	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
	while ($next_t != $init_t)
		set $next_t=(struct task_struct *)$next_t
		printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm
		printf "===================\n"
		set var $stackp = $next_t.thread.esp
		set var $stack_top = ($stackp & ~4095) + 4096
		set var $stack_bot = ($stackp & ~4095)

		set $stackp = *($stackp)
		while (($stackp < $stack_top) && ($stackp > $stack_bot))
			set var $addr = *($stackp + 4)
			info symbol $addr
			set $stackp = *($stackp)
		end

		# Visit the other threads linked to this task.
		set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
		while ($next_th != $next_t)
			set $next_th=(struct task_struct *)$next_th
			# Report and unwind the thread itself ($next_th); $next_t still
			# refers to the outer-loop task and would only repeat its data.
			printf "\npid %d; comm %s:\n", $next_th.pid, $next_th.comm
			printf "===================\n"
			set var $stackp = $next_th.thread.esp
			set var $stack_top = ($stackp & ~4095) + 4096
			set var $stack_bot = ($stackp & ~4095)

			set $stackp = *($stackp)
			while (($stackp < $stack_top) && ($stackp > $stack_bot))
				set var $addr = *($stackp + 4)
				info symbol $addr
				set $stackp = *($stackp)
			end
			set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
		end
		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
	end
end
document btt
	dump all thread stack traces on a kernel compiled with CONFIG_FRAME_POINTER
end
|
||
# btpid <pid> - print a frame-pointer based stack trace of one task.
#
# $arg0 is the pid to look up.  The task list starting after init_task is
# searched (init_task itself is not examined), together with the threads
# linked to each task through pids[1].pid_list.  The whole list is always
# walked; if several entries match, the last one found is used.
#
# For the selected task the word stored at the saved thread.esp is taken as
# the first frame pointer.  While the frame pointer stays strictly inside the
# 4096-byte-aligned region containing esp, the word at frame pointer + 4 is
# resolved with 'info symbol' and the word at the frame pointer becomes the
# next frame pointer.
#
# If no task has the requested pid, a message is printed and nothing else
# is done.
define btpid
	set var $pid = $arg0
	# Offsets of the embedded list links inside struct task_struct, used to
	# convert a list pointer back to its containing task_struct.
	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
	set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
	set $init_t=&init_task
	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
	# 0 means "not found"; convenience variables persist between
	# invocations, so this must be reset on every call.
	set var $pid_task = 0

	while ($next_t != $init_t)
		set $next_t=(struct task_struct *)$next_t

		if ($next_t.pid == $pid)
			set $pid_task = $next_t
		end

		set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
		while ($next_th != $next_t)
			set $next_th=(struct task_struct *)$next_th
			if ($next_th.pid == $pid)
				set $pid_task = $next_th
			end
			set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
		end
		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
	end

	if ($pid_task == 0)
		# Without this guard the member accesses below would be applied
		# to the integer 0.
		printf "pid %d not found\n", $pid
	else
		printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm
		printf "===================\n"
		set var $stackp = $pid_task.thread.esp
		set var $stack_top = ($stackp & ~4095) + 4096
		set var $stack_bot = ($stackp & ~4095)

		set $stackp = *($stackp)
		while (($stackp < $stack_top) && ($stackp > $stack_bot))
			set var $addr = *($stackp + 4)
			info symbol $addr
			set $stackp = *($stackp)
		end
	end
end
document btpid
	backtrace of pid
end
|
||
|
||
# trapinfo <pid> - print the trap information saved for one task.
#
# $arg0 is the pid to look up.  The task list starting after init_task is
# searched (init_task itself is not examined), together with the threads
# linked to each task through pids[1].pid_list.  The whole list is always
# walked; if several entries match, the last one found is used.
#
# For the selected task the thread.trap_no, thread.cr2 and thread.error_code
# fields are printed.  If no task has the requested pid, a message is printed
# and nothing else is done.
define trapinfo
	set var $pid = $arg0
	# Offsets of the embedded list links inside struct task_struct, used to
	# convert a list pointer back to its containing task_struct.
	set $tasks_off=((size_t)&((struct task_struct *)0)->tasks)
	set $pid_off=((size_t)&((struct task_struct *)0)->pids[1].pid_list.next)
	set $init_t=&init_task
	set $next_t=(((char *)($init_t->tasks).next) - $tasks_off)
	# 0 means "not found"; convenience variables persist between
	# invocations, so this must be reset on every call.
	set var $pid_task = 0

	while ($next_t != $init_t)
		set $next_t=(struct task_struct *)$next_t

		if ($next_t.pid == $pid)
			set $pid_task = $next_t
		end

		set $next_th=(((char *)$next_t->pids[1].pid_list.next) - $pid_off)
		while ($next_th != $next_t)
			set $next_th=(struct task_struct *)$next_th
			if ($next_th.pid == $pid)
				set $pid_task = $next_th
			end
			set $next_th=(((char *)$next_th->pids[1].pid_list.next) - $pid_off)
		end
		set $next_t=(char *)($next_t->tasks.next) - $tasks_off
	end

	if ($pid_task == 0)
		# Without this guard the member accesses below would be applied
		# to the integer 0.
		printf "pid %d not found\n", $pid
	else
		printf "Trapno %ld, cr2 0x%lx, error_code %ld\n", $pid_task.thread.trap_no, $pid_task.thread.cr2, $pid_task.thread.error_code
	end
end
document trapinfo
	Run info threads and lookup pid of thread #1
	'trapinfo <pid>' will tell you by which trap & possibly
	address the kernel panicked.
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
Documentation for kdump - the kexec-based crash dumping solution | ||
================================================================ | ||
|
||
DESIGN | ||
====== | ||
|
||
Kdump uses kexec to reboot to a second kernel whenever a dump needs to be taken. | ||
This second kernel is booted with very little memory. The first kernel reserves | ||
the section of memory that the second kernel uses. This ensures that on-going | ||
DMA from the first kernel does not corrupt the second kernel. | ||
|
||
All the necessary information about the core image is encoded in ELF format and | ||
stored in a reserved area of memory before the crash. The physical address of the | ||
start of the ELF header is passed to the new kernel through the elfcorehdr= | ||
command line parameter. | ||
|
||
On i386, the first 640 KB of physical memory is needed to boot, irrespective | ||
of where the kernel loads. Hence, this region is backed up by kexec just before | ||
rebooting into the new kernel. | ||
|
||
In the second kernel, "old memory" can be accessed in two ways. | ||
|
||
- The first one is through a /dev/oldmem device interface. A capture utility | ||
can read the device file and write out the memory in raw format. This is a raw | ||
dump of memory, and the analysis/capture tool should be intelligent enough to | ||
determine where to look for the right information. ELF headers (elfcorehdr=) | ||
can come in handy here. | ||
|
||
- The second interface is through /proc/vmcore. This exports the dump as an ELF | ||
format file which can be written out using any file copy command | ||
(cp, scp, etc). Further, gdb can be used to perform limited debugging on | ||
the dump file. This method ensures that there is correct | ||
ordering of the dump pages (corresponding to the first 640 KB that has been | ||
relocated). | ||
|
||
SETUP | ||
===== | ||
|
||
1) Download http://www.xmission.com/~ebiederm/files/kexec/kexec-tools-1.101.tar.gz | ||
and apply http://lse.sourceforge.net/kdump/patches/kexec-tools-1.101-kdump.patch | ||
and after that build the source. | ||
|
||
2) Download and build the appropriate (latest) kexec/kdump (-mm) kernel | ||
patchset and apply it to the vanilla kernel tree. | ||
|
||
Two kernels need to be built in order to get this feature working. | ||
|
||
A) First kernel: | ||
a) Enable "kexec system call" feature (in Processor type and features). | ||
CONFIG_KEXEC=y | ||
b) This kernel's physical load address should be the default value of | ||
0x100000 (1 MB) (in Processor type and features). | ||
CONFIG_PHYSICAL_START=0x100000 | ||
c) Enable "sysfs file system support" (in Pseudo filesystems). | ||
CONFIG_SYSFS=y | ||
d) Boot into first kernel with the command line parameter "crashkernel=Y@X". | ||
Use appropriate values for X and Y. Y denotes how much memory to reserve | ||
for the second kernel, and X denotes at what physical address the reserved | ||
memory section starts. For example: "crashkernel=64M@16M". | ||
|
||
B) Second kernel: | ||
a) Enable "kernel crash dumps" feature (in Processor type and features). | ||
CONFIG_CRASH_DUMP=y | ||
b) Specify a suitable value for "Physical address where the kernel is | ||
loaded" (in Processor type and features). Typically this value | ||
should be the same as X (see option d) above), e.g., 16 MB or 0x1000000. | ||
CONFIG_PHYSICAL_START=0x1000000 | ||
c) Enable "/proc/vmcore support" (Optional, in Pseudo filesystems). | ||
CONFIG_PROC_VMCORE=y | ||
|
||
Note: Options a) and b) depend upon "Configure standard kernel features | ||
(for small systems)" (under General setup). | ||
Option a) also depends on CONFIG_HIGHMEM (under Processor | ||
type and features). | ||
Both options a) and b) are under "Processor type and features". | ||
|
||
3) Boot into the first kernel. You are now ready to try out kexec-based crash | ||
dumps. | ||
|
||
4) Load the second kernel to be booted using: | ||
|
||
kexec -p <second-kernel> --crash-dump --args-linux --append="root=<root-dev> | ||
maxcpus=1 init 1" | ||
|
||
Note: i) <second-kernel> has to be a vmlinux image. bzImage will not work, | ||
as of now. | ||
ii) By default ELF headers are stored in ELF32 format (for i386). This | ||
is sufficient to represent the physical memory up to 4GB. To store | ||
headers in ELF64 format, specify "--elf64-core-headers" on the | ||
kexec command line additionally. | ||
iii) For now (or until it is fixed), it's best to build the | ||
second-kernel without multi-processor support, i.e., make it | ||
a uniprocessor kernel. | ||
|
||
5) System reboots into the second kernel when a panic occurs. A module can be | ||
written to force the panic, for testing purposes. | ||
|
||
6) Write out the dump file using | ||
|
||
cp /proc/vmcore <dump-file> | ||
|
||
Dump memory can also be accessed as a /dev/oldmem device for a linear/raw | ||
view. To create the device, type: | ||
|
||
mknod /dev/oldmem c 1 12 | ||
|
||
Use "dd" with suitable options for count, bs and skip to access specific | ||
portions of the dump. | ||
|
||
Entire memory: dd if=/dev/oldmem of=oldmem.001 | ||
|
||
ANALYSIS | ||
======== | ||
|
||
Limited analysis can be done using gdb on the dump file copied out of | ||
/proc/vmcore. Use vmlinux built with -g and run | ||
|
||
gdb vmlinux <dump-file> | ||
|
||
Stack trace for the task on processor 0, register display, memory display | ||
work fine. | ||
|
||
Note: gdb cannot analyse core files generated in ELF64 format for i386. | ||
|
||
TODO | ||
==== | ||
|
||
1) Provide a kernel pages filtering mechanism so that core file size is not | ||
insane on systems having huge memory banks. | ||
2) Modify "crash" tool to make it recognize this dump. | ||
|
||
CONTACT | ||
======= | ||
|
||
Hariprasad Nellitheertha - hari at in dot ibm dot com | ||
Vivek Goyal ([email protected]) |