diff options
author | Mauro Carvalho Chehab <mchehab+samsung@kernel.org> | 2019-06-13 21:21:39 +0300 |
---|---|---|
committer | Mauro Carvalho Chehab <mchehab+samsung@kernel.org> | 2019-07-15 17:03:01 +0300 |
commit | 330d48105245abfb8c9ca491dc53ea500657217a (patch) | |
tree | b6d50bd5a9b1dc479f4d3414a741fe51eaca4e39 /Documentation/kdump | |
parent | 9e1cbede267916e737c4a755059418da3ac4de95 (diff) | |
download | linux-330d48105245abfb8c9ca491dc53ea500657217a.tar.xz |
docs: admin-guide: add kdump documentation into it
The Kdump documentation describes procedures with admins use
in order to solve issues on their systems.
Signed-off-by: Mauro Carvalho Chehab <mchehab+samsung@kernel.org>
Diffstat (limited to 'Documentation/kdump')
-rw-r--r-- | Documentation/kdump/gdbmacros.txt | 264 | ||||
-rw-r--r-- | Documentation/kdump/index.rst | 21 | ||||
-rw-r--r-- | Documentation/kdump/kdump.rst | 534 | ||||
-rw-r--r-- | Documentation/kdump/vmcoreinfo.rst | 488 |
4 files changed, 0 insertions, 1307 deletions
diff --git a/Documentation/kdump/gdbmacros.txt b/Documentation/kdump/gdbmacros.txt deleted file mode 100644 index 220d0a80ca2c..000000000000 --- a/Documentation/kdump/gdbmacros.txt +++ /dev/null @@ -1,264 +0,0 @@ -# -# This file contains a few gdb macros (user defined commands) to extract -# useful information from kernel crashdump (kdump) like stack traces of -# all the processes or a particular process and trapinfo. -# -# These macros can be used by copying this file in .gdbinit (put in home -# directory or current directory) or by invoking gdb command with -# --command=<command-file-name> option -# -# Credits: -# Alexander Nyberg <alexn@telia.com> -# V Srivatsa <vatsa@in.ibm.com> -# Maneesh Soni <maneesh@in.ibm.com> -# - -define bttnobp - set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) - set $init_t=&init_task - set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) - set var $stacksize = sizeof(union thread_union) - while ($next_t != $init_t) - set $next_t=(struct task_struct *)$next_t - printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm - printf "===================\n" - set var $stackp = $next_t.thread.sp - set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize - - while ($stackp < $stack_top) - if (*($stackp) > _stext && *($stackp) < _sinittext) - info symbol *($stackp) - end - set $stackp += 4 - end - set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) - while ($next_th != $next_t) - set $next_th=(struct task_struct *)$next_th - printf "\npid %d; comm %s:\n", $next_t.pid, $next_t.comm - printf "===================\n" - set var $stackp = $next_t.thread.sp - set var $stack_top = ($stackp & ~($stacksize - 1)) + stacksize - - while ($stackp < $stack_top) - if (*($stackp) > _stext && *($stackp) < _sinittext) - info symbol *($stackp) - end - set $stackp += 4 - end - set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) - end - set $next_t=(char *)($next_t->tasks.next) - $tasks_off - end -end -document bttnobp - dump all thread stack traces on a kernel compiled with !CONFIG_FRAME_POINTER -end - -define btthreadstack - set var $pid_task = $arg0 - - printf "\npid %d; comm %s:\n", $pid_task.pid, $pid_task.comm - printf "task struct: " - print $pid_task - printf "===================\n" - set var $stackp = $pid_task.thread.sp - set var $stacksize = sizeof(union thread_union) - set var $stack_top = ($stackp & ~($stacksize - 1)) + $stacksize - set var $stack_bot = ($stackp & ~($stacksize - 1)) - - set $stackp = *((unsigned long *) $stackp) - while (($stackp < $stack_top) && ($stackp > $stack_bot)) - set var $addr = *(((unsigned long *) $stackp) + 1) - info symbol $addr - set $stackp = *((unsigned long *) $stackp) - end -end -document btthreadstack - dump a thread stack using the given task structure pointer -end - - -define btt - set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) - set $init_t=&init_task - set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) - while ($next_t != $init_t) - set $next_t=(struct task_struct *)$next_t - btthreadstack $next_t - - set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) - while ($next_th != $next_t) - set $next_th=(struct task_struct *)$next_th - btthreadstack $next_th - set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) - end - set $next_t=(char *)($next_t->tasks.next) - $tasks_off - end -end -document btt - dump all thread stack traces on a kernel compiled with CONFIG_FRAME_POINTER -end - -define btpid - set var $pid = $arg0 - set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) - set $init_t=&init_task - set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) - set var $pid_task = 0 - - while ($next_t != $init_t) - set $next_t=(struct task_struct *)$next_t - - if ($next_t.pid == $pid) - set $pid_task = $next_t - end - - set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) - while ($next_th != $next_t) - set $next_th=(struct task_struct *)$next_th - if ($next_th.pid == $pid) - set $pid_task = $next_th - end - set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) - end - set $next_t=(char *)($next_t->tasks.next) - $tasks_off - end - - btthreadstack $pid_task -end -document btpid - backtrace of pid -end - - -define trapinfo - set var $pid = $arg0 - set $tasks_off=((size_t)&((struct task_struct *)0)->tasks) - set $pid_off=((size_t)&((struct task_struct *)0)->thread_group.next) - set $init_t=&init_task - set $next_t=(((char *)($init_t->tasks).next) - $tasks_off) - set var $pid_task = 0 - - while ($next_t != $init_t) - set $next_t=(struct task_struct *)$next_t - - if ($next_t.pid == $pid) - set $pid_task = $next_t - end - - set $next_th=(((char *)$next_t->thread_group.next) - $pid_off) - while ($next_th != $next_t) - set $next_th=(struct task_struct *)$next_th - if ($next_th.pid == $pid) - set $pid_task = $next_th - end - set $next_th=(((char *)$next_th->thread_group.next) - $pid_off) - end - set $next_t=(char *)($next_t->tasks.next) - $tasks_off - end - - printf "Trapno %ld, cr2 0x%lx, error_code %ld\n", $pid_task.thread.trap_no, \ - $pid_task.thread.cr2, $pid_task.thread.error_code - -end -document trapinfo - Run info threads and lookup pid of thread #1 - 'trapinfo <pid>' will tell you by which trap & possibly - address the kernel panicked. -end - -define dump_log_idx - set $idx = $arg0 - if ($argc > 1) - set $prev_flags = $arg1 - else - set $prev_flags = 0 - end - set $msg = ((struct printk_log *) (log_buf + $idx)) - set $prefix = 1 - set $newline = 1 - set $log = log_buf + $idx + sizeof(*$msg) - - # prev & LOG_CONT && !(msg->flags & LOG_PREIX) - if (($prev_flags & 8) && !($msg->flags & 4)) - set $prefix = 0 - end - - # msg->flags & LOG_CONT - if ($msg->flags & 8) - # (prev & LOG_CONT && !(prev & LOG_NEWLINE)) - if (($prev_flags & 8) && !($prev_flags & 2)) - set $prefix = 0 - end - # (!(msg->flags & LOG_NEWLINE)) - if (!($msg->flags & 2)) - set $newline = 0 - end - end - - if ($prefix) - printf "[%5lu.%06lu] ", $msg->ts_nsec / 1000000000, $msg->ts_nsec % 1000000000 - end - if ($msg->text_len != 0) - eval "printf \"%%%d.%ds\", $log", $msg->text_len, $msg->text_len - end - if ($newline) - printf "\n" - end - if ($msg->dict_len > 0) - set $dict = $log + $msg->text_len - set $idx = 0 - set $line = 1 - while ($idx < $msg->dict_len) - if ($line) - printf " " - set $line = 0 - end - set $c = $dict[$idx] - if ($c == '\0') - printf "\n" - set $line = 1 - else - if ($c < ' ' || $c >= 127 || $c == '\\') - printf "\\x%02x", $c - else - printf "%c", $c - end - end - set $idx = $idx + 1 - end - printf "\n" - end -end -document dump_log_idx - Dump a single log given its index in the log buffer. The first - parameter is the index into log_buf, the second is optional and - specified the previous log buffer's flags, used for properly - formatting continued lines. -end - -define dmesg - set $i = log_first_idx - set $end_idx = log_first_idx - set $prev_flags = 0 - - while (1) - set $msg = ((struct printk_log *) (log_buf + $i)) - if ($msg->len == 0) - set $i = 0 - else - dump_log_idx $i $prev_flags - set $i = $i + $msg->len - set $prev_flags = $msg->flags - end - if ($i == $end_idx) - loop_break - end - end -end -document dmesg - print the kernel ring buffer -end diff --git a/Documentation/kdump/index.rst b/Documentation/kdump/index.rst deleted file mode 100644 index 2b17fcf6867a..000000000000 --- a/Documentation/kdump/index.rst +++ /dev/null @@ -1,21 +0,0 @@ -:orphan: - -================================================================ -Documentation for Kdump - The kexec-based Crash Dumping Solution -================================================================ - -This document includes overview, setup and installation, and analysis -information. - -.. toctree:: - :maxdepth: 1 - - kdump - vmcoreinfo - -.. only:: subproject and html - - Indices - ======= - - * :ref:`genindex` diff --git a/Documentation/kdump/kdump.rst b/Documentation/kdump/kdump.rst deleted file mode 100644 index ac7e131d2935..000000000000 --- a/Documentation/kdump/kdump.rst +++ /dev/null @@ -1,534 +0,0 @@ -================================================================ -Documentation for Kdump - The kexec-based Crash Dumping Solution -================================================================ - -This document includes overview, setup and installation, and analysis -information. - -Overview -======== - -Kdump uses kexec to quickly boot to a dump-capture kernel whenever a -dump of the system kernel's memory needs to be taken (for example, when -the system panics). The system kernel's memory image is preserved across -the reboot and is accessible to the dump-capture kernel. - -You can use common commands, such as cp and scp, to copy the -memory image to a dump file on the local disk, or across the network to -a remote system. - -Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64, -s390x, arm and arm64 architectures. - -When the system kernel boots, it reserves a small section of memory for -the dump-capture kernel. This ensures that ongoing Direct Memory Access -(DMA) from the system kernel does not corrupt the dump-capture kernel. -The kexec -p command loads the dump-capture kernel into this reserved -memory. - -On x86 machines, the first 640 KB of physical memory is needed to boot, -regardless of where the kernel loads. Therefore, kexec backs up this -region just before rebooting into the dump-capture kernel. - -Similarly on PPC64 machines first 32KB of physical memory is needed for -booting regardless of where the kernel is loaded and to support 64K page -size kexec backs up the first 64KB memory. - -For s390x, when kdump is triggered, the crashkernel region is exchanged -with the region [0, crashkernel region size] and then the kdump kernel -runs in [0, crashkernel region size]. Therefore no relocatable kernel is -needed for s390x. - -All of the necessary information about the system kernel's core image is -encoded in the ELF format, and stored in a reserved area of memory -before a crash. The physical address of the start of the ELF header is -passed to the dump-capture kernel through the elfcorehdr= boot -parameter. Optionally the size of the ELF header can also be passed -when using the elfcorehdr=[size[KMG]@]offset[KMG] syntax. - - -With the dump-capture kernel, you can access the memory image through -/proc/vmcore. This exports the dump as an ELF-format file that you can -write out using file copy commands such as cp or scp. Further, you can -use analysis tools such as the GNU Debugger (GDB) and the Crash tool to -debug the dump file. This method ensures that the dump pages are correctly -ordered. - - -Setup and Installation -====================== - -Install kexec-tools -------------------- - -1) Login as the root user. - -2) Download the kexec-tools user-space package from the following URL: - -http://kernel.org/pub/linux/utils/kernel/kexec/kexec-tools.tar.gz - -This is a symlink to the latest version. - -The latest kexec-tools git tree is available at: - -- git://git.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git -- http://www.kernel.org/pub/scm/utils/kernel/kexec/kexec-tools.git - -There is also a gitweb interface available at -http://www.kernel.org/git/?p=utils/kernel/kexec/kexec-tools.git - -More information about kexec-tools can be found at -http://horms.net/projects/kexec/ - -3) Unpack the tarball with the tar command, as follows:: - - tar xvpzf kexec-tools.tar.gz - -4) Change to the kexec-tools directory, as follows:: - - cd kexec-tools-VERSION - -5) Configure the package, as follows:: - - ./configure - -6) Compile the package, as follows:: - - make - -7) Install the package, as follows:: - - make install - - -Build the system and dump-capture kernels ------------------------------------------ -There are two possible methods of using Kdump. - -1) Build a separate custom dump-capture kernel for capturing the - kernel core dump. - -2) Or use the system kernel binary itself as dump-capture kernel and there is - no need to build a separate dump-capture kernel. This is possible - only with the architectures which support a relocatable kernel. As - of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support - relocatable kernel. - -Building a relocatable kernel is advantageous from the point of view that -one does not have to build a second kernel for capturing the dump. But -at the same time one might want to build a custom dump capture kernel -suitable to his needs. - -Following are the configuration setting required for system and -dump-capture kernels for enabling kdump support. - -System kernel config options ----------------------------- - -1) Enable "kexec system call" in "Processor type and features.":: - - CONFIG_KEXEC=y - -2) Enable "sysfs file system support" in "Filesystem" -> "Pseudo - filesystems." This is usually enabled by default:: - - CONFIG_SYSFS=y - - Note that "sysfs file system support" might not appear in the "Pseudo - filesystems" menu if "Configure standard kernel features (for small - systems)" is not enabled in "General Setup." In this case, check the - .config file itself to ensure that sysfs is turned on, as follows:: - - grep 'CONFIG_SYSFS' .config - -3) Enable "Compile the kernel with debug info" in "Kernel hacking.":: - - CONFIG_DEBUG_INFO=Y - - This causes the kernel to be built with debug symbols. The dump - analysis tools require a vmlinux with debug symbols in order to read - and analyze a dump file. - -Dump-capture kernel config options (Arch Independent) ------------------------------------------------------ - -1) Enable "kernel crash dumps" support under "Processor type and - features":: - - CONFIG_CRASH_DUMP=y - -2) Enable "/proc/vmcore support" under "Filesystems" -> "Pseudo filesystems":: - - CONFIG_PROC_VMCORE=y - - (CONFIG_PROC_VMCORE is set by default when CONFIG_CRASH_DUMP is selected.) - -Dump-capture kernel config options (Arch Dependent, i386 and x86_64) --------------------------------------------------------------------- - -1) On i386, enable high memory support under "Processor type and - features":: - - CONFIG_HIGHMEM64G=y - - or:: - - CONFIG_HIGHMEM4G - -2) On i386 and x86_64, disable symmetric multi-processing support - under "Processor type and features":: - - CONFIG_SMP=n - - (If CONFIG_SMP=y, then specify maxcpus=1 on the kernel command line - when loading the dump-capture kernel, see section "Load the Dump-capture - Kernel".) - -3) If one wants to build and use a relocatable kernel, - Enable "Build a relocatable kernel" support under "Processor type and - features":: - - CONFIG_RELOCATABLE=y - -4) Use a suitable value for "Physical address where the kernel is - loaded" (under "Processor type and features"). This only appears when - "kernel crash dumps" is enabled. A suitable value depends upon - whether kernel is relocatable or not. - - If you are using a relocatable kernel use CONFIG_PHYSICAL_START=0x100000 - This will compile the kernel for physical address 1MB, but given the fact - kernel is relocatable, it can be run from any physical address hence - kexec boot loader will load it in memory region reserved for dump-capture - kernel. - - Otherwise it should be the start of memory region reserved for - second kernel using boot parameter "crashkernel=Y@X". Here X is - start of memory region reserved for dump-capture kernel. - Generally X is 16MB (0x1000000). So you can set - CONFIG_PHYSICAL_START=0x1000000 - -5) Make and install the kernel and its modules. DO NOT add this kernel - to the boot loader configuration files. - -Dump-capture kernel config options (Arch Dependent, ppc64) ----------------------------------------------------------- - -1) Enable "Build a kdump crash kernel" support under "Kernel" options:: - - CONFIG_CRASH_DUMP=y - -2) Enable "Build a relocatable kernel" support:: - - CONFIG_RELOCATABLE=y - - Make and install the kernel and its modules. - -Dump-capture kernel config options (Arch Dependent, ia64) ----------------------------------------------------------- - -- No specific options are required to create a dump-capture kernel - for ia64, other than those specified in the arch independent section - above. This means that it is possible to use the system kernel - as a dump-capture kernel if desired. - - The crashkernel region can be automatically placed by the system - kernel at run time. This is done by specifying the base address as 0, - or omitting it all together:: - - crashkernel=256M@0 - - or:: - - crashkernel=256M - - If the start address is specified, note that the start address of the - kernel will be aligned to 64Mb, so if the start address is not then - any space below the alignment point will be wasted. - -Dump-capture kernel config options (Arch Dependent, arm) ----------------------------------------------------------- - -- To use a relocatable kernel, - Enable "AUTO_ZRELADDR" support under "Boot" options:: - - AUTO_ZRELADDR=y - -Dump-capture kernel config options (Arch Dependent, arm64) ----------------------------------------------------------- - -- Please note that kvm of the dump-capture kernel will not be enabled - on non-VHE systems even if it is configured. This is because the CPU - will not be reset to EL2 on panic. - -Extended crashkernel syntax -=========================== - -While the "crashkernel=size[@offset]" syntax is sufficient for most -configurations, sometimes it's handy to have the reserved memory dependent -on the value of System RAM -- that's mostly for distributors that pre-setup -the kernel command line to avoid a unbootable system after some memory has -been removed from the machine. - -The syntax is:: - - crashkernel=<range1>:<size1>[,<range2>:<size2>,...][@offset] - range=start-[end] - -For example:: - - crashkernel=512M-2G:64M,2G-:128M - -This would mean: - - 1) if the RAM is smaller than 512M, then don't reserve anything - (this is the "rescue" case) - 2) if the RAM size is between 512M and 2G (exclusive), then reserve 64M - 3) if the RAM size is larger than 2G, then reserve 128M - - - -Boot into System Kernel -======================= - -1) Update the boot loader (such as grub, yaboot, or lilo) configuration - files as necessary. - -2) Boot the system kernel with the boot parameter "crashkernel=Y@X", - where Y specifies how much memory to reserve for the dump-capture kernel - and X specifies the beginning of this reserved memory. For example, - "crashkernel=64M@16M" tells the system kernel to reserve 64 MB of memory - starting at physical address 0x01000000 (16MB) for the dump-capture kernel. - - On x86 and x86_64, use "crashkernel=64M@16M". - - On ppc64, use "crashkernel=128M@32M". - - On ia64, 256M@256M is a generous value that typically works. - The region may be automatically placed on ia64, see the - dump-capture kernel config option notes above. - If use sparse memory, the size should be rounded to GRANULE boundaries. - - On s390x, typically use "crashkernel=xxM". The value of xx is dependent - on the memory consumption of the kdump system. In general this is not - dependent on the memory size of the production system. - - On arm, the use of "crashkernel=Y@X" is no longer necessary; the - kernel will automatically locate the crash kernel image within the - first 512MB of RAM if X is not given. - - On arm64, use "crashkernel=Y[@X]". Note that the start address of - the kernel, X if explicitly specified, must be aligned to 2MiB (0x200000). - -Load the Dump-capture Kernel -============================ - -After booting to the system kernel, dump-capture kernel needs to be -loaded. - -Based on the architecture and type of image (relocatable or not), one -can choose to load the uncompressed vmlinux or compressed bzImage/vmlinuz -of dump-capture kernel. Following is the summary. - -For i386 and x86_64: - - - Use vmlinux if kernel is not relocatable. - - Use bzImage/vmlinuz if kernel is relocatable. - -For ppc64: - - - Use vmlinux - -For ia64: - - - Use vmlinux or vmlinuz.gz - -For s390x: - - - Use image or bzImage - -For arm: - - - Use zImage - -For arm64: - - - Use vmlinux or Image - -If you are using an uncompressed vmlinux image then use following command -to load dump-capture kernel:: - - kexec -p <dump-capture-kernel-vmlinux-image> \ - --initrd=<initrd-for-dump-capture-kernel> --args-linux \ - --append="root=<root-dev> <arch-specific-options>" - -If you are using a compressed bzImage/vmlinuz, then use following command -to load dump-capture kernel:: - - kexec -p <dump-capture-kernel-bzImage> \ - --initrd=<initrd-for-dump-capture-kernel> \ - --append="root=<root-dev> <arch-specific-options>" - -If you are using a compressed zImage, then use following command -to load dump-capture kernel:: - - kexec --type zImage -p <dump-capture-kernel-bzImage> \ - --initrd=<initrd-for-dump-capture-kernel> \ - --dtb=<dtb-for-dump-capture-kernel> \ - --append="root=<root-dev> <arch-specific-options>" - -If you are using an uncompressed Image, then use following command -to load dump-capture kernel:: - - kexec -p <dump-capture-kernel-Image> \ - --initrd=<initrd-for-dump-capture-kernel> \ - --append="root=<root-dev> <arch-specific-options>" - -Please note, that --args-linux does not need to be specified for ia64. -It is planned to make this a no-op on that architecture, but for now -it should be omitted - -Following are the arch specific command line options to be used while -loading dump-capture kernel. - -For i386, x86_64 and ia64: - - "1 irqpoll maxcpus=1 reset_devices" - -For ppc64: - - "1 maxcpus=1 noirqdistrib reset_devices" - -For s390x: - - "1 maxcpus=1 cgroup_disable=memory" - -For arm: - - "1 maxcpus=1 reset_devices" - -For arm64: - - "1 maxcpus=1 reset_devices" - -Notes on loading the dump-capture kernel: - -* By default, the ELF headers are stored in ELF64 format to support - systems with more than 4GB memory. On i386, kexec automatically checks if - the physical RAM size exceeds the 4 GB limit and if not, uses ELF32. - So, on non-PAE systems, ELF32 is always used. - - The --elf32-core-headers option can be used to force the generation of ELF32 - headers. This is necessary because GDB currently cannot open vmcore files - with ELF64 headers on 32-bit systems. - -* The "irqpoll" boot parameter reduces driver initialization failures - due to shared interrupts in the dump-capture kernel. - -* You must specify <root-dev> in the format corresponding to the root - device name in the output of mount command. - -* Boot parameter "1" boots the dump-capture kernel into single-user - mode without networking. If you want networking, use "3". - -* We generally don't have to bring up a SMP kernel just to capture the - dump. Hence generally it is useful either to build a UP dump-capture - kernel or specify maxcpus=1 option while loading dump-capture kernel. - Note, though maxcpus always works, you had better replace it with - nr_cpus to save memory if supported by the current ARCH, such as x86. - -* You should enable multi-cpu support in dump-capture kernel if you intend - to use multi-thread programs with it, such as parallel dump feature of - makedumpfile. Otherwise, the multi-thread program may have a great - performance degradation. To enable multi-cpu support, you should bring up an - SMP dump-capture kernel and specify maxcpus/nr_cpus, disable_cpu_apicid=[X] - options while loading it. - -* For s390x there are two kdump modes: If a ELF header is specified with - the elfcorehdr= kernel parameter, it is used by the kdump kernel as it - is done on all other architectures. If no elfcorehdr= kernel parameter is - specified, the s390x kdump kernel dynamically creates the header. The - second mode has the advantage that for CPU and memory hotplug, kdump has - not to be reloaded with kexec_load(). - -* For s390x systems with many attached devices the "cio_ignore" kernel - parameter should be used for the kdump kernel in order to prevent allocation - of kernel memory for devices that are not relevant for kdump. The same - applies to systems that use SCSI/FCP devices. In that case the - "allow_lun_scan" zfcp module parameter should be set to zero before - setting FCP devices online. - -Kernel Panic -============ - -After successfully loading the dump-capture kernel as previously -described, the system will reboot into the dump-capture kernel if a -system crash is triggered. Trigger points are located in panic(), -die(), die_nmi() and in the sysrq handler (ALT-SysRq-c). - -The following conditions will execute a crash trigger point: - -If a hard lockup is detected and "NMI watchdog" is configured, the system -will boot into the dump-capture kernel ( die_nmi() ). - -If die() is called, and it happens to be a thread with pid 0 or 1, or die() -is called inside interrupt context or die() is called and panic_on_oops is set, -the system will boot into the dump-capture kernel. - -On powerpc systems when a soft-reset is generated, die() is called by all cpus -and the system will boot into the dump-capture kernel. - -For testing purposes, you can trigger a crash by using "ALT-SysRq-c", -"echo c > /proc/sysrq-trigger" or write a module to force the panic. - -Write Out the Dump File -======================= - -After the dump-capture kernel is booted, write out the dump file with -the following command:: - - cp /proc/vmcore <dump-file> - - -Analysis -======== - -Before analyzing the dump image, you should reboot into a stable kernel. - -You can do limited analysis using GDB on the dump file copied out of -/proc/vmcore. Use the debug vmlinux built with -g and run the following -command:: - - gdb vmlinux <dump-file> - -Stack trace for the task on processor 0, register display, and memory -display work fine. - -Note: GDB cannot analyze core files generated in ELF64 format for x86. -On systems with a maximum of 4GB of memory, you can generate -ELF32-format headers using the --elf32-core-headers kernel option on the -dump kernel. - -You can also use the Crash utility to analyze dump files in Kdump -format. Crash is available on Dave Anderson's site at the following URL: - - http://people.redhat.com/~anderson/ - -Trigger Kdump on WARN() -======================= - -The kernel parameter, panic_on_warn, calls panic() in all WARN() paths. This -will cause a kdump to occur at the panic() call. In cases where a user wants -to specify this during runtime, /proc/sys/kernel/panic_on_warn can be set to 1 -to achieve the same behaviour. - -Contact -======= - -- Vivek Goyal (vgoyal@redhat.com) -- Maneesh Soni (maneesh@in.ibm.com) - -GDB macros -========== - -.. include:: gdbmacros.txt - :literal: diff --git a/Documentation/kdump/vmcoreinfo.rst b/Documentation/kdump/vmcoreinfo.rst deleted file mode 100644 index 007a6b86e0ee..000000000000 --- a/Documentation/kdump/vmcoreinfo.rst +++ /dev/null @@ -1,488 +0,0 @@ -========== -VMCOREINFO -========== - -What is it? -=========== - -VMCOREINFO is a special ELF note section. It contains various -information from the kernel like structure size, page size, symbol -values, field offsets, etc. These data are packed into an ELF note -section and used by user-space tools like crash and makedumpfile to -analyze a kernel's memory layout. - -Common variables -================ - -init_uts_ns.name.release ------------------------- - -The version of the Linux kernel. Used to find the corresponding source -code from which the kernel has been built. For example, crash uses it to -find the corresponding vmlinux in order to process vmcore. - -PAGE_SIZE ---------- - -The size of a page. It is the smallest unit of data used by the memory -management facilities. It is usually 4096 bytes of size and a page is -aligned on 4096 bytes. Used for computing page addresses. - -init_uts_ns ------------ - -The UTS namespace which is used to isolate two specific elements of the -system that relate to the uname(2) system call. It is named after the -data structure used to store information returned by the uname(2) system -call. - -User-space tools can get the kernel name, host name, kernel release -number, kernel version, architecture name and OS type from it. - -node_online_map ---------------- - -An array node_states[N_ONLINE] which represents the set of online nodes -in a system, one bit position per node number. Used to keep track of -which nodes are in the system and online. - -swapper_pg_dir --------------- - -The global page directory pointer of the kernel. Used to translate -virtual to physical addresses. - -_stext ------- - -Defines the beginning of the text section. In general, _stext indicates -the kernel start address. Used to convert a virtual address from the -direct kernel map to a physical address. - -vmap_area_list --------------- - -Stores the virtual area list. makedumpfile gets the vmalloc start value -from this variable and its value is necessary for vmalloc translation. - -mem_map -------- - -Physical addresses are translated to struct pages by treating them as -an index into the mem_map array. Right-shifting a physical address -PAGE_SHIFT bits converts it into a page frame number which is an index -into that mem_map array. - -Used to map an address to the corresponding struct page. - -contig_page_data ----------------- - -Makedumpfile gets the pglist_data structure from this symbol, which is -used to describe the memory layout. - -User-space tools use this to exclude free pages when dumping memory. - -mem_section|(mem_section, NR_SECTION_ROOTS)|(mem_section, section_mem_map) --------------------------------------------------------------------------- - -The address of the mem_section array, its length, structure size, and -the section_mem_map offset. - -It exists in the sparse memory mapping model, and it is also somewhat -similar to the mem_map variable, both of them are used to translate an -address. - -page ----- - -The size of a page structure. struct page is an important data structure -and it is widely used to compute contiguous memory. - -pglist_data ------------ - -The size of a pglist_data structure. This value is used to check if the -pglist_data structure is valid. It is also used for checking the memory -type. - -zone ----- - -The size of a zone structure. This value is used to check if the zone -structure has been found. It is also used for excluding free pages. - -free_area ---------- - -The size of a free_area structure. It indicates whether the free_area -structure is valid or not. Useful when excluding free pages. - -list_head ---------- - -The size of a list_head structure. Used when iterating lists in a -post-mortem analysis session. - -nodemask_t ----------- - -The size of a nodemask_t type. Used to compute the number of online -nodes. - -(page, flags|_refcount|mapping|lru|_mapcount|private|compound_dtor|compound_order|compound_head) -------------------------------------------------------------------------------------------------- - -User-space tools compute their values based on the offset of these -variables. The variables are used when excluding unnecessary pages. - -(pglist_data, node_zones|nr_zones|node_mem_map|node_start_pfn|node_spanned_pages|node_id) ------------------------------------------------------------------------------------------ - -On NUMA machines, each NUMA node has a pg_data_t to describe its memory -layout. On UMA machines there is a single pglist_data which describes the -whole memory. - -These values are used to check the memory type and to compute the -virtual address for memory map. - -(zone, free_area|vm_stat|spanned_pages) ---------------------------------------- - -Each node is divided into a number of blocks called zones which -represent ranges within memory. A zone is described by a structure zone. - -User-space tools compute required values based on the offset of these -variables. - -(free_area, free_list) ----------------------- - -Offset of the free_list's member. This value is used to compute the number -of free pages. - -Each zone has a free_area structure array called free_area[MAX_ORDER]. -The free_list represents a linked list of free page blocks. - -(list_head, next|prev) ----------------------- - -Offsets of the list_head's members. list_head is used to define a -circular linked list. User-space tools need these in order to traverse -lists. - -(vmap_area, va_start|list) --------------------------- - -Offsets of the vmap_area's members. They carry vmalloc-specific -information. Makedumpfile gets the start address of the vmalloc region -from this. - -(zone.free_area, MAX_ORDER) ---------------------------- - -Free areas descriptor. User-space tools use this value to iterate the -free_area ranges. MAX_ORDER is used by the zone buddy allocator. - -log_first_idx -------------- - -Index of the first record stored in the buffer log_buf. Used by -user-space tools to read the strings in the log_buf. - -log_buf -------- - -Console output is written to the ring buffer log_buf at index -log_first_idx. Used to get the kernel log. - -log_buf_len ------------ - -log_buf's length. - -clear_idx ---------- - -The index that the next printk() record to read after the last clear -command. It indicates the first record after the last SYSLOG_ACTION -_CLEAR, like issued by 'dmesg -c'. Used by user-space tools to dump -the dmesg log. - -log_next_idx ------------- - -The index of the next record to store in the buffer log_buf. Used to -compute the index of the current buffer position. - -printk_log ----------- - -The size of a structure printk_log. Used to compute the size of -messages, and extract dmesg log. It encapsulates header information for -log_buf, such as timestamp, syslog level, etc. - -(printk_log, ts_nsec|len|text_len|dict_len) -------------------------------------------- - -It represents field offsets in struct printk_log. User space tools -parse it and check whether the values of printk_log's members have been -changed. - -(free_area.free_list, MIGRATE_TYPES) ------------------------------------- - -The number of migrate types for pages. The free_list is described by the -array. Used by tools to compute the number of free pages. - -NR_FREE_PAGES -------------- - -On linux-2.6.21 or later, the number of free pages is in -vm_stat[NR_FREE_PAGES]. Used to get the number of free pages. - -PG_lru|PG_private|PG_swapcache|PG_swapbacked|PG_slab|PG_hwpoision|PG_head_mask ------------------------------------------------------------------------------- - -Page attributes. These flags are used to filter various unnecessary for -dumping pages. - -PAGE_BUDDY_MAPCOUNT_VALUE(~PG_buddy)|PAGE_OFFLINE_MAPCOUNT_VALUE(~PG_offline) ------------------------------------------------------------------------------ - -More page attributes. These flags are used to filter various unnecessary for -dumping pages. - - -HUGETLB_PAGE_DTOR ------------------ - -The HUGETLB_PAGE_DTOR flag denotes hugetlbfs pages. Makedumpfile -excludes these pages. - -x86_64 -====== - -phys_base ---------- - -Used to convert the virtual address of an exported kernel symbol to its -corresponding physical address. - -init_top_pgt ------------- - -Used to walk through the whole page table and convert virtual addresses -to physical addresses. The init_top_pgt is somewhat similar to -swapper_pg_dir, but it is only used in x86_64. - -pgtable_l5_enabled ------------------- - -User-space tools need to know whether the crash kernel was in 5-level -paging mode. - -node_data ---------- - -This is a struct pglist_data array and stores all NUMA nodes -information. Makedumpfile gets the pglist_data structure from it. - -(node_data, MAX_NUMNODES) -------------------------- - -The maximum number of nodes in system. - -KERNELOFFSET ------------- - -The kernel randomization offset. Used to compute the page offset. If -KASLR is disabled, this value is zero. - -KERNEL_IMAGE_SIZE ------------------ - -Currently unused by Makedumpfile. Used to compute the module virtual -address by Crash. - -sme_mask --------- - -AMD-specific with SME support: it indicates the secure memory encryption -mask. Makedumpfile tools need to know whether the crash kernel was -encrypted. If SME is enabled in the first kernel, the crash kernel's -page table entries (pgd/pud/pmd/pte) contain the memory encryption -mask. This is used to remove the SME mask and obtain the true physical -address. - -Currently, sme_mask stores the value of the C-bit position. If needed, -additional SME-relevant info can be placed in that variable. - -For example:: - - [ misc ][ enc bit ][ other misc SME info ] - 0000_0000_0000_0000_1000_0000_0000_0000_0000_0000_..._0000 - 63 59 55 51 47 43 39 35 31 27 ... 3 - -x86_32 -====== - -X86_PAE -------- - -Denotes whether physical address extensions are enabled. It has the cost -of a higher page table lookup overhead, and also consumes more page -table space per process. Used to check whether PAE was enabled in the -crash kernel when converting virtual addresses to physical addresses. - -ia64 -==== - -pgdat_list|(pgdat_list, MAX_NUMNODES) -------------------------------------- - -pg_data_t array storing all NUMA nodes information. MAX_NUMNODES -indicates the number of the nodes. - -node_memblk|(node_memblk, NR_NODE_MEMBLKS) ------------------------------------------- - -List of node memory chunks. Filled when parsing the SRAT table to obtain -information about memory nodes. NR_NODE_MEMBLKS indicates the number of -node memory chunks. - -These values are used to compute the number of nodes the crashed kernel used. - -node_memblk_s|(node_memblk_s, start_paddr)|(node_memblk_s, size) ----------------------------------------------------------------- - -The size of a struct node_memblk_s and the offsets of the -node_memblk_s's members. Used to compute the number of nodes. - -PGTABLE_3|PGTABLE_4 -------------------- - -User-space tools need to know whether the crash kernel was in 3-level or -4-level paging mode. Used to distinguish the page table. - -ARM64 -===== - -VA_BITS -------- - -The maximum number of bits for virtual addresses. Used to compute the -virtual memory ranges. - -kimage_voffset --------------- - -The offset between the kernel virtual and physical mappings. Used to -translate virtual to physical addresses. - -PHYS_OFFSET ------------ - -Indicates the physical address of the start of memory. Similar to -kimage_voffset, which is used to translate virtual to physical -addresses. - -KERNELOFFSET ------------- - -The kernel randomization offset. Used to compute the page offset. If -KASLR is disabled, this value is zero. - -arm -=== - -ARM_LPAE --------- - -It indicates whether the crash kernel supports large physical address -extensions. Used to translate virtual to physical addresses. - -s390 -==== - -lowcore_ptr ------------ - -An array with a pointer to the lowcore of every CPU. Used to print the -psw and all registers information. - -high_memory ------------ - -Used to get the vmalloc_start address from the high_memory symbol. - -(lowcore_ptr, NR_CPUS) ----------------------- - -The maximum number of CPUs. - -powerpc -======= - - -node_data|(node_data, MAX_NUMNODES) ------------------------------------ - -See above. - -contig_page_data ----------------- - -See above. - -vmemmap_list ------------- - -The vmemmap_list maintains the entire vmemmap physical mapping. Used -to get vmemmap list count and populated vmemmap regions info. If the -vmemmap address translation information is stored in the crash kernel, -it is used to translate vmemmap kernel virtual addresses. - -mmu_vmemmap_psize ------------------ - -The size of a page. Used to translate virtual to physical addresses. - -mmu_psize_defs --------------- - -Page size definitions, i.e. 4k, 64k, or 16M. - -Used to make vtop translations. - -vmemmap_backing|(vmemmap_backing, list)|(vmemmap_backing, phys)|(vmemmap_backing, virt_addr) --------------------------------------------------------------------------------------------- - -The vmemmap virtual address space management does not have a traditional -page table to track which virtual struct pages are backed by a physical -mapping. The virtual to physical mappings are tracked in a simple linked -list format. - -User-space tools need to know the offset of list, phys and virt_addr -when computing the count of vmemmap regions. - -mmu_psize_def|(mmu_psize_def, shift) ------------------------------------- - -The size of a struct mmu_psize_def and the offset of mmu_psize_def's -member. - -Used in vtop translations. - -sh -== - -node_data|(node_data, MAX_NUMNODES) ------------------------------------ - -See above. - -X2TLB ------ - -Indicates whether the crashed kernel enabled SH extended mode. |