From 4ab5a5d2a4a2289c2af07accbec7170ca5671f41 Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 8 Jan 2019 20:40:06 +0100 Subject: tools: add a kernel-chktaint to tools/debugging Add a script to the tools/ directory that shows if or why the running kernel was tainted. The script was mostly written by Randy Dunlap; I enhanced the script a bit. There does not appear to be a good home for this script. so create tools/debugging for tools of this nature. Signed-off-by: Randy Dunlap Signed-off-by: Thorsten Leemhuis [ jc: fixed conflicts, rewrote changelog ] Signed-off-by: Jonathan Corbet --- tools/Makefile | 14 +-- tools/debugging/Makefile | 16 ++++ tools/debugging/kernel-chktaint | 202 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 226 insertions(+), 6 deletions(-) create mode 100644 tools/debugging/Makefile create mode 100755 tools/debugging/kernel-chktaint diff --git a/tools/Makefile b/tools/Makefile index abb358a70ad0..c0d1e59f5abb 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -12,6 +12,7 @@ help: @echo ' acpi - ACPI tools' @echo ' cgroup - cgroup tools' @echo ' cpupower - a tool for all things x86 CPU power' + @echo ' debugging - tools for debugging' @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' @echo ' freefall - laptop accelerometer program for disk protection' @echo ' gpio - GPIO tools' @@ -60,7 +61,7 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci: FORCE +cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi pci debugging: FORCE $(call descend,$@) liblockdep: FORCE @@ -95,7 +96,8 @@ kvm_stat: FORCE all: acpi cgroup cpupower gpio hv firewire liblockdep \ perf selftests spi turbostat usb \ virtio vm bpf x86_energy_perf_policy \ - tmon freefall iio objtool kvm_stat wmi pci + tmon freefall iio objtool kvm_stat wmi \ + pci debugging acpi_install: $(call descend,power/$(@:_install=),install) @@ -103,7 +105,7 
@@ acpi_install: cpupower_install: $(call descend,power/$(@:_install=),install) -cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install: +cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install: $(call descend,$(@:_install=),install) liblockdep_install: @@ -129,7 +131,7 @@ install: acpi_install cgroup_install cpupower_install gpio_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install bpf_install x86_energy_perf_policy_install \ tmon_install freefall_install objtool_install kvm_stat_install \ - wmi_install pci_install + wmi_install pci_install debugging_install acpi_clean: $(call descend,power/acpi,clean) @@ -137,7 +139,7 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean: +cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean debugging_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: @@ -175,6 +177,6 @@ clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \ perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \ - gpio_clean objtool_clean leds_clean wmi_clean pci_clean + gpio_clean objtool_clean leds_clean wmi_clean pci_clean debugging_clean .PHONY: FORCE diff --git a/tools/debugging/Makefile b/tools/debugging/Makefile new file mode 100644 index 000000000000..e2b7c1a6fb8f --- /dev/null +++ 
b/tools/debugging/Makefile @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for debugging tools + +PREFIX ?= /usr +BINDIR ?= bin +INSTALL ?= install + +TARGET = kernel-chktaint + +all: $(TARGET) + +clean: + +install: kernel-chktaint + $(INSTALL) -D -m 755 $(TARGET) $(DESTDIR)$(PREFIX)/$(BINDIR)/$(TARGET) + diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint new file mode 100755 index 000000000000..2240cb56e6e5 --- /dev/null +++ b/tools/debugging/kernel-chktaint @@ -0,0 +1,202 @@ +#! /bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Randy Dunlap , 2018 +# Thorsten Leemhuis , 2018 + +usage() +{ + cat < + +Call without parameters to decode /proc/sys/kernel/tainted. + +Call with a positive integer as parameter to decode a value you +retrieved from /proc/sys/kernel/tainted on another system. + +EOF +} + +if [ "$1"x != "x" ]; then + if [ "$1"x == "--helpx" ] || [ "$1"x == "-hx" ] ; then + usage + exit 1 + elif [ $1 -ge 0 ] 2>/dev/null ; then + taint=$1 + else + echo "Error: Parameter '$1' not a positive interger. Aborting." >&2 + exit 1 + fi +else + TAINTFILE="/proc/sys/kernel/tainted" + if [ ! 
-r $TAINTFILE ]; then + echo "No file: $TAINTFILE" + exit + fi + + taint=`cat $TAINTFILE` +fi + +if [ $taint -eq 0 ]; then + echo "Kernel not Tainted" + exit +else + echo "Kernel is \"tainted\" for the following reasons:" +fi + +T=$taint +out= + +addout() { + out=$out$1 +} + +if [ `expr $T % 2` -eq 0 ]; then + addout "G" +else + addout "P" + echo " * proprietary module was loaded (#0)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "F" + echo " * module was force loaded (#1)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "S" + echo " * SMP kernel oops on an officially SMP incapable processor (#2)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "R" + echo " * module was force unloaded (#3)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "M" + echo " * processor reported a Machine Check Exception (MCE) (#4)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "B" + echo " * bad page referenced or some unexpected page flags (#5)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "U" + echo " * taint requested by userspace application (#6)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "D" + echo " * kernel died recently, i.e. 
there was an OOPS or BUG (#7)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "A" + echo " * an ACPI table was overridden by user (#8)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "W" + echo " * kernel issued warning (#9)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "C" + echo " * staging driver was loaded (#10)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "I" + echo " * workaround for bug in platform firmware applied (#11)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "O" + echo " * externally-built ('out-of-tree') module was loaded (#12)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "E" + echo " * unsigned module was loaded (#13)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "L" + echo " * soft lockup occurred (#14)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "K" + echo " * kernel has been live patched (#15)" +fi + +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "X" + echo " * auxiliary taint, defined for and used by distros (#16)" + +fi +T=`expr $T / 2` +if [ `expr $T % 2` -eq 0 ]; then + addout " " +else + addout "T" + echo " * kernel was built with the struct randomization plugin (#17)" +fi + +echo "For a more detailed explanation of the various taint flags see" +echo " Documentation/admin-guide/tainted-kernels.rst in the the Linux kernel sources" +echo " or https://kernel.org/doc/html/latest/admin-guide/tainted-kernels.html" +echo "Raw taint value as int/string: $taint/'$out'" +#EOF# -- cgit v1.2.3 From 896dd323abbf6a9980d8aca2656b6c4bf5352c3b Mon Sep 17 00:00:00 2001 From: Thorsten Leemhuis Date: Tue, 8 Jan 2019 20:40:07 +0100 Subject: docs: Revamp tainted-kernels.rst to make it more comprehensible Add a 
section about decoding /proc/sys/kernel/tainted, create a more understandable intro and hopefully better explain the tainted flags in bugs, oops or panics messages. Only thing missing then is a table that quickly describes the various bits and taint flags before going into more detail, so add that as well. That table is partly based on a section from Documentation/sysctl/kernel.txt, but a bit more compact. To avoid confusion I added the shortened version to kernel.txt; the same table is used in three different places now: ./tools/debugging/kernel-chktaint, Documentation/admin-guide/tainted-kernels.rst and Documentation/sysctl/kernel.txt During review of v1 (see above) a number of existing issues with the text were raised, like outdated usages as well as incomplete or missing descriptions. Address most of those as well. Signed-off-by: Thorsten Leemhuis [jc: tightened up changelog] Signed-off-by: Jonathan Corbet --- Documentation/admin-guide/tainted-kernels.rst | 159 +++++++++++++++++++++----- Documentation/sysctl/kernel.txt | 50 ++++---- 2 files changed, 154 insertions(+), 55 deletions(-) diff --git a/Documentation/admin-guide/tainted-kernels.rst b/Documentation/admin-guide/tainted-kernels.rst index 28a869c509a0..71e9184a9079 100644 --- a/Documentation/admin-guide/tainted-kernels.rst +++ b/Documentation/admin-guide/tainted-kernels.rst @@ -1,59 +1,164 @@ Tainted kernels --------------- -Some oops reports contain the string **'Tainted: '** after the program -counter. This indicates that the kernel has been tainted by some -mechanism. The string is followed by a series of position-sensitive -characters, each representing a particular tainted value. - - 1) ``G`` if all modules loaded have a GPL or compatible license, ``P`` if +The kernel will mark itself as 'tainted' when something occurs that might be +relevant later when investigating problems.
Don't worry too much about this, +most of the time it's not a problem to run a tainted kernel; the information is +mainly of interest once someone wants to investigate some problem, as its real +cause might be the event that got the kernel tainted. That's why bug reports +from tainted kernels will often be ignored by developers, hence try to reproduce +problems with an untainted kernel. + +Note the kernel will remain tainted even after you undo what caused the taint +(e.g. unload a proprietary kernel module), to indicate the kernel remains not +trustworthy. That's also why the kernel will print the tainted state when it +notices an internal problem (a 'kernel bug'), a recoverable error +('kernel oops') or a non-recoverable error ('kernel panic') and writes debug +information about this to the logs ``dmesg`` outputs. It's also possible to +check the tainted state at runtime through a file in ``/proc/``. + + +Tainted flag in bugs, oops or panics messages +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You find the tainted state near the top in a line starting with 'CPU:'; if or +why the kernel was tainted is shown after the Process ID ('PID:') and a shortened +name of the command ('Comm:') that triggered the event:: + + BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 + Oops: 0002 [#1] SMP PTI + CPU: 0 PID: 4424 Comm: insmod Tainted: P W O 4.20.0-0.rc6.fc30 #1 + Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 + RIP: 0010:my_oops_init+0x13/0x1000 [kpanic] + [...] + +You'll find a 'Not tainted: ' there if the kernel was not tainted at the +time of the event; if it was, then it will print 'Tainted: ' followed by +characters that are either letters or blanks. In the above example it looks like this:: + + Tainted: P W O + +The meaning of those characters is explained in the table below. In this case +the kernel got tainted earlier because a proprietary Module (``P``) was loaded, +a warning occurred (``W``), and an externally-built module was loaded (``O``).
+To decode other letters use the table below. + + +Decoding tainted state at runtime +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +At runtime, you can query the tainted state by running +``cat /proc/sys/kernel/tainted``. If that returns ``0``, the kernel is not +tainted; any other number indicates the reasons why it is. The easiest way to +decode that number is the script ``tools/debugging/kernel-chktaint``, which your +distribution might ship as part of a package called ``linux-tools`` or +``kernel-tools``; if it doesn't you can download the script from +`git.kernel.org <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/tools/debugging/kernel-chktaint>`_ +and execute it with ``sh kernel-chktaint``, which would print something like +this on the machine that had the statements in the logs that were quoted earlier:: + + Kernel is Tainted for following reasons: + * Proprietary module was loaded (#0) + * Kernel issued warning (#9) + * Externally-built ('out-of-tree') module was loaded (#12) + See Documentation/admin-guide/tainted-kernels.rst in the Linux kernel or + https://www.kernel.org/doc/html/latest/admin-guide/tainted-kernels.html for + a more detailed explanation of the various taint flags. + Raw taint value as int/string: 4609/'P W O ' + +You can try to decode the number yourself. That's easy if there was only one +reason that got your kernel tainted, as in this case you can find the number +with the table below. If there were multiple reasons you need to decode the +number, as it is a bitfield, where each bit indicates the absence or presence of +a particular type of taint.
It's best to leave that to the aforementioned +script, but if you need something quick you can use this shell command to check +which bits are set:: + + $ for i in $(seq 18); do echo $(($i-1)) $(($(cat /proc/sys/kernel/tainted)>>($i-1)&1));done + +Table for decoding tainted state +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +=== === ====== ======================================================== +Bit Log Number Reason that got the kernel tainted +=== === ====== ======================================================== + 0 G/P 1 proprietary module was loaded + 1 _/F 2 module was force loaded + 2 _/S 4 SMP kernel oops on an officially SMP incapable processor + 3 _/R 8 module was force unloaded + 4 _/M 16 processor reported a Machine Check Exception (MCE) + 5 _/B 32 bad page referenced or some unexpected page flags + 6 _/U 64 taint requested by userspace application + 7 _/D 128 kernel died recently, i.e. there was an OOPS or BUG + 8 _/A 256 ACPI table overridden by user + 9 _/W 512 kernel issued warning + 10 _/C 1024 staging driver was loaded + 11 _/I 2048 workaround for bug in platform firmware applied + 12 _/O 4096 externally-built ("out-of-tree") module was loaded + 13 _/E 8192 unsigned module was loaded + 14 _/L 16384 soft lockup occurred + 15 _/K 32768 kernel has been live patched + 16 _/X 65536 auxiliary taint, defined for and used by distros + 17 _/T 131072 kernel was built with the struct randomization plugin +=== === ====== ======================================================== + +Note: The character ``_`` is representing a blank in this table to make reading +easier. + +More detailed explanation for tainting +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + 0) ``G`` if all modules loaded have a GPL or compatible license, ``P`` if any proprietary module has been loaded. Modules without a MODULE_LICENSE or with a MODULE_LICENSE that is not recognised by insmod as GPL compatible are assumed to be proprietary. 
- 2) ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all + 1) ``F`` if any module was force loaded by ``insmod -f``, ``' '`` if all modules were loaded normally. - 3) ``S`` if the oops occurred on an SMP kernel running on hardware that + 2) ``S`` if the oops occurred on an SMP kernel running on hardware that hasn't been certified as safe to run multiprocessor. Currently this occurs only on various Athlons that are not SMP capable. - 4) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all + 3) ``R`` if a module was force unloaded by ``rmmod -f``, ``' '`` if all modules were unloaded normally. - 5) ``M`` if any processor has reported a Machine Check Exception, + 4) ``M`` if any processor has reported a Machine Check Exception, ``' '`` if no Machine Check Exceptions have occurred. - 6) ``B`` if a page-release function has found a bad page reference or - some unexpected page flags. + 5) ``B`` if a page-release function has found a bad page reference or some + unexpected page flags. This indicates a hardware problem or a kernel bug; + there should be other information in the log indicating why this tainting + occurred. - 7) ``U`` if a user or user application specifically requested that the + 6) ``U`` if a user or user application specifically requested that the Tainted flag be set, ``' '`` otherwise. - 8) ``D`` if the kernel has died recently, i.e. there was an OOPS or BUG. + 7) ``D`` if the kernel has died recently, i.e. there was an OOPS or BUG. - 9) ``A`` if the ACPI table has been overridden. + 8) ``A`` if an ACPI table has been overridden. - 10) ``W`` if a warning has previously been issued by the kernel. + 9) ``W`` if a warning has previously been issued by the kernel. (Though some warnings may set more specific taint flags.) - 11) ``C`` if a staging driver has been loaded. + 10) ``C`` if a staging driver has been loaded.
- 12) ``I`` if the kernel is working around a severe bug in the platform + 11) ``I`` if the kernel is working around a severe bug in the platform firmware (BIOS or similar). - 13) ``O`` if an externally-built ("out-of-tree") module has been loaded. + 12) ``O`` if an externally-built ("out-of-tree") module has been loaded. - 14) ``E`` if an unsigned module has been loaded in a kernel supporting + 13) ``E`` if an unsigned module has been loaded in a kernel supporting module signature. - 15) ``L`` if a soft lockup has previously occurred on the system. + 14) ``L`` if a soft lockup has previously occurred on the system. + + 15) ``K`` if the kernel has been live patched. - 16) ``K`` if the kernel has been live patched. + 16) ``X`` Auxiliary taint, defined for and used by Linux distributors. -The primary reason for the **'Tainted: '** string is to tell kernel -debuggers if this is a clean kernel or if anything unusual has -occurred. Tainting is permanent: even if an offending module is -unloaded, the tainted value remains to indicate that the kernel is not -trustworthy. + 17) ``T`` Kernel was built with the randstruct plugin, which can intentionally + produce extremely unusual kernel structure layouts (even performance + pathological ones), which is important to know when debugging. Set at + build time. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 1b8775298cf7..b279b824b887 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -93,7 +93,7 @@ show up in /proc/sys/kernel: - stop-a [ SPARC only ] - sysrq ==> Documentation/admin-guide/sysrq.rst - sysctl_writes_strict -- tainted +- tainted ==> Documentation/admin-guide/tainted-kernels.rst - threads-max - unknown_nmi_panic - watchdog @@ -1002,39 +1002,33 @@ compilation sees a 1% slowdown, other systems and workloads may vary. 1: kernel stack erasing is enabled (default), it is performed before returning to the userspace at the end of syscalls.
- ============================================================== -tainted: +tainted Non-zero if the kernel has been tainted. Numeric values, which can be ORed together. The letters are seen in "Tainted" line of Oops reports. - 1 (P): A module with a non-GPL license has been loaded, this - includes modules with no license. - Set by modutils >= 2.4.9 and module-init-tools. - 2 (F): A module was force loaded by insmod -f. - Set by modutils >= 2.4.9 and module-init-tools. - 4 (S): Unsafe SMP processors: SMP with CPUs not designed for SMP. - 8 (R): A module was forcibly unloaded from the system by rmmod -f. - 16 (M): A hardware machine check error occurred on the system. - 32 (B): A bad page was discovered on the system. - 64 (U): The user has asked that the system be marked "tainted". This - could be because they are running software that directly modifies - the hardware, or for other reasons. - 128 (D): The system has died. - 256 (A): The ACPI DSDT has been overridden with one supplied by the user - instead of using the one provided by the hardware. - 512 (W): A kernel warning has occurred. - 1024 (C): A module from drivers/staging was loaded. - 2048 (I): The system is working around a severe firmware bug. - 4096 (O): An out-of-tree module has been loaded. - 8192 (E): An unsigned module has been loaded in a kernel supporting module - signature. - 16384 (L): A soft lockup has previously occurred on the system. - 32768 (K): The kernel has been live patched. - 65536 (X): Auxiliary taint, defined and used by for distros. -131072 (T): The kernel was built with the struct randomization plugin. + 1 (P): proprietary module was loaded + 2 (F): module was force loaded + 4 (S): SMP kernel oops on an officially SMP incapable processor + 8 (R): module was force unloaded + 16 (M): processor reported a Machine Check Exception (MCE) + 32 (B): bad page referenced or some unexpected page flags + 64 (U): taint requested by userspace application + 128 (D): kernel died recently, i.e. 
there was an OOPS or BUG + 256 (A): an ACPI table was overridden by user + 512 (W): kernel issued warning + 1024 (C): staging driver was loaded + 2048 (I): workaround for bug in platform firmware applied + 4096 (O): externally-built ("out-of-tree") module was loaded + 8192 (E): unsigned module was loaded + 16384 (L): soft lockup occurred + 32768 (K): kernel has been live patched + 65536 (X): Auxiliary taint, defined for and used by distros +131072 (T): The kernel was built with the struct randomization plugin + +See Documentation/admin-guide/tainted-kernels.rst for more information. ============================================================== -- cgit v1.2.3