diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-20 21:48:06 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-20 21:48:06 +0300 |
commit | 45824fc0da6e46cc5d563105e1eaaf3098a686f9 (patch) | |
tree | 8e57c1f18104ed5f0d74d9eed9dc0365b3c137b8 /tools/testing/selftests/powerpc/eeh | |
parent | 8c2b418c3f95a488f5226870eee68574d323f0f8 (diff) | |
parent | d9101bfa6adc831bda8836c4d774820553c14942 (diff) | |
download | linux-45824fc0da6e46cc5d563105e1eaaf3098a686f9.tar.xz |
Merge tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"This is a bit late, partly due to me travelling, and partly due to a
power outage knocking out some of my test systems *while* I was
travelling.
- Initial support for running on a system with an Ultravisor, which
is software that runs below the hypervisor and protects guests
against some attacks by the hypervisor.
- Support for building the kernel to run as a "Secure Virtual
Machine", ie. as a guest capable of running on a system with an
Ultravisor.
- Some changes to our DMA code on bare metal, to allow devices with
medium sized DMA masks (> 32 && < 59 bits) to use more than 2GB of
DMA space.
- Support for firmware assisted crash dumps on bare metal (powernv).
- Two series fixing bugs in and refactoring our PCI EEH code.
- A large series refactoring our exception entry code to use gas
macros, both to make it more readable and also enable some future
optimisations.
As well as many cleanups and other minor features & fixups.
Thanks to: Adam Zerella, Alexey Kardashevskiy, Alistair Popple, Andrew
Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Anshuman Khandual,
Balbir Singh, Benjamin Herrenschmidt, Cédric Le Goater, Christophe
JAILLET, Christophe Leroy, Christopher M. Riedl, Christoph Hellwig,
Claudio Carvalho, Daniel Axtens, David Gibson, David Hildenbrand,
Desnes A. Nunes do Rosario, Ganesh Goudar, Gautham R. Shenoy, Greg
Kurz, Guerney Hunt, Gustavo Romero, Halil Pasic, Hari Bathini, Joakim
Tjernlund, Jonathan Neuschafer, Jordan Niethe, Leonardo Bras, Lianbo
Jiang, Madhavan Srinivasan, Mahesh Salgaonkar, Mahesh Salgaonkar,
Masahiro Yamada, Maxiwell S. Garcia, Michael Anderson, Nathan
Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver
O'Halloran, Qian Cai, Ram Pai, Ravi Bangoria, Reza Arbab, Ryan Grimm,
Sam Bobroff, Santosh Sivaraj, Segher Boessenkool, Sukadev Bhattiprolu,
Thiago Bauermann, Thiago Jung Bauermann, Thomas Gleixner, Tom
Lendacky, Vasant Hegde"
* tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (264 commits)
powerpc/mm/mce: Keep irqs disabled during lockless page table walk
powerpc: Use ftrace_graph_ret_addr() when unwinding
powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
ftrace: Look up the address of return_to_handler() using helpers
powerpc: dump kernel log before carrying out fadump or kdump
docs: powerpc: Add missing documentation reference
powerpc/xmon: Fix output of XIVE IPI
powerpc/xmon: Improve output of XIVE interrupts
powerpc/mm/radix: remove useless kernel messages
powerpc/fadump: support holes in kernel boot memory area
powerpc/fadump: remove RMA_START and RMA_END macros
powerpc/fadump: update documentation about option to release opalcore
powerpc/fadump: consider f/w load area
powerpc/opalcore: provide an option to invalidate /sys/firmware/opal/core file
powerpc/opalcore: export /sys/firmware/opal/core for analysing opal crashes
powerpc/fadump: update documentation about CONFIG_PRESERVE_FA_DUMP
powerpc/fadump: add support to preserve crash data on FADUMP disabled kernel
powerpc/fadump: improve how crashed kernel's memory is reserved
powerpc/fadump: consider reserved ranges while releasing memory
powerpc/fadump: make crash memory ranges array allocation generic
...
Diffstat (limited to 'tools/testing/selftests/powerpc/eeh')
-rw-r--r-- | tools/testing/selftests/powerpc/eeh/Makefile | 9 | ||||
-rwxr-xr-x | tools/testing/selftests/powerpc/eeh/eeh-basic.sh | 82 | ||||
-rwxr-xr-x | tools/testing/selftests/powerpc/eeh/eeh-functions.sh | 76 |
3 files changed, 167 insertions, 0 deletions
diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile new file mode 100644 index 000000000000..b397babd569b --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +noarg: + $(MAKE) -C ../ + +TEST_PROGS := eeh-basic.sh +TEST_FILES := eeh-functions.sh + +top_srcdir = ../../../../.. +include ../../lib.mk diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh new file mode 100755 index 000000000000..f988d2f42e8f --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +. ./eeh-functions.sh + +if ! eeh_supported ; then + echo "EEH not supported on this system, skipping" + exit 0; +fi + +if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ + [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then + echo "debugfs EEH testing files are missing. Is debugfs mounted?" + exit 1; +fi + +pre_lspci=`mktemp` +lspci > $pre_lspci + +# Bump the max freeze count to something absurd so we don't +# trip over it while breaking things. +echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes + +# record the devices that we break in here. Assuming everything +# goes to plan we should get them back once the recover process +# is finished. +devices="" + +# Build up a list of candidate devices. +for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do + # skip bridges since we can't recover them (yet...) + if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then + echo "$dev, Skipped: bridge" + continue; + fi + + # Skip VFs for now since we don't have a reliable way + # to break them. + if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then + echo "$dev, Skipped: virtfn" + continue; + fi + + # Don't inject errosr into an already-frozen PE. This happens with + # PEs that contain multiple PCI devices (e.g. multi-function cards) + # and injecting new errors during the recovery process will probably + # result in the recovery failing and the device being marked as + # failed. + if ! pe_ok $dev ; then + echo "$dev, Skipped: Bad initial PE state" + continue; + fi + + echo "$dev, Added" + + # Add to this list of device to check + devices="$devices $dev" +done + +dev_count="$(echo $devices | wc -w)" +echo "Found ${dev_count} breakable devices..." + +failed=0 +for dev in $devices ; do + echo "Breaking $dev..." + + if ! pe_ok $dev ; then + echo "Skipping $dev, Initial PE state is not ok" + failed="$((failed + 1))" + continue; + fi + + if ! eeh_one_dev $dev ; then + failed="$((failed + 1))" + fi +done + +echo "$failed devices failed to recover ($dev_count tested)" +lspci | diff -u $pre_lspci - +rm -f $pre_lspci + +exit $failed diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh new file mode 100755 index 000000000000..26112ab5cdf4 --- /dev/null +++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh @@ -0,0 +1,76 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-only + +pe_ok() { + local dev="$1" + local path="/sys/bus/pci/devices/$dev/eeh_pe_state" + + if ! [ -e "$path" ] ; then + return 1; + fi + + local fw_state="$(cut -d' ' -f1 < $path)" + local sw_state="$(cut -d' ' -f2 < $path)" + + # If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an + # error state or being recovered. Either way, not ok. + if [ "$((sw_state & 0x3))" -ne 0 ] ; then + return 1 + fi + + # A functioning PE should have the EEH_STATE_MMIO_ACTIVE and + # EEH_STATE_DMA_ACTIVE flags set. For some goddamn stupid reason + # the platform backends set these when the PE is in reset. The + # RECOVERING check above should stop any false positives though. + if [ "$((fw_state & 0x18))" -ne "$((0x18))" ] ; then + return 1 + fi + + return 0; +} + +eeh_supported() { + test -e /proc/powerpc/eeh && \ + grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh +} + +eeh_one_dev() { + local dev="$1" + + # Using this function from the command line is sometimes useful for + # testing so check that the argument is a well-formed sysfs device + # name. + if ! test -e /sys/bus/pci/devices/$dev/ ; then + echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)" + return 1; + fi + + # Break it + echo $dev >/sys/kernel/debug/powerpc/eeh_dev_break + + # Force an EEH device check. If the kernel has already + # noticed the EEH (due to a driver poll or whatever), this + # is a no-op. + echo $dev >/sys/kernel/debug/powerpc/eeh_dev_check + + # Enforce a 30s timeout for recovery. Even the IPR, which is infamously + # slow to reset, should recover within 30s. + max_wait=30 + + for i in `seq 0 ${max_wait}` ; do + if pe_ok $dev ; then + break; + fi + echo "$dev, waited $i/${max_wait}" + sleep 1 + done + + if ! pe_ok $dev ; then + echo "$dev, Failed to recover!" + return 1; + fi + + echo "$dev, Recovered after $i seconds" + return 0; +} + |