#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Basic EEH recovery selftest.
#
# Collects function-0 PCI devices that are neither bridges, virtual
# functions, nor bound to the ahci driver, and whose PE passes pe_ok.
# Each collected device is then handed to eeh_one_dev, and the number
# of devices for which that fails is reported.
#
# Exit status: 0 when no device failed, non-zero when at least one did,
# and $KSELFTESTS_SKIP when the test cannot run on this system.
#
# eeh_supported, pe_ok and eeh_one_dev are not defined in this file;
# they are expected to be provided by eeh-functions.sh, sourced below.

KSELFTESTS_SKIP=4

. ./eeh-functions.sh

if ! eeh_supported ; then
	echo "EEH not supported on this system, skipping"
	exit "$KSELFTESTS_SKIP"
fi

# Skip when either debugfs file is absent (the original only skipped when
# both were). NOTE(review): presumably the sourced helpers use both files;
# confirm against eeh-functions.sh.
if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] || \
   [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
	exit "$KSELFTESTS_SKIP"
fi

# Snapshot the lspci output so it can be diffed against the state after
# the test has run.
pre_lspci=$(mktemp) || {
	echo "Unable to create a temporary file" >&2
	exit 1
}
# Remove the snapshot when the script exits, including early exits.
trap 'rm -f "$pre_lspci"' EXIT
lspci > "$pre_lspci"

# Bump the max freeze count to something absurd so we don't
# trip over it while breaking things.
echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes

# Record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recovery process
# is finished.
devices=""
dev_count=0

# Build up a list of candidate devices (function 0 of each device only).
for dev_path in /sys/bus/pci/devices/*.0 ; do
	# An unmatched glob is left as the literal pattern; skip it.
	[ -e "$dev_path" ] || continue
	dev=${dev_path##*/}

	# Skip bridges since we can't recover them (yet...)
	if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
		echo "$dev, Skipped: bridge"
		continue
	fi

	# Skip VFs for now since we don't have a reliable way
	# to break them.
	if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
		echo "$dev, Skipped: virtfn"
		continue
	fi

	if [ "ahci" = "$(basename "$(realpath "/sys/bus/pci/devices/$dev/driver")")" ] ; then
		echo "$dev, Skipped: ahci doesn't support recovery"
		continue
	fi

	# Don't inject errors into an already-frozen PE. This happens with
	# PEs that contain multiple PCI devices (e.g. multi-function cards)
	# and injecting new errors during the recovery process will probably
	# result in the recovery failing and the device being marked as
	# failed.
	if ! pe_ok "$dev" ; then
		echo "$dev, Skipped: Bad initial PE state"
		continue
	fi

	echo "$dev, Added"

	# Add to the list of devices to check.
	devices="$devices $dev"
	dev_count=$((dev_count + 1))
done

echo "Found ${dev_count} breakable devices..."

failed=0
# $devices is intentionally unquoted: it is a space-separated list.
for dev in $devices ; do
	echo "Breaking $dev..."

	# Re-check the PE state: breaking an earlier device may have
	# affected this one in the meantime.
	if ! pe_ok "$dev" ; then
		echo "Skipping $dev, Initial PE state is not ok"
		failed=$((failed + 1))
		continue
	fi

	if ! eeh_one_dev "$dev" ; then
		failed=$((failed + 1))
	fi
done

echo "$failed devices failed to recover ($dev_count tested)"
# Show any difference between the device list before and after the test.
lspci | diff -u "$pre_lspci" -

# Use the POSIX integer comparison; "==" is not a valid test operator
# in every /bin/sh (e.g. dash) and would make a clean run exit non-zero.
[ "$failed" -eq 0 ]
exit $?