#!/bin/sh
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# Purpose:
#   For every "OCC: Chip X Core Y" pair found in the OPAL message log,
#   check that the Core FIR reads back as zero, inject a RegFile
#   recoverable error through putscom, and wait (up to 1 minute) for the
#   expected number of HMI messages to show up in dmesg.
#
# Requires:
#   getscom and putscom (both in . or both in $PATH), ppc64_cpu, dmesg,
#   and a readable /sys/firmware/opal/msglog.
#
# Exit status:
#   1 as soon as the tools are missing, a FIR is non-zero, an injection
#   fails, or the expected HMIs are not seen in time.

# do we have ./getscom, ./putscom?
# Both tools are needed, so both are checked before either is used;
# 'command -v' is the POSIX way to look a command up in $PATH.
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of harmless-HMI messages currently present in dmesg.
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# Put the snooze delay back to 100 (the value this script restores to).
restore_snooze_delay() {
	ppc64_cpu --smt-snooze-delay=100
}

# when we exit, restore it
# The restore happens in the EXIT trap only. The signal traps just turn the
# signal into a normal exit (128 + signal number) so that the EXIT trap runs
# and the script stops injecting instead of carrying on with a reset delay.
trap restore_snooze_delay EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# for each chip+core combination
# todo - less fragile parsing
# Each matched line is "OCC: Chip <chip> Core <core>"; read splits it into
# fields directly, so the 3rd and 5th words are the chip and core ids.
# This pipeline is the last command of the script, so an 'exit 1' inside
# the loop becomes the exit status of the script.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ _chip_word chip _core_word core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)
	target_hmis=$((old_hmis + expected_hmis))

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	# Poll every 5 seconds, at most 12 times (1 minute in total).
	i=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$target_hmis" ] && [ "$i" -lt 12 ]; do
		echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		i=$((i + 1))
		new_hmis=$(COUNT_HMIS)
	done
	# Decide on the HMI count, not on the poll counter: the events may
	# arrive on the very last poll, which is still a success.
	if [ "$new_hmis" -lt "$target_hmis" ]; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done