#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#
# For every "OCC: Chip <c> Core <n>" entry found in the OPAL message log,
# check that the Core FIR reads zero, inject a RegFile recoverable error
# with putscom, and wait (up to one minute) for the kernel log to show the
# expected number of "Harmless Hypervisor Maintenance interrupt" messages.
#
# Requires getscom/putscom (in . or $PATH), ppc64_cpu, and access to dmesg,
# /dev/kmsg and /sys/firmware/opal/msglog.
# Exits 1 as soon as any check fails.


# do we have ./getscom, ./putscom?
# Fall back to $PATH only when BOTH tools are found there, so PUTSCOM can
# never end up empty.
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of HMI recovery messages currently present in dmesg.
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# when we exit, restore it
trap 'ppc64_cpu --smt-snooze-delay=100' EXIT
# Turn HUP/INT/TERM into a normal exit so the EXIT trap above runs once and
# the script does not keep injecting after the delay has been restored.
trap 'exit 1' HUP INT TERM

# for each chip+core combination
# todo - less fragile parsing
#
# Each matched line is "OCC: Chip <chip> Core <core>"; read splits it into
# five words, of which only the 3rd (chip) and 5th (core) are used.
#
# The loop is the last stage of a pipeline, so an "exit 1" inside it becomes
# the pipeline's status; as the pipeline is the final command of the script,
# that status is also the script's exit status.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ_word _chip_word chip _core_word core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)
	target_hmis=$((old_hmis + expected_hmis))

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	# Poll at most 12 times, 5 seconds apart (one minute in total).
	i=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$target_hmis" ] && [ "$i" -lt 12 ]; do
		echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		i=$((i + 1))
		new_hmis=$(COUNT_HMIS)
	done
	# Decide on the count itself rather than on the poll counter, so events
	# that show up on the very last poll are not reported as a timeout.
	if [ "$new_hmis" -lt "$target_hmis" ]; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done