#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
#
# Copyright 2015, Daniel Axtens, IBM Corporation
#


# Locate the xscom helpers and record them in GETSCOM / PUTSCOM.
# A pair of executables in the current directory wins over $PATH.
# Both tools are required: getscom is used to read the FIR, putscom to
# write it, so the $PATH lookup must find both of them, not just getscom.
if [ -x ./getscom ] && [ -x ./putscom ]; then
	GETSCOM=./getscom
	PUTSCOM=./putscom
elif command -v getscom > /dev/null && command -v putscom > /dev/null; then
	GETSCOM=$(command -v getscom)
	PUTSCOM=$(command -v putscom)
else
	cat <<EOF
Can't find getscom/putscom in . or \$PATH.
See https://github.com/open-power/skiboot.
The tool is in external/xscom-utils
EOF
	exit 1
fi

# We will get 8 HMI events per injection
# todo: deal with things being offline
expected_hmis=8

# Print the number of lines in the dmesg output that contain the
# 'Harmless Hypervisor Maintenance interrupt' message.
# Outputs: the count, as a single integer on stdout (0 when none match).
COUNT_HMIS() {
	dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
}

# massively expand snooze delay, allowing injection on all cores
ppc64_cpu --smt-snooze-delay=1000000000

# Set the snooze delay back to 100, the value this script restores on exit.
restore_snooze_delay() {
	ppc64_cpu --smt-snooze-delay=100
}

# when we exit, restore it
# The restore itself hangs off EXIT only. The signal traps just exit, which
# in turn fires the EXIT trap: that way a hangup or interrupt stops the run
# instead of restoring the delay and then carrying on with the injections.
trap restore_snooze_delay EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# Wait for the HMIs triggered by one injection to show up in the HMI count.
#
# Arguments: $1 - HMI count taken just before the injection
# Globals:   expected_hmis (read), COUNT_HMIS (called); sets i and new_hmis
# Outputs:   one progress line on stdout before each 5 second sleep
# Returns:   0 once at least $1 + expected_hmis HMIs have been counted,
#            non-zero if that is still not the case after 12 polls (1 min)
wait_for_hmis() {
	i=0
	new_hmis=$(COUNT_HMIS)
	while [ "$new_hmis" -lt "$(($1 + expected_hmis))" ] && [ "$i" -lt 12 ]; do
		echo "Seen $((new_hmis - $1)) HMI(s) out of $expected_hmis expected, sleeping"
		sleep 5
		i=$((i + 1))
		new_hmis=$(COUNT_HMIS)
	done
	# Decide on the count, not on the poll counter: events that land during
	# the final sleep must not be reported as a timeout.
	[ "$new_hmis" -ge "$(($1 + expected_hmis))" ]
}

# for each chip+core combination
# todo - less fragile parsing
# Each matched line reads "OCC: Chip <chip> Core <core>", so the chip is the
# third word and the core the fifth; the other words are discarded.
# The loop is the right-hand side of a pipe: an "exit" inside it leaves the
# loop, and because the pipeline is the last command of the script its status
# becomes the script's exit status.
grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read -r _occ _chipword chip _coreword core; do
	fir="0x1${core}013100"

	# verify that Core FIR is zero as expected
	if [ "$("$GETSCOM" -c "0x${chip}" "$fir")" != 0 ]; then
		echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
		echo "Result of $GETSCOM -c 0x${chip} $fir:"
		"$GETSCOM" -c "0x${chip}" "$fir"
		echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
		echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
		exit 1
	fi

	# keep track of the number of HMIs handled
	old_hmis=$(COUNT_HMIS)

	# do injection, adding a marker to dmesg for clarity
	echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
	# inject a RegFile recoverable error
	if ! "$PUTSCOM" -c "0x${chip}" "$fir" 2000000000000000 > /dev/null; then
		echo "Error injecting. Aborting!"
		exit 1
	fi

	# now we want to wait for all the HMIs to be processed
	# we expect one per thread on the core
	if ! wait_for_hmis "$old_hmis"; then
		echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
		exit 1
	fi
	echo "Processed $expected_hmis events; presumed success. Check dmesg."
	echo ""
done