1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0+
3#
4# Run a series of tests on remote systems under KVM.
5#
6# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
7#	 kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
8#
9# Copyright (C) 2021 Facebook, Inc.
10#
11# Authors: Paul E. McKenney <paulmck@kernel.org>
12
# Remember how we were invoked so that it can be echoed and logged later.
scriptname=$0
args="$*"

# All pathnames below are relative to the top of the kernel source tree.
[ -d tools/testing/selftests/rcutorture/bin ] || {
	echo $scriptname must be run from top-level directory of kernel source tree.
	exit 1
}

# Put the rcutorture bin directory at the front of PATH before sourcing
# functions.sh, so that the helper scripts are found by name.
export KVM="$(pwd)/tools/testing/selftests/rcutorture"
export PATH="${KVM}/bin:$PATH"
. functions.sh

# get_starttime is not defined in this file; presumably it comes from
# functions.sh.  Its value is handed to kvm-end-run-stats.sh at the end.
starttime="$(get_starttime)"

# The first argument is the whitespace-separated list of remote systems.
# The remaining arguments are passed on to kvm.sh or kvm-again.sh.
systems="$1"
if [ -z "$systems" ]
then
	echo $scriptname: Empty list of systems will go nowhere good, giving up.
	exit 1
fi
shift
35
# Pathnames:
# T:	  /tmp/kvm-remote.sh.NNNNNN, where "NNNNNN" is chosen by mktemp
# resdir: /tmp/kvm-remote.sh.NNNNNN/res
# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
# TD:	  kvm-remote.sh.NNNNNN
# ds:	  yyyy.mm.dd-hh.mm.ss-remote

# Let mktemp create the directory: the name is not predictable and the
# creation is atomic, so a stale or hostile pre-existing directory is
# never adopted.  Give up right away if the directory cannot be created.
T="$(mktemp -d "${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX")"
if test -z "$T" || ! test -d "$T"
then
	echo $scriptname: Unable to create temporary directory, giving up.
	exit 1
fi
# Arm the cleanup only once we own the directory.
trap 'rm -rf "$T"' 0
# TD is the last pathname component of T, used when building the tarball.
TD="$(basename "$T")"

resdir="$T/res"
ds=`date +%Y.%m.%d-%H.%M.%S`-remote
rundir=$resdir/$ds
echo Results directory: $rundir
echo $scriptname $args
# If the first remaining argument looks like an option ("--..."), this is
# a fresh build driven by kvm.sh.  Otherwise it names an old run directory
# that is to be re-run by kvm-again.sh.  Either way, kvm-again.sh is invoked
# with --dryrun --remote to populate $rundir, and $oldrun ends up naming
# the local directory that receives remote-log.
if echo $1 | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	datestamp="`echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}'`"
	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report the saved status: by now $? reflects the "test" above.
		echo $scriptname: kvm.sh failed exit code $ret
		cat $T/kvm.sh.out
		exit 2
	fi
	# The build's results directory is taken from the kvm.sh output.
	oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.
	oldrun="$1"
	# Convert a relative pathname to an absolute one.
	if ! echo $oldrun | grep -q '^/'
	then
		oldrun="`pwd`/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# Keep a local copy of the new run directory, and from here on
	# log to that copy rather than to the original old run.
	cp -a "$rundir" "$KVM/res/"
	oldrun="$KVM/res/$ds"
fi
# Show the kvm-again.sh output and the run directories, and record all
# of it in remote-log.  The output itself is logged along with its
# header, matching the handling of the kvm.sh output.
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
echo | tee -a "$oldrun/remote-log"
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
120
# Create the kvm-remote-N.sh scripts in the bin directory.
# Each line of the scenarios file yields one script: the first field
# (with its first "." removed) supplies N, and the remaining fields are
# passed to kvm-test-1-run-batch.sh.  The script's last act is to remove
# the remote.run file from the run directory.
awk -v dest="$T/bin" -v rundir="$rundir" '
{
	batchno = $1;
	sub(/\./, "", batchno);
	script = dest "/kvm-remote-" batchno ".sh";
	batchargs = "";
	for (f = 2; f <= NF; f++)
		batchargs = batchargs " " $f;
	printf "kvm-test-1-run-batch.sh%s\n", batchargs > script;
	printf "rm %s/remote.run\n", rundir >> script;
}' < "$rundir"/scenarios
chmod +x $T/bin/kvm-remote-*.sh

# Bundle the bin and res trees, following symbolic links (-h), using
# pathnames relative to the directory containing $T.
(
	cd "$(dirname $T)"
	tar -chzf $T/binres.tgz "$TD/bin" "$TD/res"
)
135
# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
	ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
	# Capture the ssh status immediately: the exit status of an
	# assignment is that of its command substitution.
	ret=$?
	if test "$ret" -ne 0
	then
		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
		# Exit outside of any pipeline so that the script itself
		# terminates, not just a pipeline subshell.
		exit 4
	fi
	echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
done
148
# Download and expand the tarball on all systems.
# NOTE(review): the tarball is expanded under /tmp on the remote system,
# while later commands use $T, which honors a local $TMPDIR.  These agree
# only when $TMPDIR is unset or /tmp -- confirm whether this is intended.
for i in $systems
do
	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
	ssh $i "cd /tmp; tar -xzf -" < $T/binres.tgz
	ret=$?
	if test "$ret" -ne 0
	then
		echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
		# Exit outside of any pipeline so that the script itself
		# terminates, not just a pipeline subshell.
		exit 10
	fi
done
161
# Function to check whether a regular file exists on the specified system.
# An ssh exit status of 255 is treated as failure to reach the system:
# complain on stdout, sleep, and try again.  There is no retry limit, so
# this waits forever if a machine disappears.
#
# Usage: checkremotefile system pathname
#
# Returns the exit status of the remote "test -f": zero if the file
# exists, non-zero otherwise.
checkremotefile () {
	local sleeptime=60
	local status

	# Loop until ssh reports something other than its own failure code.
	until ssh $1 "test -f \"$2\""; status=$?; test "$status" -ne 255
	do
		echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. $(date)
		sleep $sleeptime
	done
	return $status
}
183
# Function to start batches on idle remote $systems
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1.  Makes one pass over $systems,
# starting the next batch on each system that has no remote.run file in
# the run directory.  Prints the number of the next batch to start on
# stdout, or nbatches + 1 once every batch has been started.
# Be careful to redirect all debug output to FD 2 (stderr).
#
# Exits the script with status 11 if ssh fails to launch a batch.  The
# failure message is also appended to $oldrun/remote-log, because the
# caller redirects stderr to a file under $T.
startbatches () {
	local curbatch="$1"
	local nbatches="$2"
	local i	# Keep the loop variable from clobbering the caller's $i.
	local ret

	# Each pass through the following loop examines one system.
	for i in $systems
	do
		if test "$curbatch" -gt "$nbatches"
		then
			echo $((nbatches + 1))
			return 0
		fi
		if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
		then
			continue # System still running last test, skip.
		fi
		# Create remote.run and launch the batch script in the
		# background on the remote system.  Note that $T and $PATH are
		# expanded locally, before the command is sent.
		ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
		ret=$?
		if test "$ret" -ne 0
		then
			echo ssh $i failed: exitcode $ret | tee -a "$oldrun/remote-log" 1>&2
			exit 11
		fi
		echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
		curbatch=$((curbatch + 1))
	done
	echo $curbatch
}
219
# Launch all the scenarios.
# The scenarios file has one line per batch.  Call startbatches
# repeatedly, pausing 30 seconds between passes, until it reports a
# next-batch number beyond the last batch.  startbatches reports that
# number on stdout and its progress messages on stderr; both are captured
# in files under $T, and any progress messages are shown and logged.
nbatches="$(wc -l "$rundir"/scenarios | awk '{ print $1 }')"
curbatch=1
while [ "$curbatch" -le "$nbatches" ]
do
	startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
	curbatch="$(cat $T/curbatch)"
	if [ -s "$T/startbatches.stderr" ]
	then
		tee -a "$oldrun/remote-log" < "$T/startbatches.stderr"
	fi
	# Batches remain, so give the systems time to finish some work.
	[ "$curbatch" -le "$nbatches" ] && sleep 30
done
echo All batches started. $(date)
237
# Wait for all remaining scenarios to complete and collect results.
# A system is considered busy for as long as its remote.run file exists.
# Once it is idle, its output files are streamed back as a tarball and
# unpacked into $oldrun, after which the remote copy of $T is removed.
collectcmd="cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu_pid */qemu-retval; rm -rf $T > /dev/null 2>&1"
for i in $systems
do
	until ! checkremotefile "$i" "$resdir/$ds/remote.run"
	do
		sleep 30
	done
	(
		cd "$oldrun"
		ssh $i "$collectcmd" | tar -xzf -
	)
done

# The exit status of kvm-end-run-stats.sh would be hidden by the pipe to
# tee, so it is passed through a file under $T and used as the script's
# own exit status.
{
	kvm-end-run-stats.sh "$oldrun" "$starttime"
	echo $? > $T/exitcode
} | tee -a "$oldrun/remote-log"
exit "$(cat $T/exitcode)"
250