#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Run a series of tests on remote systems under KVM.
#
# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
#	 kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>

# Saved up front because the positional parameters are shifted below.
scriptname=$0
args="$*"

if ! test -d tools/testing/selftests/rcutorture/bin
then
	echo $scriptname must be run from top-level directory of kernel source tree.
	exit 1
fi

KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
PATH=${KVM}/bin:$PATH; export PATH
. functions.sh

starttime="`get_starttime`"

# First argument: whitespace-separated list of remote system names.
systems="$1"
if test -z "$systems"
then
	echo $scriptname: Empty list of systems will go nowhere good, giving up.
	exit 1
fi
shift

# Pathnames:
# T:	  /tmp/kvm-remote.sh.$$
# resdir: /tmp/kvm-remote.sh.$$/res
# rundir: /tmp/kvm-remote.sh.$$/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
# TD:	  kvm-remote.sh.$$
# ds:	  yyyy.mm.dd-hh.mm.ss-remote

TD=kvm-remote.sh.$$
T=${TMPDIR-/tmp}/$TD
# Install the cleanup trap only once the directory is known to be ours,
# so that a failed mkdir cannot lead to removing a pre-existing directory.
if ! mkdir $T
then
	echo $scriptname: Unable to create temporary directory $T, giving up.
	exit 1
fi
trap 'rm -rf $T' 0

resdir="$T/res"
ds=`date +%Y.%m.%d-%H.%M.%S`-remote
rundir=$resdir/$ds
echo Results directory: $rundir
echo $scriptname $args
if echo $1 | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	# The awk script scans the arguments for "--datestamp" and emits
	# "--datestamp $ds" only when none was found.
	datestamp="`echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}'`"
	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report the saved status: by now $? holds the result of "test".
		echo $scriptname: kvm.sh failed exit code $ret
		cat $T/kvm.sh.out
		exit 2
	fi
	oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
	# Without a usable results directory every later log write would
	# land in the wrong place (for example "/remote-log").
	if test -z "$oldrun" || ! test -d "$oldrun"
	then
		echo $scriptname: Unable to find kvm.sh results directory, giving up.
		cat $T/kvm.sh.out
		exit 2
	fi
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh "$oldrun" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.  Relative paths are taken relative to the
	# current (top-level kernel source) directory.
	oldrun="$1"
	if ! echo $oldrun | grep -q '^/'
	then
		oldrun="`pwd`/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# From here on, log into a copy of the new run directory placed
	# under $KVM/res rather than into the old run.
	cp -a "$rundir" "$KVM/res/"
	oldrun="$KVM/res/$ds"
fi
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
# Log the output as well as displaying it, so that the header written
# just above is not left dangling in remote-log.
cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
echo | tee -a "$oldrun/remote-log"
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"

# Create the kvm-remote-N.sh scripts in the bin directory.
# Each line of the scenarios file yields one script: the first field
# (with its first "." removed) becomes N in kvm-remote-N.sh, and the
# remaining fields are passed to kvm-test-1-run-batch.sh.
awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
{
	n = $1;
	sub(/\./, "", n);
	fn = dest "/kvm-remote-" n ".sh"
	print "kvm-remote-noreap.sh " rundir " &" > fn;
	scenarios = "";
	for (i = 2; i <= NF; i++)
		scenarios = scenarios " " $i;
	print "kvm-test-1-run-batch.sh" scenarios >> fn;
	print "sync" >> fn;
	print "rm " rundir "/remote.run" >> fn;
}'
chmod +x $T/bin/kvm-remote-*.sh
# Archive bin and res relative to the parent of $T so that the tarball
# unpacks as $TD/bin and $TD/res.
( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )

# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
	ncpus="`ssh $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
	# Capture the ssh status immediately: an intervening pipeline
	# would replace it with the status of tee.
	ret=$?
	if test "$ret" -ne 0
	then
		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
		# Not piped: "exit" inside a pipeline only leaves a subshell.
		exit 4
	fi
	echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
done

# Download and expand the tarball on all systems.
echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
# Unpack into the parent of $T on the remote side as well, because all
# later remote commands refer to files by their $T-based pathnames.
remotetmp="`dirname $T`"
for i in $systems
do
	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
	cat $T/binres.tgz | ssh $i "mkdir -p \"$remotetmp\" && cd \"$remotetmp\" && tar -xzf -"
	ret=$?
	if test "$ret" -ne 0
	then
		echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
		exit 10
	fi
done

# Function to check for presence of a file on the specified system.
# Complain if the system cannot be reached, and retry after a wait.
# Currently just waits forever if a machine disappears.
#
# Returns 0 if the file exists, 1 if it does not, and otherwise the
# unexpected exit code from ssh.  Only exit code 255 is retried.
#
# Usage: checkremotefile system pathname
checkremotefile () {
	local ret
	local sleeptime=60

	while :
	do
		ssh $1 "test -f \"$2\""
		ret=$?
		if test "$ret" -eq 255
		then
			echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date`
		elif test "$ret" -eq 0
		then
			return 0
		elif test "$ret" -eq 1
		then
			echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\"
			return 1
		else
			# NOTE(review): the message promises a retry, but this
			# path returns to the caller, which treats any non-zero
			# status as "file absent".  Confirm which is intended.
			echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date`
			return $ret
		fi
		sleep $sleeptime
	done
}

# Function to start batches on idle remote $systems
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1.  Returns the next batch to start.
# Be careful to redirect all debug output to FD 2 (stderr).
#
# The "return value" is written to stdout.  A system is considered busy
# while its remote.run file exists.  Exits the script with status 11 if
# a batch cannot be started.
startbatches () {
	local curbatch="$1"
	local nbatches="$2"
	local ret
	local i

	# Each pass through the following loop examines one system.
	for i in $systems
	do
		if test "$curbatch" -gt "$nbatches"
		then
			echo $((nbatches + 1))
			return 0
		fi
		if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
		then
			continue # System still running last test, skip.
		fi
		ssh "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
		ret=$?
		if test "$ret" -ne 0
		then
			# NOTE(review): the caller redirects stderr to a file in
			# $T and only displays it after this function returns,
			# so this message appears to be lost when exiting here.
			echo ssh $i failed: exitcode $ret 1>&2
			exit 11
		fi
		echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
		curbatch=$((curbatch + 1))
	done
	echo $curbatch
}

# Launch all the scenarios.
nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
curbatch=1
while test "$curbatch" -le "$nbatches"
do
	# stdout carries the next batch number, stderr the progress messages.
	startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
	curbatch="`cat $T/curbatch`"
	if test -s "$T/startbatches.stderr"
	then
		cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
	fi
	# Batches remain but every system was busy: wait before rescanning.
	if test "$curbatch" -le "$nbatches"
	then
		sleep 30
	fi
done
echo All batches started. `date`

# Wait for all remaining scenarios to complete and collect results.
# Command run on each remote system: stream the result files back as a
# tarball on stdout, then remove the remote copy of the temporary tree.
collectcmd="cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1"
for sys in $systems
do
	# checkremotefile succeeds for as long as remote.run still exists.
	while :
	do
		checkremotefile "$sys" "$resdir/$ds/remote.run" || break
		sleep 30
	done
	echo " ---" Collecting results from $sys $(date)
	# Unpack the remote tarball into the local build-side run directory.
	(
		cd "$oldrun"
		ssh $sys "$collectcmd" | tar -xzf -
	)
done

# The statistics script runs on the left-hand side of a pipeline, so its
# exit status is passed back through a file under $T.
(
	kvm-end-run-stats.sh "$oldrun" "$starttime"
	echo $? > $T/exitcode
) | tee -a "$oldrun/remote-log"
finalcode="$(cat $T/exitcode)"
exit "$finalcode"