#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
# Run a series of tests on remote systems under KVM.
#
# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
#	 kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
#
# Copyright (C) 2021 Facebook, Inc.
#
# Authors: Paul E. McKenney <paulmck@kernel.org>

scriptname=$0
args="$*"

if ! test -d tools/testing/selftests/rcutorture/bin
then
	echo $scriptname must be run from top-level directory of kernel source tree.
	exit 1
fi

RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh

starttime="`get_starttime`"

# The first argument is the whitespace-separated list of remote systems;
# everything after it is passed on to kvm.sh or kvm-again.sh.
systems="$1"
if test -z "$systems"
then
	echo $scriptname: Empty list of systems will go nowhere good, giving up.
	exit 1
fi
shift

# Pathnames:
# T:	  /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
# resdir: /tmp/kvm-remote.sh.NNNNNN/res
# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
# oldrun: `pwd`/tools/testing/.../res/$otherds
#
# Pathname segments:
# TD:	  kvm-remote.sh.NNNNNN
# ds:	  yyyy.mm.dd-hh.mm.ss-remote

T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`"
trap 'rm -rf $T' 0
TD="`basename "$T"`"

resdir="$T/res"
ds=`date +%Y.%m.%d-%H.%M.%S`-remote
rundir=$resdir/$ds
echo Results directory: $rundir
echo $scriptname $args
# A leading "--" on the next argument selects a fresh build; anything
# else is taken to be the pathname of an old run to be re-used.
if echo $1 | grep -q '^--'
then
	# Fresh build.  Create a datestamp unless the caller supplied one.
	datestamp="`echo "$@" | awk -v ds="$ds" '{
		for (i = 1; i < NF; i++) {
			if ($i == "--datestamp") {
				ds = "";
				break;
			}
		}
		if (ds != "")
			print "--datestamp " ds;
	}'`"
	kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# Report $ret: by now $? holds the status of the "test"
		# command above (zero), not that of kvm.sh.
		echo $scriptname: kvm.sh failed exit code $ret
		cat $T/kvm.sh.out
		exit 2
	fi
	# kvm.sh reports where it put its results; that becomes the old run.
	oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	echo | tee -a "$oldrun/remote-log"
	echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
	cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
	# We are going to run this, so remove the buildonly files.
	rm -f "$oldrun"/*/buildonly
	kvm-again.sh $oldrun --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# As above, $? would be the status of "test", so use $ret.
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
else
	# Re-use old run.
	oldrun="$1"
	# Convert a relative pathname to an absolute one.
	if ! echo $oldrun | grep -q '^/'
	then
		oldrun="`pwd`/$oldrun"
	fi
	shift
	touch "$oldrun/remote-log"
	echo $scriptname $args >> "$oldrun/remote-log"
	kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
	ret=$?
	if test "$ret" -ne 0
	then
		# As above, $? would be the status of "test", so use $ret.
		echo $scriptname: kvm-again.sh failed exit code $ret | tee -a "$oldrun/remote-log"
		cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
		exit 2
	fi
	# Keep a local copy of the new run directory and do all further
	# logging and result collection there.
	cp -a "$rundir" "$RCUTORTURE/res/"
	oldrun="$RCUTORTURE/res/$ds"
fi
echo | tee -a "$oldrun/remote-log"
echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
cat $T/kvm-again.sh.out
echo | tee -a "$oldrun/remote-log"
echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"

# Create the kvm-remote-N.sh scripts in the bin directory.
# Each line of the scenarios file yields one kvm-remote-N.sh script, where
# N is the line's first field with its first "." removed.  The remaining
# fields are handed to kvm-test-1-run-batch.sh.  The generated script
# removes remote.run from the run directory as its last step.
awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
{
	n = $1;
	sub(/\./, "", n);
	fn = dest "/kvm-remote-" n ".sh"
	print "kvm-remote-noreap.sh " rundir " &" > fn;
	scenarios = "";
	for (i = 2; i <= NF; i++)
		scenarios = scenarios " " $i;
	print "kvm-test-1-run-batch.sh" scenarios >> fn;
	print "sync" >> fn;
	print "rm " rundir "/remote.run" >> fn;
}'
chmod +x $T/bin/kvm-remote-*.sh
# Archive relative to the parent of $T so that the tarball unpacks as
# $TD/bin and $TD/res under whatever directory it is extracted in.
( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )

# Check first to avoid the need for cleanup for system-name typos
for i in $systems
do
	ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`"
	ret=$?
	if test "$ret" -ne 0
	then
		echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log"
		exit 4
	fi
	echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log"
done

# Download and expand the tarball on all systems.
echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
for i in $systems
do
	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
	cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
	ret=$?
	tries=0
	# Retry a failed download once per minute, giving up only after
	# more than five prior retries have also failed.
	while test "$ret" -ne 0
	do
		echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log"
		sleep 60
		cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
		ret=$?
		if test "$ret" -ne 0
		then
			# Must be -gt: a bare ">" is a redirection that
			# creates a file named "5" and always succeeds.
			if test "$tries" -gt 5
			then
				echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
				exit 10
			fi
		fi
		tries=$((tries+1))
	done
done

# Function to check for presence of a file on the specified system.
# Complain if the system cannot be reached, and retry after a wait.
# Currently just waits forever if a machine disappears.
#
# Usage: checkremotefile system pathname
#
# Reads the global $oldrun, whose remote-log file receives all messages.
#
# Returns 0 if the file exists, 1 if it does not, and any other ssh exit
# status except 255 unchanged.  An exit status of 255 is treated as an ssh
# failure, which is retried indefinitely at $sleeptime-second intervals.
checkremotefile () {
	local ret
	local sleeptime=60

	while :
	do
		ssh -o BatchMode=yes "$1" "test -f \"$2\""
		ret=$?
		if test "$ret" -eq 255
		then
			echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
		elif test "$ret" -eq 0
		then
			return 0
		elif test "$ret" -eq 1
		then
			echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
			return 1
		else
			# NOTE(review): despite the "retry" wording in this
			# message, this branch returns to the caller at once.
			echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
			return $ret
		fi
		sleep $sleeptime
	done
}

# Function to start batches on idle remote $systems
#
# Usage: startbatches curbatch nbatches
#
# Batches are numbered starting at 1.  Returns the next batch to start.
# Be careful to redirect all debug output to FD 2 (stderr).
#
# Reads the globals $systems, $resdir, $ds, $rundir, $T, $PATH and $oldrun.
# The next batch number is written to stdout.  Exits the script with
# status 11 if ssh fails while launching a batch.
startbatches () {
	local curbatch="$1"
	local nbatches="$2"
	local ret
	local i		# Keep the loop variable out of the caller's scope.

	# Each pass through the following loop examines one system.
	for i in $systems
	do
		if test "$curbatch" -gt "$nbatches"
		then
			echo $((nbatches + 1))
			return 0
		fi
		if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
		then
			continue # System still running last test, skip.
		fi
		# remote.run marks the system as busy.  $T and $PATH are
		# expanded locally before the command is sent.
		ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
		ret=$?
		if test "$ret" -ne 0
		then
			# Also record the failure in remote-log: the caller
			# captures stderr in a file under $T, which the exit
			# trap deletes before anyone can read it.
			echo ssh $i failed: exitcode $ret | tee -a "$oldrun/remote-log" 1>&2
			exit 11
		fi
		echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
		curbatch=$((curbatch + 1))
	done
	echo $curbatch
}

# Launch all the scenarios.
# One batch per line of the scenarios file.
nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
curbatch=1
while test "$curbatch" -le "$nbatches"
do
	# startbatches reports the next batch number on stdout and all of
	# its chatter on stderr, so capture the two separately.
	startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
	curbatch="`cat $T/curbatch`"
	if test -s "$T/startbatches.stderr"
	then
		cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
	fi
	# Batches remain, so wait before looking for idle systems again.
	if test "$curbatch" -le "$nbatches"
	then
		sleep 30
	fi
done
echo All batches started. `date` | tee -a "$oldrun/remote-log"

# Wait for all remaining scenarios to complete and collect results.
for i in $systems
do
	echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
	# The remote.run file exists for as long as the system is busy.
	while checkremotefile "$i" "$resdir/$ds/remote.run"
	do
		sleep 30
	done
	echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
	(
		# Do not unpack results anywhere but $oldrun, and do not
		# run the remote tar anywhere but $rundir.  The remote
		# copy of $T is removed either way.
		cd "$oldrun" || exit 1
		ssh -o BatchMode=yes $i "cd $rundir && tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf -
	)
done

# The exit status of kvm-end-run-stats.sh is passed through a file because
# the status of the pipeline itself is that of tee.
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
exit "`cat $T/exitcode`"