summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@kernel.org>2024-10-09 21:56:28 +0300
committerUladzislau Rezki (Sony) <urezki@gmail.com>2024-12-14 18:16:58 +0300
commit6ca774f06a7df650f41b38b67bec0665d862ac23 (patch)
tree5a5d991bc67d937e4cc36642e9118910849968de
parent1806b1f97f7ab0bb8bda7c117b2573a335cea940 (diff)
downloadlinux-6ca774f06a7df650f41b38b67bec0665d862ac23.tar.xz
torture: Make kvm-remote.sh give up on unresponsive system
Currently, a system that stops responding at the wrong time will hang kvm-remote.sh. This can happen when the system in question is forced offline for maintenance, and there is currently no way for the user to kick this script into moving ahead. This commit therefore causes kvm-remote.sh to wait at most 15 minutes for a non-responsive system, that is, a system for which ssh gives an exit code of 255. Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh25
1 files changed, 21 insertions, 4 deletions
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 134cdef5a6e0..48a8052d5dae 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -181,10 +181,11 @@ done
# Function to check for presence of a file on the specified system.
# Complain if the system cannot be reached, and retry after a wait.
-# Currently just waits forever if a machine disappears.
+# Currently just waits 15 minutes if a machine disappears.
#
# Usage: checkremotefile system pathname
checkremotefile () {
+ local nsshfails=0
local ret
local sleeptime=60
@@ -195,6 +196,11 @@ checkremotefile () {
if test "$ret" -eq 255
then
echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
+ nsshfails=$((nsshfails+1))
+ if ((nsshfails > 15))
+ then
+ return 255
+ fi
elif test "$ret" -eq 0
then
return 0
@@ -268,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log"
for i in $systems
do
echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
- while checkremotefile "$i" "$resdir/$ds/remote.run"
+ while :
do
+ checkremotefile "$i" "$resdir/$ds/remote.run"
+ ret=$?
+ if test "$ret" -eq 1
+ then
+ echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
+ ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+ break;
+ fi
+ if test "$ret" -eq 255
+ then
+ echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log"
+ break;
+ fi
sleep 30
done
- echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
- ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"