summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUday Shankar <ushankar@purestorage.com>2026-04-06 07:25:31 +0300
committerJens Axboe <axboe@kernel.dk>2026-04-06 17:37:48 +0300
commit320f9b1c6a942a73c26be56742ee1da04f893a4f (patch)
tree78f4be6beea952f4bf0561d504ee37f9cc04698f
parent0842186d2c4e67d2f8c8c2d1d779e8acffd41b5b (diff)
downloadlinux-320f9b1c6a942a73c26be56742ee1da04f893a4f.tar.xz
selftests: ublk: test that teardown after incomplete recovery completes
Before the fix, teardown of a ublk server that was attempting to recover a device, but died when it had submitted a nonempty proper subset of the fetch commands to any queue would loop forever. Add a test to verify that, after the fix, teardown completes. This is done by: - Adding a new argument to the fault_inject target that causes it die after fetching a nonempty proper subset of the IOs to a queue - Using that argument in a new test while trying to recover an already-created device - Attempting to delete the ublk device at the end of the test; this hangs forever if teardown from the fault-injected ublk server never completed. It was manually verified that the test passes with the fix and hangs without it. Signed-off-by: Uday Shankar <ushankar@purestorage.com> Reviewed-by: Ming Lei <ming.lei@redhat.com> Link: https://patch.msgid.link/20260405-cancel-v2-2-02d711e643c2@purestorage.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--tools/testing/selftests/ublk/Makefile1
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c52
-rw-r--r--tools/testing/selftests/ublk/kublk.c7
-rw-r--r--tools/testing/selftests/ublk/kublk.h3
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_17.sh35
5 files changed, 95 insertions, 3 deletions
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index 8ac2d4a682a1..d338668c5a5f 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -18,6 +18,7 @@ TEST_PROGS += test_generic_10.sh
TEST_PROGS += test_generic_12.sh
TEST_PROGS += test_generic_13.sh
TEST_PROGS += test_generic_16.sh
+TEST_PROGS += test_generic_17.sh
TEST_PROGS += test_batch_01.sh
TEST_PROGS += test_batch_02.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
index 3b897f69c014..150896e02ff8 100644
--- a/tools/testing/selftests/ublk/fault_inject.c
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -10,11 +10,17 @@
#include "kublk.h"
+struct fi_opts {
+ long long delay_ns;
+ bool die_during_fetch;
+};
+
static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
struct ublk_dev *dev)
{
const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
unsigned long dev_size = 250UL << 30;
+ struct fi_opts *opts = NULL;
if (ctx->auto_zc_fallback) {
ublk_err("%s: not support auto_zc_fallback\n", __func__);
@@ -35,17 +41,52 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
};
ublk_set_integrity_params(ctx, &dev->tgt.params);
- dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
+ opts = calloc(1, sizeof(*opts));
+ if (!opts) {
+ ublk_err("%s: couldn't allocate memory for opts\n", __func__);
+ return -ENOMEM;
+ }
+
+ opts->delay_ns = ctx->fault_inject.delay_us * 1000;
+ opts->die_during_fetch = ctx->fault_inject.die_during_fetch;
+ dev->private_data = opts;
+
return 0;
}
+static void ublk_fault_inject_pre_fetch_io(struct ublk_thread *t,
+ struct ublk_queue *q, int tag,
+ bool batch)
+{
+ struct fi_opts *opts = q->dev->private_data;
+
+ if (!opts->die_during_fetch)
+ return;
+
+ /*
+ * Each queue fetches its IOs in increasing order of tags, so
+ * dying just before we're about to fetch tag 1 (regardless of
+ * what queue we're on) guarantees that we've fetched a nonempty
+ * proper subset of the tags on that queue.
+ */
+ if (tag == 1) {
+ /*
+ * Ensure our commands are actually live in the kernel
+ * before we die.
+ */
+ io_uring_submit(&t->ring);
+ raise(SIGKILL);
+ }
+}
+
static int ublk_fault_inject_queue_io(struct ublk_thread *t,
struct ublk_queue *q, int tag)
{
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
struct io_uring_sqe *sqe;
+ struct fi_opts *opts = q->dev->private_data;
struct __kernel_timespec ts = {
- .tv_nsec = (long long)q->dev->private_data,
+ .tv_nsec = opts->delay_ns,
};
ublk_io_alloc_sqes(t, &sqe, 1);
@@ -77,29 +118,34 @@ static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv
{
static const struct option longopts[] = {
{ "delay_us", 1, NULL, 0 },
+ { "die_during_fetch", 1, NULL, 0 },
{ 0, 0, 0, 0 }
};
int option_idx, opt;
ctx->fault_inject.delay_us = 0;
+ ctx->fault_inject.die_during_fetch = false;
while ((opt = getopt_long(argc, argv, "",
longopts, &option_idx)) != -1) {
switch (opt) {
case 0:
if (!strcmp(longopts[option_idx].name, "delay_us"))
ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10);
+ if (!strcmp(longopts[option_idx].name, "die_during_fetch"))
+ ctx->fault_inject.die_during_fetch = strtoll(optarg, NULL, 10);
}
}
}
static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
{
- printf("\tfault_inject: [--delay_us us (default 0)]\n");
+ printf("\tfault_inject: [--delay_us us (default 0)] [--die_during_fetch 1]\n");
}
const struct ublk_tgt_ops fault_inject_tgt_ops = {
.name = "fault_inject",
.init_tgt = ublk_fault_inject_tgt_init,
+ .pre_fetch_io = ublk_fault_inject_pre_fetch_io,
.queue_io = ublk_fault_inject_queue_io,
.tgt_io_done = ublk_fault_inject_tgt_io_done,
.parse_cmd_line = ublk_fault_inject_cmd_line,
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index e1c3b3c55e56..e5b787ba2175 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -796,6 +796,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
q = &t->dev->q[q_id];
io = &q->ios[tag];
io->buf_index = j++;
+ if (q->tgt_ops->pre_fetch_io)
+ q->tgt_ops->pre_fetch_io(t, q, tag, false);
ublk_queue_io_cmd(t, io);
}
} else {
@@ -807,6 +809,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t)
for (i = 0; i < q->q_depth; i++) {
io = &q->ios[i];
io->buf_index = i;
+ if (q->tgt_ops->pre_fetch_io)
+ q->tgt_ops->pre_fetch_io(t, q, i, false);
ublk_queue_io_cmd(t, io);
}
}
@@ -983,6 +987,9 @@ static void ublk_batch_setup_queues(struct ublk_thread *t)
if (t->q_map[i] == 0)
continue;
+ if (q->tgt_ops->pre_fetch_io)
+ q->tgt_ops->pre_fetch_io(t, q, 0, true);
+
ret = ublk_batch_queue_prep_io_cmds(t, q);
ublk_assert(ret >= 0);
}
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 02f0c55d006b..6d1762aa30df 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -60,6 +60,7 @@ struct stripe_ctx {
struct fault_inject_ctx {
/* fault_inject */
unsigned long delay_us;
+ bool die_during_fetch;
};
struct dev_ctx {
@@ -138,6 +139,8 @@ struct ublk_tgt_ops {
int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
void (*deinit_tgt)(struct ublk_dev *);
+ void (*pre_fetch_io)(struct ublk_thread *t, struct ublk_queue *q,
+ int tag, bool batch);
int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag);
void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *,
const struct io_uring_cqe *);
diff --git a/tools/testing/selftests/ublk/test_generic_17.sh b/tools/testing/selftests/ublk/test_generic_17.sh
new file mode 100755
index 000000000000..2278b5fc9dba
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_17.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+ERR_CODE=0
+
+_prep_test "fault_inject" "teardown after incomplete recovery"
+
+# First start and stop a ublk server with device configured for recovery
+dev_id=$(_add_ublk_dev -t fault_inject -r 1)
+_check_add_dev $TID $?
+state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+if [ "$state" != "QUIESCED" ]; then
+ echo "device isn't quiesced($state) after $action"
+ ERR_CODE=255
+fi
+
+# Then recover the device, but use --die_during_fetch to have the ublk
+# server die while a queue has some (but not all) I/Os fetched
+${UBLK_PROG} recover -n "${dev_id}" --foreground -t fault_inject --die_during_fetch 1
+RECOVER_RES=$?
+# 137 is the result when dying of SIGKILL
+if (( RECOVER_RES != 137 )); then
+ echo "recover command exited with unexpected code ${RECOVER_RES}!"
+ ERR_CODE=255
+fi
+
+# Clean up the device. This can only succeed once teardown of the above
+# exited ublk server completes. So if teardown never completes, we will
+# time out here
+_ublk_del_dev "${dev_id}"
+
+_cleanup_test "fault_inject"
+_show_result $TID $ERR_CODE