summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-10-22 01:14:14 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-10-22 01:14:14 +0300
commitd4b7332eef46ed403061e27b03c71ad26b2f5353 (patch)
tree8e6abe26a8fcf3c7c1f091d954fc12633b703c03
parent294e73ffb0efac4c8bac2d9e6a896225098bd419 (diff)
parent2db96217e7e515071726ca4ec791742c4202a1b2 (diff)
downloadlinux-d4b7332eef46ed403061e27b03c71ad26b2f5353.tar.xz
Merge tag 'block-6.1-2022-10-20' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe: - NVMe pull request via Christoph: - fix nvme-hwmon for DMA non-cohehrent architectures (Serge Semin) - add a nvme-hwmong maintainer (Christoph Hellwig) - fix error pointer dereference in error handling (Dan Carpenter) - fix invalid memory reference in nvmet_subsys_attr_qid_max_show (Daniel Wagner) - don't limit the DMA segment size in nvme-apple (Russell King) - fix workqueue MEM_RECLAIM flushing dependency (Sagi Grimberg) - disable write zeroes on various Kingston SSDs (Xander Li) - fix a memory leak with block device tracing (Ye) - flexible-array fix for ublk (Yushan) - document the ublk recovery feature from this merge window (ZiyangZhang) - remove dead bfq variable in struct (Yuwei) - error handling rq clearing fix (Yu) - add an IRQ safety check for the cached bio freeing (Pavel) - drbd bio cloning fix (Christoph) * tag 'block-6.1-2022-10-20' of git://git.kernel.dk/linux: blktrace: remove unnessary stop block trace in 'blk_trace_shutdown' blktrace: fix possible memleak in '__blk_trace_remove' blktrace: introduce 'blk_trace_{start,stop}' helper bio: safeguard REQ_ALLOC_CACHE bio put block, bfq: remove unused variable for bfq_queue drbd: only clone bio if we have a backing device ublk_drv: use flexible-array member instead of zero-length array nvmet: fix invalid memory reference in nvmet_subsys_attr_qid_max_show nvmet: fix workqueue MEM_RECLAIM flushing dependency nvme-hwmon: kmalloc the NVME SMART log buffer nvme-hwmon: consistently ignore errors from nvme_hwmon_init nvme: add Guenther as nvme-hwmon maintainer nvme-apple: don't limit DMA segement size nvme-pci: disable write zeroes on various Kingston SSD nvme: fix error pointer dereference in error handling Documentation: document ublk user recovery feature blk-mq: fix null pointer dereference in blk_mq_clear_rq_mapping()
-rw-r--r--Documentation/block/ublk.rst36
-rw-r--r--MAINTAINERS6
-rw-r--r--block/bfq-iosched.h4
-rw-r--r--block/bio.c2
-rw-r--r--block/blk-mq.c7
-rw-r--r--drivers/block/drbd/drbd_req.c14
-rw-r--r--drivers/block/ublk_drv.c2
-rw-r--r--drivers/nvme/host/apple.c2
-rw-r--r--drivers/nvme/host/core.c8
-rw-r--r--drivers/nvme/host/hwmon.c32
-rw-r--r--drivers/nvme/host/pci.c10
-rw-r--r--drivers/nvme/target/configfs.c4
-rw-r--r--drivers/nvme/target/core.c2
-rw-r--r--kernel/trace/blktrace.c82
14 files changed, 135 insertions, 76 deletions
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst
index 2122d1a4a541..ba45c46cc0da 100644
--- a/Documentation/block/ublk.rst
+++ b/Documentation/block/ublk.rst
@@ -144,6 +144,42 @@ managing and controlling ublk devices with help of several control commands:
For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's
responsibility to save IO target specific info in userspace.
+- ``UBLK_CMD_START_USER_RECOVERY``
+
+ This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
+ command is accepted after the old process has exited, ublk device is quiesced
+ and ``/dev/ublkc*`` is released. User should send this command before he starts
+ a new process which re-opens ``/dev/ublkc*``. When this command returns, the
+ ublk device is ready for the new process.
+
+- ``UBLK_CMD_END_USER_RECOVERY``
+
+ This command is valid if ``UBLK_F_USER_RECOVERY`` feature is enabled. This
+ command is accepted after ublk device is quiesced and a new process has
+ opened ``/dev/ublkc*`` and get all ublk queues be ready. When this command
+ returns, ublk device is unquiesced and new I/O requests are passed to the
+ new process.
+
+- user recovery feature description
+
+ Two new features are added for user recovery: ``UBLK_F_USER_RECOVERY`` and
+ ``UBLK_F_USER_RECOVERY_REISSUE``.
+
+ With ``UBLK_F_USER_RECOVERY`` set, after one ubq_daemon(ublk server's io
+ handler) is dying, ublk does not delete ``/dev/ublkb*`` during the whole
+ recovery stage and ublk device ID is kept. It is ublk server's
+ responsibility to recover the device context by its own knowledge.
+ Requests which have not been issued to userspace are requeued. Requests
+ which have been issued to userspace are aborted.
+
+ With ``UBLK_F_USER_RECOVERY_REISSUE`` set, after one ubq_daemon(ublk
+ server's io handler) is dying, contrary to ``UBLK_F_USER_RECOVERY``,
+ requests which have been issued to userspace are requeued and will be
+ re-issued to the new process after handling ``UBLK_CMD_END_USER_RECOVERY``.
+ ``UBLK_F_USER_RECOVERY_REISSUE`` is designed for backends who tolerate
+ double-write since the driver may issue the same I/O request twice. It
+ might be useful to a read-only FS or a VM backend.
+
Data plane
----------
diff --git a/MAINTAINERS b/MAINTAINERS
index 8904f186c83b..bdd43abd2683 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14713,6 +14713,12 @@ F: drivers/nvme/target/auth.c
F: drivers/nvme/target/fabrics-cmd-auth.c
F: include/linux/nvme-auth.h
+NVM EXPRESS HARDWARE MONITORING SUPPORT
+M: Guenter Roeck <linux@roeck-us.net>
+L: linux-nvme@lists.infradead.org
+S: Supported
+F: drivers/nvme/host/hwmon.c
+
NVM EXPRESS FC TRANSPORT DRIVERS
M: James Smart <james.smart@broadcom.com>
L: linux-nvme@lists.infradead.org
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 64ee618064ba..71f721670ab6 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -369,12 +369,8 @@ struct bfq_queue {
unsigned long split_time; /* time of last split */
unsigned long first_IO_time; /* time of first I/O for this queue */
-
unsigned long creation_time; /* when this queue is created */
- /* max service rate measured so far */
- u32 max_service_rate;
-
/*
* Pointer to the waker queue for this queue, i.e., to the
* queue Q such that this queue happens to get new I/O right
diff --git a/block/bio.c b/block/bio.c
index 633a902468ec..57c2f327225b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -741,7 +741,7 @@ void bio_put(struct bio *bio)
return;
}
- if (bio->bi_opf & REQ_ALLOC_CACHE) {
+ if ((bio->bi_opf & REQ_ALLOC_CACHE) && !WARN_ON_ONCE(in_interrupt())) {
struct bio_alloc_cache *cache;
bio_uninit(bio);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 8070b6c10e8d..33292c01875d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3112,8 +3112,11 @@ static void blk_mq_clear_rq_mapping(struct blk_mq_tags *drv_tags,
struct page *page;
unsigned long flags;
- /* There is no need to clear a driver tags own mapping */
- if (drv_tags == tags)
+ /*
+ * There is no need to clear mapping if driver tags is not initialized
+ * or the mapping belongs to the driver tags.
+ */
+ if (!drv_tags || drv_tags == tags)
return;
list_for_each_entry(page, &tags->page_list, lru) {
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 8f7f144e54f3..7f9bcc82fc9c 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -30,11 +30,6 @@ static struct drbd_request *drbd_req_new(struct drbd_device *device, struct bio
return NULL;
memset(req, 0, sizeof(*req));
- req->private_bio = bio_alloc_clone(device->ldev->backing_bdev, bio_src,
- GFP_NOIO, &drbd_io_bio_set);
- req->private_bio->bi_private = req;
- req->private_bio->bi_end_io = drbd_request_endio;
-
req->rq_state = (bio_data_dir(bio_src) == WRITE ? RQ_WRITE : 0)
| (bio_op(bio_src) == REQ_OP_WRITE_ZEROES ? RQ_ZEROES : 0)
| (bio_op(bio_src) == REQ_OP_DISCARD ? RQ_UNMAP : 0);
@@ -1219,9 +1214,12 @@ drbd_request_prepare(struct drbd_device *device, struct bio *bio)
/* Update disk stats */
req->start_jif = bio_start_io_acct(req->master_bio);
- if (!get_ldev(device)) {
- bio_put(req->private_bio);
- req->private_bio = NULL;
+ if (get_ldev(device)) {
+ req->private_bio = bio_alloc_clone(device->ldev->backing_bdev,
+ bio, GFP_NOIO,
+ &drbd_io_bio_set);
+ req->private_bio->bi_private = req;
+ req->private_bio->bi_end_io = drbd_request_endio;
}
/* process discards always from our submitter thread */
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 2651bf41dde3..5afce6ffaadf 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -124,7 +124,7 @@ struct ublk_queue {
bool force_abort;
unsigned short nr_io_ready; /* how many ios setup */
struct ublk_device *dev;
- struct ublk_io ios[0];
+ struct ublk_io ios[];
};
#define UBLK_DAEMON_MONITOR_PERIOD (5 * HZ)
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 5fc5ea196b40..ff8b083dc5c6 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1039,6 +1039,8 @@ static void apple_nvme_reset_work(struct work_struct *work)
dma_max_mapping_size(anv->dev) >> 9);
anv->ctrl.max_segments = NVME_MAX_SEGS;
+ dma_set_max_seg_size(anv->dev, 0xffffffff);
+
/*
* Enable NVMMU and linear submission queues.
* While we could keep those disabled and pretend this is slightly
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 059737c1a2c1..dc4220600585 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3262,8 +3262,12 @@ int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl)
return ret;
if (!ctrl->identified && !nvme_discovery_ctrl(ctrl)) {
+ /*
+ * Do not return errors unless we are in a controller reset,
+ * the controller works perfectly fine without hwmon.
+ */
ret = nvme_hwmon_init(ctrl);
- if (ret < 0)
+ if (ret == -EINTR)
return ret;
}
@@ -4846,7 +4850,7 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
return 0;
out_cleanup_admin_q:
- blk_mq_destroy_queue(ctrl->fabrics_q);
+ blk_mq_destroy_queue(ctrl->admin_q);
out_free_tagset:
blk_mq_free_tag_set(ctrl->admin_tagset);
return ret;
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
index 0a586d712920..9e6e56c20ec9 100644
--- a/drivers/nvme/host/hwmon.c
+++ b/drivers/nvme/host/hwmon.c
@@ -12,7 +12,7 @@
struct nvme_hwmon_data {
struct nvme_ctrl *ctrl;
- struct nvme_smart_log log;
+ struct nvme_smart_log *log;
struct mutex read_lock;
};
@@ -60,14 +60,14 @@ static int nvme_set_temp_thresh(struct nvme_ctrl *ctrl, int sensor, bool under,
static int nvme_hwmon_get_smart_log(struct nvme_hwmon_data *data)
{
return nvme_get_log(data->ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
- NVME_CSI_NVM, &data->log, sizeof(data->log), 0);
+ NVME_CSI_NVM, data->log, sizeof(*data->log), 0);
}
static int nvme_hwmon_read(struct device *dev, enum hwmon_sensor_types type,
u32 attr, int channel, long *val)
{
struct nvme_hwmon_data *data = dev_get_drvdata(dev);
- struct nvme_smart_log *log = &data->log;
+ struct nvme_smart_log *log = data->log;
int temp;
int err;
@@ -163,7 +163,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
case hwmon_temp_max:
case hwmon_temp_min:
if ((!channel && data->ctrl->wctemp) ||
- (channel && data->log.temp_sensor[channel - 1])) {
+ (channel && data->log->temp_sensor[channel - 1])) {
if (data->ctrl->quirks &
NVME_QUIRK_NO_TEMP_THRESH_CHANGE)
return 0444;
@@ -176,7 +176,7 @@ static umode_t nvme_hwmon_is_visible(const void *_data,
break;
case hwmon_temp_input:
case hwmon_temp_label:
- if (!channel || data->log.temp_sensor[channel - 1])
+ if (!channel || data->log->temp_sensor[channel - 1])
return 0444;
break;
default:
@@ -230,7 +230,13 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
- return 0;
+ return -ENOMEM;
+
+ data->log = kzalloc(sizeof(*data->log), GFP_KERNEL);
+ if (!data->log) {
+ err = -ENOMEM;
+ goto err_free_data;
+ }
data->ctrl = ctrl;
mutex_init(&data->read_lock);
@@ -238,8 +244,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
err = nvme_hwmon_get_smart_log(data);
if (err) {
dev_warn(dev, "Failed to read smart log (error %d)\n", err);
- kfree(data);
- return err;
+ goto err_free_log;
}
hwmon = hwmon_device_register_with_info(dev, "nvme",
@@ -247,11 +252,17 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
NULL);
if (IS_ERR(hwmon)) {
dev_warn(dev, "Failed to instantiate hwmon device\n");
- kfree(data);
- return PTR_ERR(hwmon);
+ err = PTR_ERR(hwmon);
+ goto err_free_log;
}
ctrl->hwmon_device = hwmon;
return 0;
+
+err_free_log:
+ kfree(data->log);
+err_free_data:
+ kfree(data);
+ return err;
}
void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
@@ -262,6 +273,7 @@ void nvme_hwmon_exit(struct nvme_ctrl *ctrl)
hwmon_device_unregister(ctrl->hwmon_device);
ctrl->hwmon_device = NULL;
+ kfree(data->log);
kfree(data);
}
}
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bcbef6bc5672..31e577b01257 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3511,6 +3511,16 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+ { PCI_DEVICE(0x2646, 0x5018), /* KINGSTON OM8SFP4xxxxP OS21012 NVMe SSD */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x2646, 0x5016), /* KINGSTON OM3PGP4xxxxP OS21011 NVMe SSD */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x2646, 0x501A), /* KINGSTON OM8PGP4xxxxP OS21005 NVMe SSD */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x2646, 0x501B), /* KINGSTON OM8PGP4xxxxQ OS21005 NVMe SSD */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x2646, 0x501E), /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1e4B, 0x1001), /* MAXIO MAP1001 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e4B, 0x1002), /* MAXIO MAP1002 */
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index e34a2896fedb..9443ee1d4ae3 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1290,12 +1290,8 @@ static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item,
static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item,
const char *page, size_t cnt)
{
- struct nvmet_port *port = to_nvmet_port(item);
u16 qid_max;
- if (nvmet_is_port_enabled(port, __func__))
- return -EACCES;
-
if (sscanf(page, "%hu\n", &qid_max) != 1)
return -EINVAL;
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 14677145bbba..aecb5853f8da 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1176,7 +1176,7 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
* reset the keep alive timer when the controller is enabled.
*/
if (ctrl->kato)
- mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
+ mod_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ);
}
static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 7f5eb295fe19..a995ea1ef849 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -346,8 +346,40 @@ static void put_probe_ref(void)
mutex_unlock(&blk_probe_mutex);
}
+static int blk_trace_start(struct blk_trace *bt)
+{
+ if (bt->trace_state != Blktrace_setup &&
+ bt->trace_state != Blktrace_stopped)
+ return -EINVAL;
+
+ blktrace_seq++;
+ smp_mb();
+ bt->trace_state = Blktrace_running;
+ raw_spin_lock_irq(&running_trace_lock);
+ list_add(&bt->running_list, &running_trace_list);
+ raw_spin_unlock_irq(&running_trace_lock);
+ trace_note_time(bt);
+
+ return 0;
+}
+
+static int blk_trace_stop(struct blk_trace *bt)
+{
+ if (bt->trace_state != Blktrace_running)
+ return -EINVAL;
+
+ bt->trace_state = Blktrace_stopped;
+ raw_spin_lock_irq(&running_trace_lock);
+ list_del_init(&bt->running_list);
+ raw_spin_unlock_irq(&running_trace_lock);
+ relay_flush(bt->rchan);
+
+ return 0;
+}
+
static void blk_trace_cleanup(struct request_queue *q, struct blk_trace *bt)
{
+ blk_trace_stop(bt);
synchronize_rcu();
blk_trace_free(q, bt);
put_probe_ref();
@@ -362,8 +394,7 @@ static int __blk_trace_remove(struct request_queue *q)
if (!bt)
return -EINVAL;
- if (bt->trace_state != Blktrace_running)
- blk_trace_cleanup(q, bt);
+ blk_trace_cleanup(q, bt);
return 0;
}
@@ -658,7 +689,6 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
static int __blk_trace_startstop(struct request_queue *q, int start)
{
- int ret;
struct blk_trace *bt;
bt = rcu_dereference_protected(q->blk_trace,
@@ -666,36 +696,10 @@ static int __blk_trace_startstop(struct request_queue *q, int start)
if (bt == NULL)
return -EINVAL;
- /*
- * For starting a trace, we can transition from a setup or stopped
- * trace. For stopping a trace, the state must be running
- */
- ret = -EINVAL;
- if (start) {
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped) {
- blktrace_seq++;
- smp_mb();
- bt->trace_state = Blktrace_running;
- raw_spin_lock_irq(&running_trace_lock);
- list_add(&bt->running_list, &running_trace_list);
- raw_spin_unlock_irq(&running_trace_lock);
-
- trace_note_time(bt);
- ret = 0;
- }
- } else {
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- raw_spin_lock_irq(&running_trace_lock);
- list_del_init(&bt->running_list);
- raw_spin_unlock_irq(&running_trace_lock);
- relay_flush(bt->rchan);
- ret = 0;
- }
- }
-
- return ret;
+ if (start)
+ return blk_trace_start(bt);
+ else
+ return blk_trace_stop(bt);
}
int blk_trace_startstop(struct request_queue *q, int start)
@@ -772,10 +776,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
void blk_trace_shutdown(struct request_queue *q)
{
if (rcu_dereference_protected(q->blk_trace,
- lockdep_is_held(&q->debugfs_mutex))) {
- __blk_trace_startstop(q, 0);
+ lockdep_is_held(&q->debugfs_mutex)))
__blk_trace_remove(q);
- }
}
#ifdef CONFIG_BLK_CGROUP
@@ -1614,13 +1616,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt == NULL)
return -EINVAL;
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- raw_spin_lock_irq(&running_trace_lock);
- list_del_init(&bt->running_list);
- raw_spin_unlock_irq(&running_trace_lock);
- relay_flush(bt->rchan);
- }
+ blk_trace_stop(bt);
put_probe_ref();
synchronize_rcu();