diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/nbd.c | 36 | ||||
-rw-r--r-- | drivers/block/nvme-core.c | 24 | ||||
-rw-r--r-- | drivers/block/rbd.c | 82 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 3 |
4 files changed, 97 insertions, 48 deletions
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 293495a75d3d..1b87623381e2 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -60,6 +60,7 @@ struct nbd_device { bool disconnect; /* a disconnect has been requested by user */ struct timer_list timeout_timer; + spinlock_t tasks_lock; struct task_struct *task_recv; struct task_struct *task_send; @@ -140,21 +141,23 @@ static void sock_shutdown(struct nbd_device *nbd) static void nbd_xmit_timeout(unsigned long arg) { struct nbd_device *nbd = (struct nbd_device *)arg; - struct task_struct *task; + unsigned long flags; if (list_empty(&nbd->queue_head)) return; nbd->disconnect = true; - task = READ_ONCE(nbd->task_recv); - if (task) - force_sig(SIGKILL, task); + spin_lock_irqsave(&nbd->tasks_lock, flags); + + if (nbd->task_recv) + force_sig(SIGKILL, nbd->task_recv); - task = READ_ONCE(nbd->task_send); - if (task) + if (nbd->task_send) force_sig(SIGKILL, nbd->task_send); + spin_unlock_irqrestore(&nbd->tasks_lock, flags); + dev_err(nbd_to_dev(nbd), "Connection timed out, killed receiver and sender, shutting down connection\n"); } @@ -403,17 +406,24 @@ static int nbd_thread_recv(struct nbd_device *nbd) { struct request *req; int ret; + unsigned long flags; BUG_ON(nbd->magic != NBD_MAGIC); sk_set_memalloc(nbd->sock->sk); + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = current; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); if (ret) { dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = NULL; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); + return ret; } @@ -429,7 +439,9 @@ static int nbd_thread_recv(struct nbd_device *nbd) device_remove_file(disk_to_dev(nbd->disk), &pid_attr); + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_recv = NULL; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); if (signal_pending(current)) { siginfo_t info; @@ 
-534,8 +546,11 @@ static int nbd_thread_send(void *data) { struct nbd_device *nbd = data; struct request *req; + unsigned long flags; + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_send = current; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); set_user_nice(current, MIN_NICE); while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) { @@ -572,7 +587,15 @@ static int nbd_thread_send(void *data) nbd_handle_req(nbd, req); } + spin_lock_irqsave(&nbd->tasks_lock, flags); nbd->task_send = NULL; + spin_unlock_irqrestore(&nbd->tasks_lock, flags); + + /* Clear maybe pending signals */ + if (signal_pending(current)) { + siginfo_t info; + dequeue_signal_lock(current, &current->blocked, &info); + } return 0; } @@ -1052,6 +1075,7 @@ static int __init nbd_init(void) nbd_dev[i].magic = NBD_MAGIC; INIT_LIST_HEAD(&nbd_dev[i].waiting_queue); spin_lock_init(&nbd_dev[i].queue_lock); + spin_lock_init(&nbd_dev[i].tasks_lock); INIT_LIST_HEAD(&nbd_dev[i].queue_head); mutex_init(&nbd_dev[i].tx_lock); init_timer(&nbd_dev[i].timeout_timer); diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index 6f04771f1019..ccc0c1f93daa 100644 --- a/drivers/block/nvme-core.c +++ b/drivers/block/nvme-core.c @@ -603,27 +603,31 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, struct nvme_iod *iod = ctx; struct request *req = iod_get_private(iod); struct nvme_cmd_info *cmd_rq = blk_mq_rq_to_pdu(req); - u16 status = le16_to_cpup(&cqe->status) >> 1; + bool requeue = false; + int error = 0; if (unlikely(status)) { if (!(status & NVME_SC_DNR || blk_noretry_request(req)) && (jiffies - req->start_time) < req->timeout) { unsigned long flags; + requeue = true; blk_mq_requeue_request(req); spin_lock_irqsave(req->q->queue_lock, flags); if (!blk_queue_stopped(req->q)) blk_mq_kick_requeue_list(req->q); spin_unlock_irqrestore(req->q->queue_lock, flags); - return; + goto release_iod; } if (req->cmd_type == REQ_TYPE_DRV_PRIV) { if (cmd_rq->ctx == CMD_CTX_CANCELLED) - status 
= -EINTR; + error = -EINTR; + else + error = status; } else { - status = nvme_error_status(status); + error = nvme_error_status(status); } } @@ -635,8 +639,9 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, if (cmd_rq->aborted) dev_warn(nvmeq->dev->dev, "completing aborted command with status:%04x\n", - status); + error); +release_iod: if (iod->nents) { dma_unmap_sg(nvmeq->dev->dev, iod->sg, iod->nents, rq_data_dir(req) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); @@ -649,7 +654,8 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx, } nvme_free_iod(nvmeq->dev, iod); - blk_mq_complete_request(req, status); + if (likely(!requeue)) + blk_mq_complete_request(req, error); } /* length is in bytes. gfp flags indicates whether we may sleep. */ @@ -1804,7 +1810,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) length = (io.nblocks + 1) << ns->lba_shift; meta_len = (io.nblocks + 1) * ns->ms; - metadata = (void __user *)(unsigned long)io.metadata; + metadata = (void __user *)(uintptr_t)io.metadata; write = io.opcode & 1; if (ns->ext) { @@ -1844,7 +1850,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.metadata = cpu_to_le64(meta_dma); status = __nvme_submit_sync_cmd(ns->queue, &c, NULL, - (void __user *)io.addr, length, NULL, 0); + (void __user *)(uintptr_t)io.addr, length, NULL, 0); unmap: if (meta) { if (status == NVME_SC_SUCCESS && !write) { @@ -1886,7 +1892,7 @@ static int nvme_user_cmd(struct nvme_dev *dev, struct nvme_ns *ns, timeout = msecs_to_jiffies(cmd.timeout_ms); status = __nvme_submit_sync_cmd(ns ? 
ns->queue : dev->admin_q, &c, - NULL, (void __user *)cmd.addr, cmd.data_len, + NULL, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index d93a0372b37b..128e7df5b807 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v) #define RBD_MINORS_PER_MAJOR 256 #define RBD_SINGLE_MAJOR_PART_SHIFT 4 +#define RBD_MAX_PARENT_CHAIN_LEN 16 + #define RBD_SNAP_DEV_NAME_PREFIX "snap_" #define RBD_MAX_SNAP_NAME_LEN \ (NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1)) @@ -426,7 +428,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf, size_t count); static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf, size_t count); -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping); +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth); static void rbd_spec_put(struct rbd_spec *spec); static int rbd_dev_id_to_minor(int dev_id) @@ -1863,9 +1865,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, rbd_osd_read_callback(obj_request); break; case CEPH_OSD_OP_SETALLOCHINT: - rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE); + rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE || + osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL); /* fall through */ case CEPH_OSD_OP_WRITE: + case CEPH_OSD_OP_WRITEFULL: rbd_osd_write_callback(obj_request); break; case CEPH_OSD_OP_STAT: @@ -2401,7 +2405,10 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request, opcode = CEPH_OSD_OP_ZERO; } } else if (op_type == OBJ_OP_WRITE) { - opcode = CEPH_OSD_OP_WRITE; + if (!offset && length == object_size) + opcode = CEPH_OSD_OP_WRITEFULL; + else + opcode = CEPH_OSD_OP_WRITE; osd_req_op_alloc_hint_init(osd_request, num_ops, object_size, object_size); num_ops++; @@ -3760,6 +3767,7 @@ static 
int rbd_init_disk(struct rbd_device *rbd_dev) /* set io sizes to object size */ segment_size = rbd_obj_bytes(&rbd_dev->header); blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE); + q->limits.max_sectors = queue_max_hw_sectors(q); blk_queue_max_segments(q, segment_size / SECTOR_SIZE); blk_queue_max_segment_size(q, segment_size); blk_queue_io_min(q, segment_size); @@ -3772,6 +3780,9 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) blk_queue_max_discard_sectors(q, segment_size / SECTOR_SIZE); q->limits.discard_zeroes_data = 1; + if (!ceph_test_opt(rbd_dev->rbd_client->client, NOCRC)) + q->backing_dev_info.capabilities |= BDI_CAP_STABLE_WRITES; + disk->queue = q; q->queuedata = rbd_dev; @@ -5125,44 +5136,51 @@ out_err: return ret; } -static int rbd_dev_probe_parent(struct rbd_device *rbd_dev) +/* + * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() -> + * rbd_dev_image_probe() recursion depth, which means it's also the + * length of the already discovered part of the parent chain. + */ +static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth) { struct rbd_device *parent = NULL; - struct rbd_spec *parent_spec; - struct rbd_client *rbdc; int ret; if (!rbd_dev->parent_spec) return 0; - /* - * We need to pass a reference to the client and the parent - * spec when creating the parent rbd_dev. Images related by - * parent/child relationships always share both. 
- */ - parent_spec = rbd_spec_get(rbd_dev->parent_spec); - rbdc = __rbd_get_client(rbd_dev->rbd_client); - ret = -ENOMEM; - parent = rbd_dev_create(rbdc, parent_spec, NULL); - if (!parent) + if (++depth > RBD_MAX_PARENT_CHAIN_LEN) { + pr_info("parent chain is too long (%d)\n", depth); + ret = -EINVAL; + goto out_err; + } + + parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec, + NULL); + if (!parent) { + ret = -ENOMEM; goto out_err; + } - ret = rbd_dev_image_probe(parent, false); + /* + * Images related by parent/child relationships always share + * rbd_client and spec/parent_spec, so bump their refcounts. + */ + __rbd_get_client(rbd_dev->rbd_client); + rbd_spec_get(rbd_dev->parent_spec); + + ret = rbd_dev_image_probe(parent, depth); if (ret < 0) goto out_err; + rbd_dev->parent = parent; atomic_set(&rbd_dev->parent_ref, 1); - return 0; + out_err: - if (parent) { - rbd_dev_unparent(rbd_dev); + rbd_dev_unparent(rbd_dev); + if (parent) rbd_dev_destroy(parent); - } else { - rbd_put_client(rbdc); - rbd_spec_put(parent_spec); - } - return ret; } @@ -5280,7 +5298,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev) * parent), initiate a watch on its header object before using that * object to get detailed information about the rbd image. */ -static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) +static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth) { int ret; @@ -5298,7 +5316,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) if (ret) goto err_out_format; - if (mapping) { + if (!depth) { ret = rbd_dev_header_watch_sync(rbd_dev); if (ret) { if (ret == -ENOENT) @@ -5319,7 +5337,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Otherwise this is a parent image, identified by pool, image * and snap ids - need to fill in names for those ids. 
*/ - if (mapping) + if (!depth) ret = rbd_spec_fill_snap_id(rbd_dev); else ret = rbd_spec_fill_names(rbd_dev); @@ -5341,12 +5359,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) * Need to warn users if this image is the one being * mapped and has a parent. */ - if (mapping && rbd_dev->parent_spec) + if (!depth && rbd_dev->parent_spec) rbd_warn(rbd_dev, "WARNING: kernel layering is EXPERIMENTAL!"); } - ret = rbd_dev_probe_parent(rbd_dev); + ret = rbd_dev_probe_parent(rbd_dev, depth); if (ret) goto err_out_probe; @@ -5357,7 +5375,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping) err_out_probe: rbd_dev_unprobe(rbd_dev); err_out_watch: - if (mapping) + if (!depth) rbd_dev_header_unwatch_sync(rbd_dev); out_header_name: kfree(rbd_dev->header_name); @@ -5420,7 +5438,7 @@ static ssize_t do_rbd_add(struct bus_type *bus, spec = NULL; /* rbd_dev now owns this */ rbd_opts = NULL; /* rbd_dev now owns this */ - rc = rbd_dev_image_probe(rbd_dev, true); + rc = rbd_dev_image_probe(rbd_dev, 0); if (rc < 0) goto err_out_rbd_dev; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 611170896b8c..a69c02dadec0 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1956,7 +1956,8 @@ static void blkback_changed(struct xenbus_device *dev, break; /* Missed the backend's Closing state -- fallthrough */ case XenbusStateClosing: - blkfront_closing(info); + if (info) + blkfront_closing(info); break; } } |