diff options
Diffstat (limited to 'drivers/block/rbd.c')
-rw-r--r-- | drivers/block/rbd.c | 218 |
1 files changed, 90 insertions, 128 deletions
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3ec85dfce124..b40af3203089 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -38,6 +38,7 @@ #include <linux/kernel.h> #include <linux/device.h> #include <linux/module.h> +#include <linux/blk-mq.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/slab.h> @@ -340,9 +341,7 @@ struct rbd_device { char name[DEV_NAME_LEN]; /* blkdev name, e.g. rbd3 */ - struct list_head rq_queue; /* incoming rq queue */ spinlock_t lock; /* queue, flags, open_count */ - struct work_struct rq_work; struct rbd_image_header header; unsigned long flags; /* possibly lock protected */ @@ -360,6 +359,9 @@ struct rbd_device { atomic_t parent_ref; struct rbd_device *parent; + /* Block layer tags. */ + struct blk_mq_tag_set tag_set; + /* protects updating the header */ struct rw_semaphore header_rwsem; @@ -1817,7 +1819,8 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req, /* * We support a 64-bit length, but ultimately it has to be - * passed to blk_end_request(), which takes an unsigned int. + * passed to the block layer, which just supports a 32-bit + * length field. */ obj_request->xferred = osd_req->r_reply_op_len[0]; rbd_assert(obj_request->xferred < (u64)UINT_MAX); @@ -2098,32 +2101,26 @@ static void rbd_dev_parent_put(struct rbd_device *rbd_dev) * If an image has a non-zero parent overlap, get a reference to its * parent. * - * We must get the reference before checking for the overlap to - * coordinate properly with zeroing the parent overlap in - * rbd_dev_v2_parent_info() when an image gets flattened. We - * drop it again if there is no overlap. - * * Returns true if the rbd device has a parent with a non-zero * overlap and a reference for it was successfully taken, or * false otherwise. */ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev) { - int counter; + int counter = 0; if (!rbd_dev->parent_spec) return false; - counter = atomic_inc_return_safe(&rbd_dev->parent_ref); - if (counter > 0 && rbd_dev->parent_overlap) - return true; - - /* Image was flattened, but parent is not yet torn down */ + down_read(&rbd_dev->header_rwsem); + if (rbd_dev->parent_overlap) + counter = atomic_inc_return_safe(&rbd_dev->parent_ref); + up_read(&rbd_dev->header_rwsem); if (counter < 0) rbd_warn(rbd_dev, "parent reference overflow"); - return false; + return counter > 0; } /* @@ -2281,7 +2278,10 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request) more = obj_request->which < img_request->obj_request_count - 1; } else { rbd_assert(img_request->rq != NULL); - more = blk_end_request(img_request->rq, result, xferred); + + more = blk_update_request(img_request->rq, result, xferred); + if (!more) + __blk_mq_end_request(img_request->rq, result); } return more; @@ -3310,8 +3310,10 @@ out: return ret; } -static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) +static void rbd_queue_workfn(struct work_struct *work) { + struct request *rq = blk_mq_rq_from_pdu(work); + struct rbd_device *rbd_dev = rq->q->queuedata; struct rbd_img_request *img_request; struct ceph_snap_context *snapc = NULL; u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT; @@ -3320,6 +3322,13 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) u64 mapping_size; int result; + if (rq->cmd_type != REQ_TYPE_FS) { + dout("%s: non-fs request type %d\n", __func__, + (int) rq->cmd_type); + result = -EIO; + goto err; + } + if (rq->cmd_flags & REQ_DISCARD) op_type = OBJ_OP_DISCARD; else if (rq->cmd_flags & REQ_WRITE) @@ -3365,6 +3374,8 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq) goto err_rq; /* Shouldn't happen */ } + blk_mq_start_request(rq); + down_read(&rbd_dev->header_rwsem); mapping_size = rbd_dev->mapping.size; if (op_type != OBJ_OP_READ) { @@ -3410,53 +3421,18 @@ err_rq: rbd_warn(rbd_dev, "%s %llx at %llx result %d", obj_op_name(op_type), length, offset, result); ceph_put_snap_context(snapc); - blk_end_request_all(rq, result); +err: + blk_mq_end_request(rq, result); } -static void rbd_request_workfn(struct work_struct *work) +static int rbd_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) { - struct rbd_device *rbd_dev = - container_of(work, struct rbd_device, rq_work); - struct request *rq, *next; - LIST_HEAD(requests); + struct request *rq = bd->rq; + struct work_struct *work = blk_mq_rq_to_pdu(rq); - spin_lock_irq(&rbd_dev->lock); /* rq->q->queue_lock */ - list_splice_init(&rbd_dev->rq_queue, &requests); - spin_unlock_irq(&rbd_dev->lock); - - list_for_each_entry_safe(rq, next, &requests, queuelist) { - list_del_init(&rq->queuelist); - rbd_handle_request(rbd_dev, rq); - } -} - -/* - * Called with q->queue_lock held and interrupts disabled, possibly on - * the way to schedule(). Do not sleep here! - */ -static void rbd_request_fn(struct request_queue *q) -{ - struct rbd_device *rbd_dev = q->queuedata; - struct request *rq; - int queued = 0; - - rbd_assert(rbd_dev); - - while ((rq = blk_fetch_request(q))) { - /* Ignore any non-FS requests that filter through. */ - if (rq->cmd_type != REQ_TYPE_FS) { - dout("%s: non-fs request type %d\n", __func__, - (int) rq->cmd_type); - __blk_end_request_all(rq, 0); - continue; - } - - list_add_tail(&rq->queuelist, &rbd_dev->rq_queue); - queued++; - } - - if (queued) - queue_work(rbd_wq, &rbd_dev->rq_work); + queue_work(rbd_wq, work); + return BLK_MQ_RQ_QUEUE_OK; } /* @@ -3517,6 +3493,7 @@ static void rbd_free_disk(struct rbd_device *rbd_dev) del_gendisk(disk); if (disk->queue) blk_cleanup_queue(disk->queue); + blk_mq_free_tag_set(&rbd_dev->tag_set); } put_disk(disk); } @@ -3700,7 +3677,7 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) ret = rbd_dev_header_info(rbd_dev); if (ret) - return ret; + goto out; /* * If there is a parent, see if it has disappeared due to the @@ -3709,30 +3686,46 @@ static int rbd_dev_refresh(struct rbd_device *rbd_dev) if (rbd_dev->parent) { ret = rbd_dev_v2_parent_info(rbd_dev); if (ret) - return ret; + goto out; } if (rbd_dev->spec->snap_id == CEPH_NOSNAP) { - if (rbd_dev->mapping.size != rbd_dev->header.image_size) - rbd_dev->mapping.size = rbd_dev->header.image_size; + rbd_dev->mapping.size = rbd_dev->header.image_size; } else { /* validate mapped snapshot's EXISTS flag */ rbd_exists_validate(rbd_dev); } +out: up_write(&rbd_dev->header_rwsem); - - if (mapping_size != rbd_dev->mapping.size) + if (!ret && mapping_size != rbd_dev->mapping.size) rbd_dev_update_size(rbd_dev); + return ret; +} + +static int rbd_init_request(void *data, struct request *rq, + unsigned int hctx_idx, unsigned int request_idx, + unsigned int numa_node) +{ + struct work_struct *work = blk_mq_rq_to_pdu(rq); + + INIT_WORK(work, rbd_queue_workfn); return 0; } +static struct blk_mq_ops rbd_mq_ops = { + .queue_rq = rbd_queue_rq, + .map_queue = blk_mq_map_queue, + .init_request = rbd_init_request, +}; + static int rbd_init_disk(struct rbd_device *rbd_dev) { struct gendisk *disk; struct request_queue *q; u64 segment_size; + int err; /* create gendisk info */ disk = alloc_disk(single_major ? @@ -3750,10 +3743,25 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) disk->fops = &rbd_bd_ops; disk->private_data = rbd_dev; - q = blk_init_queue(rbd_request_fn, &rbd_dev->lock); - if (!q) + memset(&rbd_dev->tag_set, 0, sizeof(rbd_dev->tag_set)); + rbd_dev->tag_set.ops = &rbd_mq_ops; + rbd_dev->tag_set.queue_depth = BLKDEV_MAX_RQ; + rbd_dev->tag_set.numa_node = NUMA_NO_NODE; + rbd_dev->tag_set.flags = + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE; + rbd_dev->tag_set.nr_hw_queues = 1; + rbd_dev->tag_set.cmd_size = sizeof(struct work_struct); + + err = blk_mq_alloc_tag_set(&rbd_dev->tag_set); + if (err) goto out_disk; + q = blk_mq_init_queue(&rbd_dev->tag_set); + if (IS_ERR(q)) { + err = PTR_ERR(q); + goto out_tag_set; + } + /* We use the default size, but let's be explicit about it. */ blk_queue_physical_block_size(q, SECTOR_SIZE); @@ -3779,10 +3787,11 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) rbd_dev->disk = disk; return 0; +out_tag_set: + blk_mq_free_tag_set(&rbd_dev->tag_set); out_disk: put_disk(disk); - - return -ENOMEM; + return err; } /* @@ -4039,8 +4048,6 @@ static struct rbd_device *rbd_dev_create(struct rbd_client *rbdc, return NULL; spin_lock_init(&rbd_dev->lock); - INIT_LIST_HEAD(&rbd_dev->rq_queue); - INIT_WORK(&rbd_dev->rq_work, rbd_request_workfn); rbd_dev->flags = 0; atomic_set(&rbd_dev->parent_ref, 0); INIT_LIST_HEAD(&rbd_dev->node); @@ -4239,7 +4246,6 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) */ if (rbd_dev->parent_overlap) { rbd_dev->parent_overlap = 0; - smp_mb(); rbd_dev_parent_put(rbd_dev); pr_info("%s: clone image has been flattened\n", rbd_dev->disk->disk_name); @@ -4281,33 +4287,22 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev) } /* - * We always update the parent overlap. If it's zero we - * treat it specially. + * We always update the parent overlap. If it's zero we issue + * a warning, as we will proceed as if there was no parent. */ - rbd_dev->parent_overlap = overlap; - smp_mb(); if (!overlap) { - - /* A null parent_spec indicates it's the initial probe */ - if (parent_spec) { - /* - * The overlap has become zero, so the clone - * must have been resized down to 0 at some - * point. Treat this the same as a flatten. - */ - rbd_dev_parent_put(rbd_dev); - pr_info("%s: clone image now standalone\n", - rbd_dev->disk->disk_name); + /* refresh, careful to warn just once */ + if (rbd_dev->parent_overlap) + rbd_warn(rbd_dev, + "clone now standalone (overlap became 0)"); } else { - /* - * For the initial probe, if we find the - * overlap is zero we just pretend there was - * no parent image. - */ - rbd_warn(rbd_dev, "ignoring parent with overlap 0"); + /* initial probe */ + rbd_warn(rbd_dev, "clone is standalone (overlap 0)"); } } + rbd_dev->parent_overlap = overlap; + out: ret = 0; out_err: @@ -4779,36 +4774,6 @@ static inline size_t next_token(const char **buf) } /* - * Finds the next token in *buf, and if the provided token buffer is - * big enough, copies the found token into it. The result, if - * copied, is guaranteed to be terminated with '\0'. Note that *buf - * must be terminated with '\0' on entry. - * - * Returns the length of the token found (not including the '\0'). - * Return value will be 0 if no token is found, and it will be >= - * token_size if the token would not fit. - * - * The *buf pointer will be updated to point beyond the end of the - * found token. Note that this occurs even if the token buffer is - * too small to hold it. - */ -static inline size_t copy_token(const char **buf, - char *token, - size_t token_size) -{ - size_t len; - - len = next_token(buf); - if (len < token_size) { - memcpy(token, *buf, len); - *(token + len) = '\0'; - } - *buf += len; - - return len; -} - -/* * Finds the next token in *buf, dynamically allocates a buffer big * enough to hold a copy of it, and copies the token into the new * buffer. The copy is guaranteed to be terminated with '\0'. Note @@ -5114,10 +5079,7 @@ static void rbd_dev_unprobe(struct rbd_device *rbd_dev) { struct rbd_image_header *header; - /* Drop parent reference unless it's already been done (or none) */ - - if (rbd_dev->parent_overlap) - rbd_dev_parent_put(rbd_dev); + rbd_dev_parent_put(rbd_dev); /* Free dynamic fields from the header, then zero it out */ |