Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/brd.c                 |   6
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c   |  10
-rw-r--r--  drivers/block/null_blk.c            |  36
-rw-r--r--  drivers/block/paride/pcd.c          |   4
-rw-r--r--  drivers/block/rbd.c                 | 400
-rw-r--r--  drivers/block/rsxx/core.c           |  81
-rw-r--r--  drivers/block/sunvdc.c              | 167
-rw-r--r--  drivers/block/virtio_blk.c          |  48
-rw-r--r--  drivers/block/xen-blkback/xenbus.c  |  11
-rw-r--r--  drivers/block/xen-blkfront.c        |   5
-rw-r--r--  drivers/block/xsysace.c             |   1
-rw-r--r--  drivers/block/zram/zram_drv.c       | 116
-rw-r--r--  drivers/block/zram/zram_drv.h       |   8
13 files changed, 603 insertions, 290 deletions
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index c7d138eca731..3598110d2cef 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -442,12 +442,15 @@ static int rd_nr;
 int rd_size = CONFIG_BLK_DEV_RAM_SIZE;
 static int max_part;
 static int part_shift;
+static int part_show = 0;
 module_param(rd_nr, int, S_IRUGO);
 MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices");
 module_param(rd_size, int, S_IRUGO);
 MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes.");
 module_param(max_part, int, S_IRUGO);
 MODULE_PARM_DESC(max_part, "Maximum number of partitions per RAM disk");
+module_param(part_show, int, S_IRUGO);
+MODULE_PARM_DESC(part_show, "Control RAM disk visibility in /proc/partitions");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_BLOCKDEV_MAJOR(RAMDISK_MAJOR);
 MODULE_ALIAS("rd");
@@ -501,7 +504,8 @@ static struct brd_device *brd_alloc(int i)
 	disk->fops		= &brd_fops;
 	disk->private_data	= brd;
 	disk->queue		= brd->brd_queue;
-	disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
+	if (!part_show)
+		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
 	sprintf(disk->disk_name, "ram%d", i);
 	set_capacity(disk, rd_size * 2);
 
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 936f8c160e46..1bd5f523f8fd 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -247,7 +247,7 @@ static void mtip_async_complete(struct mtip_port *port,
 	if (unlikely(cmd->unaligned))
 		up(&port->cmd_slot_unal);
 
-	blk_mq_end_io(rq, status ? -EIO : 0);
+	blk_mq_end_request(rq, status ? -EIO : 0);
 }
 
 /*
@@ -3739,7 +3739,7 @@ static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 		int err;
 
 		err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq));
-		blk_mq_end_io(rq, err);
+		blk_mq_end_request(rq, err);
 		return 0;
 	}
 
@@ -3775,13 +3775,16 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
+		bool last)
 {
 	int ret;
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
+	blk_mq_start_request(rq);
+
 	ret = mtip_submit_request(hctx, rq);
 	if (likely(!ret))
 		return BLK_MQ_RQ_QUEUE_OK;
@@ -3918,7 +3921,6 @@ skip_create_disk:
 	if (rv) {
 		dev_err(&dd->pdev->dev,
 			"Unable to allocate request queue\n");
-		rv = -ENOMEM;
 		goto block_queue_alloc_init_error;
 	}
 
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index b0d94b6973ab..2671a3f02f0c 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -177,7 +177,7 @@ static void end_cmd(struct nullb_cmd *cmd)
 {
 	switch (queue_mode)  {
 	case NULL_Q_MQ:
-		blk_mq_end_io(cmd->rq, 0);
+		blk_mq_end_request(cmd->rq, 0);
 		return;
 	case NULL_Q_RQ:
 		INIT_LIST_HEAD(&cmd->rq->queuelist);
@@ -313,13 +313,16 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
+		bool last)
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
 	cmd->rq = rq;
 	cmd->nq = hctx->driver_data;
 
+	blk_mq_start_request(rq);
+
 	null_handle_cmd(cmd);
 	return BLK_MQ_RQ_QUEUE_OK;
 }
@@ -462,17 +465,21 @@ static int null_add_dev(void)
 	struct gendisk *disk;
 	struct nullb *nullb;
 	sector_t size;
+	int rv;
 
 	nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node);
-	if (!nullb)
+	if (!nullb) {
+		rv = -ENOMEM;
 		goto out;
+	}
 
 	spin_lock_init(&nullb->lock);
 
 	if (queue_mode == NULL_Q_MQ && use_per_node_hctx)
 		submit_queues = nr_online_nodes;
 
-	if (setup_queues(nullb))
+	rv = setup_queues(nullb);
+	if (rv)
 		goto out_free_nullb;
 
 	if (queue_mode == NULL_Q_MQ) {
@@ -484,22 +491,29 @@ static int null_add_dev(void)
 		nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
 		nullb->tag_set.driver_data = nullb;
 
-		if (blk_mq_alloc_tag_set(&nullb->tag_set))
+		rv = blk_mq_alloc_tag_set(&nullb->tag_set);
+		if (rv)
 			goto out_cleanup_queues;
 
 		nullb->q = blk_mq_init_queue(&nullb->tag_set);
-		if (!nullb->q)
+		if (!nullb->q) {
+			rv = -ENOMEM;
 			goto out_cleanup_tags;
+		}
 	} else if (queue_mode == NULL_Q_BIO) {
 		nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node);
-		if (!nullb->q)
+		if (!nullb->q) {
+			rv = -ENOMEM;
 			goto out_cleanup_queues;
+		}
 		blk_queue_make_request(nullb->q, null_queue_bio);
 		init_driver_queues(nullb);
 	} else {
 		nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node);
-		if (!nullb->q)
+		if (!nullb->q) {
+			rv = -ENOMEM;
 			goto out_cleanup_queues;
+		}
 		blk_queue_prep_rq(nullb->q, null_rq_prep_fn);
 		blk_queue_softirq_done(nullb->q, null_softirq_done_fn);
 		init_driver_queues(nullb);
@@ -510,8 +524,10 @@ static int null_add_dev(void)
 	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, nullb->q);
 
 	disk = nullb->disk = alloc_disk_node(1, home_node);
-	if (!disk)
+	if (!disk) {
+		rv = -ENOMEM;
 		goto out_cleanup_blk_queue;
+	}
 
 	mutex_lock(&lock);
 	list_add_tail(&nullb->list, &nullb_list);
@@ -545,7 +561,7 @@ out_cleanup_queues:
 out_free_nullb:
 	kfree(nullb);
 out:
-	return -ENOMEM;
+	return rv;
 }
 
 static int __init null_init(void)
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 719cb1bc1640..3b7c9f1be484 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -69,8 +69,8 @@
 	    nice	This parameter controls the driver's use of
 			idle CPU time, at the expense of some speed.
 
-	If this driver is built into the kernel, you can use kernel
-	the following command line parameters, with the same values
+	If this driver is built into the kernel, you can use the
+	following kernel command line parameters, with the same values
 	as the corresponding module parameters listed above:
 
 	    pcd.drive0
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 623c84145b79..0a54c588e433 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -210,6 +210,12 @@ enum obj_request_type {
 	OBJ_REQUEST_NODATA, OBJ_REQUEST_BIO, OBJ_REQUEST_PAGES
 };
 
+enum obj_operation_type {
+	OBJ_OP_WRITE,
+	OBJ_OP_READ,
+	OBJ_OP_DISCARD,
+};
+
 enum obj_req_flags {
 	OBJ_REQ_DONE,		/* completion flag: not done = 0, done = 1 */
 	OBJ_REQ_IMG_DATA,	/* object usage: standalone = 0, image = 1 */
@@ -276,6 +282,7 @@ enum img_req_flags {
 	IMG_REQ_WRITE,		/* I/O direction: read = 0, write = 1 */
 	IMG_REQ_CHILD,		/* initiator: block = 0, child image = 1 */
 	IMG_REQ_LAYERED,	/* ENOENT handling: normal = 0, layered = 1 */
+	IMG_REQ_DISCARD,	/* discard: normal = 0, discard request = 1 */
 };
 
 struct rbd_img_request {
@@ -785,6 +792,20 @@ static int parse_rbd_opts_token(char *c, void *private)
 	return 0;
 }
 
+static char* obj_op_name(enum obj_operation_type op_type)
+{
+	switch (op_type) {
+	case OBJ_OP_READ:
+		return "read";
+	case OBJ_OP_WRITE:
+		return "write";
+	case OBJ_OP_DISCARD:
+		return "discard";
+	default:
+		return "???";
+	}
+}
+
 /*
  * Get a ceph client with specific addr and configuration, if one does
  * not exist create it.  Either way, ceph_opts is consumed by this
@@ -1600,6 +1621,21 @@ static bool img_request_write_test(struct rbd_img_request *img_request)
 	return test_bit(IMG_REQ_WRITE, &img_request->flags) != 0;
 }
 
+/*
+ * Set the discard flag when the img_request is an discard request
+ */
+static void img_request_discard_set(struct rbd_img_request *img_request)
+{
+	set_bit(IMG_REQ_DISCARD, &img_request->flags);
+	smp_mb();
+}
+
+static bool img_request_discard_test(struct rbd_img_request *img_request)
+{
+	smp_mb();
+	return test_bit(IMG_REQ_DISCARD, &img_request->flags) != 0;
+}
+
 static void img_request_child_set(struct rbd_img_request *img_request)
 {
 	set_bit(IMG_REQ_CHILD, &img_request->flags);
@@ -1636,6 +1672,17 @@ static bool img_request_layered_test(struct rbd_img_request *img_request)
 	return test_bit(IMG_REQ_LAYERED, &img_request->flags) != 0;
 }
 
+static enum obj_operation_type
+rbd_img_request_op_type(struct rbd_img_request *img_request)
+{
+	if (img_request_write_test(img_request))
+		return OBJ_OP_WRITE;
+	else if (img_request_discard_test(img_request))
+		return OBJ_OP_DISCARD;
+	else
+		return OBJ_OP_READ;
+}
+
 static void
 rbd_img_obj_request_read_callback(struct rbd_obj_request *obj_request)
 {
@@ -1722,6 +1769,21 @@ static void rbd_osd_write_callback(struct rbd_obj_request *obj_request)
 	obj_request_done_set(obj_request);
 }
 
+static void rbd_osd_discard_callback(struct rbd_obj_request *obj_request)
+{
+	dout("%s: obj %p result %d %llu\n", __func__, obj_request,
+		obj_request->result, obj_request->length);
+	/*
+	 * There is no such thing as a successful short discard.  Set
+	 * it to our originally-requested length.
+	 */
+	obj_request->xferred = obj_request->length;
+	/* discarding a non-existent object is not a problem */
+	if (obj_request->result == -ENOENT)
+		obj_request->result = 0;
+	obj_request_done_set(obj_request);
+}
+
 /*
  * For a simple stat call there's nothing to do.  We'll do more if
  * this is part of a write sequence for a layered image.
@@ -1773,6 +1835,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
 	case CEPH_OSD_OP_STAT:
 		rbd_osd_stat_callback(obj_request);
 		break;
+	case CEPH_OSD_OP_DELETE:
+	case CEPH_OSD_OP_TRUNCATE:
+	case CEPH_OSD_OP_ZERO:
+		rbd_osd_discard_callback(obj_request);
+		break;
 	case CEPH_OSD_OP_CALL:
 	case CEPH_OSD_OP_NOTIFY_ACK:
 	case CEPH_OSD_OP_WATCH:
@@ -1823,7 +1890,7 @@ static void rbd_osd_req_format_write(struct rbd_obj_request *obj_request)
 */
 static struct ceph_osd_request *rbd_osd_req_create(
					struct rbd_device *rbd_dev,
-					bool write_request,
+					enum obj_operation_type op_type,
					unsigned int num_ops,
					struct rbd_obj_request *obj_request)
 {
@@ -1831,16 +1898,18 @@ static struct ceph_osd_request *rbd_osd_req_create(
 	struct ceph_osd_client *osdc;
 	struct ceph_osd_request *osd_req;
 
-	if (obj_request_img_data_test(obj_request)) {
+	if (obj_request_img_data_test(obj_request) &&
+		(op_type == OBJ_OP_DISCARD || op_type == OBJ_OP_WRITE)) {
 		struct rbd_img_request *img_request = obj_request->img_request;
-
-		rbd_assert(write_request ==
-				img_request_write_test(img_request));
-		if (write_request)
-			snapc = img_request->snapc;
+		if (op_type == OBJ_OP_WRITE) {
+			rbd_assert(img_request_write_test(img_request));
+		} else {
+			rbd_assert(img_request_discard_test(img_request));
+		}
+		snapc = img_request->snapc;
 	}
 
-	rbd_assert(num_ops == 1 || (write_request && num_ops == 2));
+	rbd_assert(num_ops == 1 || ((op_type == OBJ_OP_WRITE) && num_ops == 2));
 
 	/* Allocate and initialize the request, for the num_ops ops */
 
@@ -1850,7 +1919,7 @@ static struct ceph_osd_request *rbd_osd_req_create(
 	if (!osd_req)
 		return NULL;	/* ENOMEM */
 
-	if (write_request)
+	if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
 		osd_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
 	else
 		osd_req->r_flags = CEPH_OSD_FLAG_READ;
@@ -1865,9 +1934,10 @@ static struct ceph_osd_request *rbd_osd_req_create(
 }
 
 /*
- * Create a copyup osd request based on the information in the
- * object request supplied.  A copyup request has three osd ops,
- * a copyup method call, a hint op, and a write op.
+ * Create a copyup osd request based on the information in the object
+ * request supplied.  A copyup request has two or three osd ops, a
+ * copyup method call, potentially a hint op, and a write or truncate
+ * or zero op.
 */
 static struct ceph_osd_request *
 rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
@@ -1877,18 +1947,24 @@ rbd_osd_req_create_copyup(struct rbd_obj_request *obj_request)
 	struct rbd_device *rbd_dev;
 	struct ceph_osd_client *osdc;
 	struct ceph_osd_request *osd_req;
+	int num_osd_ops = 3;
 
 	rbd_assert(obj_request_img_data_test(obj_request));
 	img_request = obj_request->img_request;
 	rbd_assert(img_request);
-	rbd_assert(img_request_write_test(img_request));
+	rbd_assert(img_request_write_test(img_request) ||
+			img_request_discard_test(img_request));
+
+	if (img_request_discard_test(img_request))
+		num_osd_ops = 2;
 
-	/* Allocate and initialize the request, for the three ops */
+	/* Allocate and initialize the request, for all the ops */
 
 	snapc = img_request->snapc;
 	rbd_dev = img_request->rbd_dev;
 	osdc = &rbd_dev->rbd_client->client->osdc;
-	osd_req = ceph_osdc_alloc_request(osdc, snapc, 3, false, GFP_ATOMIC);
+	osd_req = ceph_osdc_alloc_request(osdc, snapc, num_osd_ops,
+						false, GFP_ATOMIC);
 	if (!osd_req)
 		return NULL;	/* ENOMEM */
@@ -2057,7 +2133,8 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
 static struct rbd_img_request *rbd_img_request_create(
					struct rbd_device *rbd_dev,
					u64 offset, u64 length,
-					bool write_request)
+					enum obj_operation_type op_type,
+					struct ceph_snap_context *snapc)
 {
 	struct rbd_img_request *img_request;
 
@@ -2065,20 +2142,17 @@ static struct rbd_img_request *rbd_img_request_create(
 	if (!img_request)
 		return NULL;
 
-	if (write_request) {
-		down_read(&rbd_dev->header_rwsem);
-		ceph_get_snap_context(rbd_dev->header.snapc);
-		up_read(&rbd_dev->header_rwsem);
-	}
-
 	img_request->rq = NULL;
 	img_request->rbd_dev = rbd_dev;
 	img_request->offset = offset;
 	img_request->length = length;
 	img_request->flags = 0;
-	if (write_request) {
+	if (op_type == OBJ_OP_DISCARD) {
+		img_request_discard_set(img_request);
+		img_request->snapc = snapc;
+	} else if (op_type == OBJ_OP_WRITE) {
 		img_request_write_set(img_request);
-		img_request->snapc = rbd_dev->header.snapc;
+		img_request->snapc = snapc;
 	} else {
 		img_request->snap_id = rbd_dev->spec->snap_id;
 	}
@@ -2093,8 +2167,7 @@ static struct rbd_img_request *rbd_img_request_create(
 	kref_init(&img_request->kref);
 
 	dout("%s: rbd_dev %p %s %llu/%llu -> img %p\n", __func__, rbd_dev,
-		write_request ? "write" : "read", offset, length,
-		img_request);
+		obj_op_name(op_type), offset, length, img_request);
 
 	return img_request;
 }
@@ -2118,7 +2191,8 @@ static void rbd_img_request_destroy(struct kref *kref)
 		rbd_dev_parent_put(img_request->rbd_dev);
 	}
 
-	if (img_request_write_test(img_request))
+	if (img_request_write_test(img_request) ||
+		img_request_discard_test(img_request))
 		ceph_put_snap_context(img_request->snapc);
 
 	kmem_cache_free(rbd_img_request_cache, img_request);
@@ -2134,8 +2208,8 @@ static struct rbd_img_request *rbd_parent_request_create(
 	rbd_assert(obj_request->img_request);
 	rbd_dev = obj_request->img_request->rbd_dev;
 
-	parent_request = rbd_img_request_create(rbd_dev->parent,
-						img_offset, length, false);
+	parent_request = rbd_img_request_create(rbd_dev->parent, img_offset,
+						length, OBJ_OP_READ, NULL);
 	if (!parent_request)
 		return NULL;
@@ -2176,11 +2250,18 @@ static bool rbd_img_obj_end_request(struct rbd_obj_request *obj_request)
 	result = obj_request->result;
 	if (result) {
 		struct rbd_device *rbd_dev = img_request->rbd_dev;
+		enum obj_operation_type op_type;
+
+		if (img_request_discard_test(img_request))
+			op_type = OBJ_OP_DISCARD;
+		else if (img_request_write_test(img_request))
+			op_type = OBJ_OP_WRITE;
+		else
+			op_type = OBJ_OP_READ;
 
 		rbd_warn(rbd_dev, "%s %llx at %llx (%llx)",
-			img_request_write_test(img_request) ? "write" : "read",
-			obj_request->length, obj_request->img_offset,
-			obj_request->offset);
+			obj_op_name(op_type), obj_request->length,
+			obj_request->img_offset, obj_request->offset);
 		rbd_warn(rbd_dev, "  result %d xferred %x",
 			result, xferred);
 		if (!img_request->result)
@@ -2245,6 +2326,67 @@ out:
 }
 
 /*
+ * Add individual osd ops to the given ceph_osd_request and prepare
+ * them for submission. num_ops is the current number of
+ * osd operations already to the object request.
+ */
+static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
+				struct ceph_osd_request *osd_request,
+				enum obj_operation_type op_type,
+				unsigned int num_ops)
+{
+	struct rbd_img_request *img_request = obj_request->img_request;
+	struct rbd_device *rbd_dev = img_request->rbd_dev;
+	u64 object_size = rbd_obj_bytes(&rbd_dev->header);
+	u64 offset = obj_request->offset;
+	u64 length = obj_request->length;
+	u64 img_end;
+	u16 opcode;
+
+	if (op_type == OBJ_OP_DISCARD) {
+		if (!offset && length == object_size &&
+			(!img_request_layered_test(img_request) ||
+				!obj_request_overlaps_parent(obj_request))) {
+			opcode = CEPH_OSD_OP_DELETE;
+		} else if ((offset + length == object_size)) {
+			opcode = CEPH_OSD_OP_TRUNCATE;
+		} else {
+			down_read(&rbd_dev->header_rwsem);
+			img_end = rbd_dev->header.image_size;
+			up_read(&rbd_dev->header_rwsem);
+
+			if (obj_request->img_offset + length == img_end)
+				opcode = CEPH_OSD_OP_TRUNCATE;
+			else
+				opcode = CEPH_OSD_OP_ZERO;
+		}
+	} else if (op_type == OBJ_OP_WRITE) {
+		opcode = CEPH_OSD_OP_WRITE;
+		osd_req_op_alloc_hint_init(osd_request, num_ops,
+					object_size, object_size);
+		num_ops++;
+	} else {
+		opcode = CEPH_OSD_OP_READ;
+	}
+
+	osd_req_op_extent_init(osd_request, num_ops, opcode, offset, length,
+				0, 0);
+	if (obj_request->type == OBJ_REQUEST_BIO)
+		osd_req_op_extent_osd_data_bio(osd_request, num_ops,
+					obj_request->bio_list, length);
+	else if (obj_request->type == OBJ_REQUEST_PAGES)
+		osd_req_op_extent_osd_data_pages(osd_request, num_ops,
+					obj_request->pages, length,
+					offset & ~PAGE_MASK, false, false);
+
+	/* Discards are also writes */
+	if (op_type == OBJ_OP_WRITE || op_type == OBJ_OP_DISCARD)
+		rbd_osd_req_format_write(obj_request);
+	else
+		rbd_osd_req_format_read(obj_request);
+}
+
+/*
 * Split up an image request into one or more object requests, each
 * to a different object.  The "type" parameter indicates whether
 * "data_desc" is the pointer to the head of a list of bio
@@ -2259,28 +2401,26 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 	struct rbd_device *rbd_dev = img_request->rbd_dev;
 	struct rbd_obj_request *obj_request = NULL;
 	struct rbd_obj_request *next_obj_request;
-	bool write_request = img_request_write_test(img_request);
 	struct bio *bio_list = NULL;
 	unsigned int bio_offset = 0;
 	struct page **pages = NULL;
+	enum obj_operation_type op_type;
 	u64 img_offset;
 	u64 resid;
-	u16 opcode;
 
 	dout("%s: img %p type %d data_desc %p\n", __func__, img_request,
 		(int)type, data_desc);
 
-	opcode = write_request ? CEPH_OSD_OP_WRITE : CEPH_OSD_OP_READ;
 	img_offset = img_request->offset;
 	resid = img_request->length;
 	rbd_assert(resid > 0);
+	op_type = rbd_img_request_op_type(img_request);
 
 	if (type == OBJ_REQUEST_BIO) {
 		bio_list = data_desc;
 		rbd_assert(img_offset ==
			   bio_list->bi_iter.bi_sector << SECTOR_SHIFT);
-	} else {
-		rbd_assert(type == OBJ_REQUEST_PAGES);
+	} else if (type == OBJ_REQUEST_PAGES) {
 		pages = data_desc;
 	}
 
@@ -2289,7 +2429,6 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 		const char *object_name;
 		u64 offset;
 		u64 length;
-		unsigned int which = 0;
 
 		object_name = rbd_segment_name(rbd_dev, img_offset);
 		if (!object_name)
@@ -2321,7 +2460,7 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
								GFP_ATOMIC);
 			if (!obj_request->bio_list)
 				goto out_unwind;
-		} else {
+		} else if (type == OBJ_REQUEST_PAGES) {
 			unsigned int page_count;
 
 			obj_request->pages = pages;
@@ -2332,38 +2471,19 @@ static int rbd_img_request_fill(struct rbd_img_request *img_request,
 			pages += page_count;
 		}
 
-		osd_req = rbd_osd_req_create(rbd_dev, write_request,
-						(write_request ? 2 : 1),
-						obj_request);
+		osd_req = rbd_osd_req_create(rbd_dev, op_type,
+					(op_type == OBJ_OP_WRITE) ? 2 : 1,
+					obj_request);
 		if (!osd_req)
 			goto out_unwind;
+
 		obj_request->osd_req = osd_req;
 		obj_request->callback = rbd_img_obj_callback;
-		rbd_img_request_get(img_request);
-
-		if (write_request) {
-			osd_req_op_alloc_hint_init(osd_req, which,
					rbd_obj_bytes(&rbd_dev->header),
					rbd_obj_bytes(&rbd_dev->header));
-			which++;
-		}
-
-		osd_req_op_extent_init(osd_req, which, opcode, offset, length,
-					0, 0);
-		if (type == OBJ_REQUEST_BIO)
-			osd_req_op_extent_osd_data_bio(osd_req, which,
					obj_request->bio_list, length);
-		else
-			osd_req_op_extent_osd_data_pages(osd_req, which,
					obj_request->pages, length,
					offset & ~PAGE_MASK, false, false);
+		obj_request->img_offset = img_offset;
 
-		if (write_request)
-			rbd_osd_req_format_write(obj_request);
-		else
-			rbd_osd_req_format_read(obj_request);
+		rbd_img_obj_request_fill(obj_request, osd_req, op_type, 0);
 
-		obj_request->img_offset = img_offset;
+		rbd_img_request_get(img_request);
 
 		img_offset += length;
 		resid -= length;
@@ -2386,7 +2506,8 @@ rbd_img_obj_copyup_callback(struct rbd_obj_request *obj_request)
 	struct page **pages;
 	u32 page_count;
 
-	rbd_assert(obj_request->type == OBJ_REQUEST_BIO);
+	rbd_assert(obj_request->type == OBJ_REQUEST_BIO ||
+		obj_request->type == OBJ_REQUEST_NODATA);
 	rbd_assert(obj_request_img_data_test(obj_request));
 	img_request = obj_request->img_request;
 	rbd_assert(img_request);
@@ -2424,11 +2545,10 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	struct ceph_osd_client *osdc;
 	struct rbd_device *rbd_dev;
 	struct page **pages;
+	enum obj_operation_type op_type;
 	u32 page_count;
 	int img_result;
 	u64 parent_length;
-	u64 offset;
-	u64 length;
 
 	rbd_assert(img_request_child_test(img_request));
 
@@ -2492,26 +2612,10 @@ rbd_img_obj_parent_read_full_callback(struct rbd_img_request *img_request)
 	osd_req_op_cls_request_data_pages(osd_req, 0, pages, parent_length, 0,
						false, false);
 
-	/* Then the hint op */
+	/* Add the other op(s) */
 
-	osd_req_op_alloc_hint_init(osd_req, 1, rbd_obj_bytes(&rbd_dev->header),
-					rbd_obj_bytes(&rbd_dev->header));
-
-	/* And the original write request op */
-
-	offset = orig_request->offset;
-	length = orig_request->length;
-	osd_req_op_extent_init(osd_req, 2, CEPH_OSD_OP_WRITE,
-					offset, length, 0, 0);
-	if (orig_request->type == OBJ_REQUEST_BIO)
-		osd_req_op_extent_osd_data_bio(osd_req, 2,
-					orig_request->bio_list, length);
-	else
-		osd_req_op_extent_osd_data_pages(osd_req, 2,
-					orig_request->pages, length,
-					offset & ~PAGE_MASK, false, false);
-
-	rbd_osd_req_format_write(orig_request);
+	op_type = rbd_img_request_op_type(orig_request->img_request);
+	rbd_img_obj_request_fill(orig_request, osd_req, op_type, 1);
 
 	/* All set, send it off. */
@@ -2728,7 +2832,7 @@ static int rbd_img_obj_exists_submit(struct rbd_obj_request *obj_request)
 	rbd_assert(obj_request->img_request);
 	rbd_dev = obj_request->img_request->rbd_dev;
 
-	stat_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+	stat_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
						stat_request);
 	if (!stat_request->osd_req)
 		goto out;
@@ -2748,11 +2852,10 @@ out:
 	return ret;
 }
 
-static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+static bool img_obj_request_simple(struct rbd_obj_request *obj_request)
 {
 	struct rbd_img_request *img_request;
 	struct rbd_device *rbd_dev;
-	bool known;
 
 	rbd_assert(obj_request_img_data_test(obj_request));
 
@@ -2760,22 +2863,44 @@ static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
 	rbd_assert(img_request);
 	rbd_dev = img_request->rbd_dev;
 
+	/* Reads */
+	if (!img_request_write_test(img_request) &&
+	    !img_request_discard_test(img_request))
+		return true;
+
+	/* Non-layered writes */
+	if (!img_request_layered_test(img_request))
+		return true;
+
+	/*
+	 * Layered writes outside of the parent overlap range don't
+	 * share any data with the parent.
+	 */
+	if (!obj_request_overlaps_parent(obj_request))
+		return true;
+
 	/*
-	 * Only writes to layered images need special handling.
-	 * Reads and non-layered writes are simple object requests.
-	 * Layered writes that start beyond the end of the overlap
-	 * with the parent have no parent data, so they too are
-	 * simple object requests.  Finally, if the target object is
-	 * known to already exist, its parent data has already been
-	 * copied, so a write to the object can also be handled as a
-	 * simple object request.
+	 * Entire-object layered writes - we will overwrite whatever
+	 * parent data there is anyway.
 	 */
-	if (!img_request_write_test(img_request) ||
-		!img_request_layered_test(img_request) ||
-		!obj_request_overlaps_parent(obj_request) ||
-		((known = obj_request_known_test(obj_request)) &&
-			obj_request_exists_test(obj_request))) {
+	if (!obj_request->offset &&
+	    obj_request->length == rbd_obj_bytes(&rbd_dev->header))
+		return true;
+
+	/*
+	 * If the object is known to already exist, its parent data has
+	 * already been copied.
+	 */
+	if (obj_request_known_test(obj_request) &&
+	    obj_request_exists_test(obj_request))
+		return true;
+
+	return false;
+}
+
+static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
+{
+	if (img_obj_request_simple(obj_request)) {
 		struct rbd_device *rbd_dev;
 		struct ceph_osd_client *osdc;
 
@@ -2791,7 +2916,7 @@ static int rbd_img_obj_request_submit(struct rbd_obj_request *obj_request)
	 * start by reading the data for the full target object from
	 * the parent so we can use it for a copyup to the target.
	 */
-	if (known)
+	if (obj_request_known_test(obj_request))
 		return rbd_img_obj_parent_read_full(obj_request);
 
 	/* We don't know whether the target exists.  Go find out. */
@@ -2932,7 +3057,7 @@ static int rbd_obj_notify_ack_sync(struct rbd_device *rbd_dev, u64 notify_id)
 		return -ENOMEM;
 
 	ret = -ENOMEM;
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
						obj_request);
 	if (!obj_request->osd_req)
 		goto out;
@@ -2995,7 +3120,7 @@ static struct rbd_obj_request *rbd_obj_watch_request_helper(
 	if (!obj_request)
 		return ERR_PTR(-ENOMEM);
 
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, true, 1,
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_WRITE, 1,
						obj_request);
 	if (!obj_request->osd_req) {
 		ret = -ENOMEM;
@@ -3133,7 +3258,7 @@ static int rbd_obj_method_sync(struct rbd_device *rbd_dev,
 	obj_request->pages = pages;
 	obj_request->page_count = page_count;
 
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
						obj_request);
 	if (!obj_request->osd_req)
 		goto out;
@@ -3183,11 +3308,20 @@ out:
 static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
 {
 	struct rbd_img_request *img_request;
+	struct ceph_snap_context *snapc = NULL;
 	u64 offset = (u64)blk_rq_pos(rq) << SECTOR_SHIFT;
 	u64 length = blk_rq_bytes(rq);
-	bool wr = rq_data_dir(rq) == WRITE;
+	enum obj_operation_type op_type;
+	u64 mapping_size;
 	int result;
 
+	if (rq->cmd_flags & REQ_DISCARD)
+		op_type = OBJ_OP_DISCARD;
+	else if (rq->cmd_flags & REQ_WRITE)
+		op_type = OBJ_OP_WRITE;
+	else
+		op_type = OBJ_OP_READ;
+
 	/* Ignore/skip any zero-length requests */
 
 	if (!length) {
@@ -3196,9 +3330,9 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
 		goto err_rq;
 	}
 
-	/* Disallow writes to a read-only device */
+	/* Only reads are allowed to a read-only device */
 
-	if (wr) {
+	if (op_type != OBJ_OP_READ) {
 		if (rbd_dev->mapping.read_only) {
 			result = -EROFS;
 			goto err_rq;
@@ -3226,21 +3360,35 @@ static void rbd_handle_request(struct rbd_device *rbd_dev, struct request *rq)
 		goto err_rq;	/* Shouldn't happen */
 	}
 
-	if (offset + length > rbd_dev->mapping.size) {
+	down_read(&rbd_dev->header_rwsem);
+	mapping_size = rbd_dev->mapping.size;
+	if (op_type != OBJ_OP_READ) {
+		snapc = rbd_dev->header.snapc;
+		ceph_get_snap_context(snapc);
+	}
+	up_read(&rbd_dev->header_rwsem);
+
+	if (offset + length > mapping_size) {
 		rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)", offset,
-			length, rbd_dev->mapping.size);
+			length, mapping_size);
 		result = -EIO;
 		goto err_rq;
 	}
 
-	img_request = rbd_img_request_create(rbd_dev, offset, length, wr);
+	img_request = rbd_img_request_create(rbd_dev, offset, length, op_type,
+					     snapc);
 	if (!img_request) {
 		result = -ENOMEM;
 		goto err_rq;
 	}
 	img_request->rq = rq;
 
-	result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO, rq->bio);
+	if (op_type == OBJ_OP_DISCARD)
+		result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,
+					      NULL);
+	else
+		result = rbd_img_request_fill(img_request, OBJ_REQUEST_BIO,
+					      rq->bio);
 	if (result)
 		goto err_img_request;
 
@@ -3255,7 +3403,9 @@ err_img_request:
 err_rq:
 	if (result)
 		rbd_warn(rbd_dev, "%s %llx at %llx result %d",
-			wr ? "write" : "read", length, offset, result);
+			obj_op_name(op_type), length, offset, result);
+	if (snapc)
+		ceph_put_snap_context(snapc);
 	blk_end_request_all(rq, result);
 }
@@ -3393,7 +3543,7 @@ static int rbd_obj_read_sync(struct rbd_device *rbd_dev,
 	obj_request->pages = pages;
 	obj_request->page_count = page_count;
 
-	obj_request->osd_req = rbd_osd_req_create(rbd_dev, false, 1,
+	obj_request->osd_req = rbd_osd_req_create(rbd_dev, OBJ_OP_READ, 1,
						obj_request);
 	if (!obj_request->osd_req)
 		goto out;
@@ -3610,6 +3760,13 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
 	blk_queue_io_min(q, segment_size);
 	blk_queue_io_opt(q, segment_size);
 
+	/* enable the discard support */
+	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);
+	q->limits.discard_granularity = segment_size;
+	q->limits.discard_alignment = segment_size;
+	q->limits.max_discard_sectors = segment_size / SECTOR_SIZE;
+	q->limits.discard_zeroes_data = 1;
+
 	blk_queue_merge_bvec(q, rbd_merge_bvec);
 	disk->queue = q;
 
@@ -4924,7 +5081,7 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 		ret = image_id ? 0 : -ENOMEM;
 		if (!ret)
 			rbd_dev->image_format = 1;
-	} else if (ret > sizeof (__le32)) {
+	} else if (ret >= 0) {
 		void *p = response;
 
 		image_id = ceph_extract_encoded_string(&p, p + ret,
@@ -4932,8 +5089,6 @@ static int rbd_dev_image_id(struct rbd_device *rbd_dev)
 		ret = PTR_ERR_OR_ZERO(image_id);
 		if (!ret)
 			rbd_dev->image_format = 2;
-	} else {
-		ret = -EINVAL;
 	}
 
 	if (!ret) {
@@ -5087,9 +5242,12 @@ static int rbd_dev_device_setup(struct rbd_device *rbd_dev)
 	set_capacity(rbd_dev->disk, rbd_dev->mapping.size / SECTOR_SIZE);
 	set_disk_ro(rbd_dev->disk, rbd_dev->mapping.read_only);
 
-	rbd_dev->rq_wq = alloc_workqueue(rbd_dev->disk->disk_name, 0, 0);
-	if (!rbd_dev->rq_wq)
+	rbd_dev->rq_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0,
+					 rbd_dev->disk->disk_name);
+	if (!rbd_dev->rq_wq) {
+		ret = -ENOMEM;
 		goto err_out_mapping;
+	}
 
 	ret = rbd_bus_add_dev(rbd_dev);
 	if (ret)
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 9f913087cb81..d8b2488aaade 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -62,12 +62,6 @@ static DEFINE_SPINLOCK(rsxx_ida_lock);
 
 /* --------------------Debugfs Setup ------------------- */
 
-struct rsxx_cram {
-	u32 f_pos;
-	u32 offset;
-	void *i_private;
-};
-
 static int rsxx_attr_pci_regs_show(struct seq_file *m, void *p)
 {
 	struct rsxx_cardinfo *card = m->private;
@@ -184,93 +178,50 @@ static int rsxx_attr_pci_regs_open(struct inode *inode, struct file *file)
 static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
			      size_t cnt, loff_t *ppos)
 {
-	struct rsxx_cram *info = fp->private_data;
-	struct rsxx_cardinfo *card = info->i_private;
+	struct rsxx_cardinfo *card = file_inode(fp)->i_private;
 	char *buf;
-	int st;
+	ssize_t st;
 
-	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	buf = kzalloc(cnt, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
-	info->f_pos = (u32)*ppos + info->offset;
-
-	st = rsxx_creg_read(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
-	if (st)
-		return st;
-
-	st = copy_to_user(ubuf, buf, cnt);
+	st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
+	if (!st)
+		st = copy_to_user(ubuf, buf, cnt);
+	kfree(buf);
 	if (st)
 		return st;
-
-	info->offset += cnt;
-
-	kfree(buf);
-
+	*ppos += cnt;
 	return cnt;
 }
 
 static ssize_t rsxx_cram_write(struct file *fp, const char __user *ubuf,
			       size_t cnt, loff_t *ppos)
 {
-	struct rsxx_cram *info = fp->private_data;
-	struct rsxx_cardinfo *card = info->i_private;
+	struct rsxx_cardinfo *card = file_inode(fp)->i_private;
 	char *buf;
-	int st;
+	ssize_t st;
 
-	buf = kzalloc(sizeof(*buf) * cnt, GFP_KERNEL);
+	buf = kzalloc(cnt, GFP_KERNEL);
 	if (!buf)
 		return -ENOMEM;
 
 	st = copy_from_user(buf, ubuf, cnt);
+	if (!st)
+		st = rsxx_creg_write(card, CREG_ADD_CRAM + (u32)*ppos, cnt,
+				     buf, 1);
+	kfree(buf);
 	if (st)
 		return st;
-
-	info->f_pos = (u32)*ppos + info->offset;
-
-	st = rsxx_creg_write(card, CREG_ADD_CRAM + info->f_pos, cnt, buf, 1);
-	if (st)
-		return st;
-
-	info->offset += cnt;
-
-	kfree(buf);
-
+	*ppos += cnt;
 	return cnt;
 }
 
-static int rsxx_cram_open(struct inode *inode, struct file *file)
-{
-	struct rsxx_cram *info = kzalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	info->i_private = inode->i_private;
-	info->f_pos = file->f_pos;
-	file->private_data = info;
-
-	return 0;
-}
-
-static int rsxx_cram_release(struct inode *inode, struct file *file)
-{
-	struct rsxx_cram *info = file->private_data;
-
-	if (!info)
-		return 0;
-
-	kfree(info);
-	file->private_data = NULL;
-
-	return 0;
-}
-
 static const struct file_operations debugfs_cram_fops = {
 	.owner		= THIS_MODULE,
-	.open		= rsxx_cram_open,
 	.read		= rsxx_cram_read,
 	.write		= rsxx_cram_write,
-	.release	= rsxx_cram_release,
 };
 
 static const struct file_operations debugfs_stats_fops = {
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index 5814deb6963d..756b8ec00f16 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -9,6 +9,7 @@
 #include <linux/blkdev.h>
 #include <linux/hdreg.h>
 #include <linux/genhd.h>
+#include <linux/cdrom.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/completion.h>
@@ -22,8 +23,8 @@
 
 #define DRV_MODULE_NAME		"sunvdc"
 #define PFX DRV_MODULE_NAME	": "
-#define DRV_MODULE_VERSION	"1.0"
-#define DRV_MODULE_RELDATE	"June 25, 2007"
+#define DRV_MODULE_VERSION	"1.1"
+#define DRV_MODULE_RELDATE	"February 13, 2013"
 
 static char version[] =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
@@ -32,7 +33,7 @@ MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_MODULE_VERSION);
 
-#define VDC_TX_RING_SIZE	256
+#define VDC_TX_RING_SIZE	512
 
 #define WAITING_FOR_LINK_UP	0x01
 #define WAITING_FOR_TX_SPACE	0x02
@@ -65,10 +66,10 @@ struct vdc_port {
 	u64			operations;
 	u32			vdisk_size;
 	u8			vdisk_type;
+	u8			vdisk_mtype;
 
 	char			disk_name[32];
 
-	struct vio_disk_geom	geom;
 	struct vio_disk_vtoc	label;
 };
 
@@ -79,9 +80,16 @@ static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
 
 /* Ordered from largest major to lowest */
 static struct vio_version vdc_versions[] = {
+	{ .major = 1, .minor = 1 },
 	{ .major = 1, .minor = 0 },
 };
 
+static inline int vdc_version_supported(struct vdc_port *port,
					u16 major, u16 minor)
+{
+	return port->vio.ver.major == major && port->vio.ver.minor >= minor;
+}
+
 #define VDCBLK_NAME	"vdisk"
 static int vdc_major;
 #define PARTITION_SHIFT	3
@@ -94,18 +102,54 @@ static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
 static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
 	struct gendisk *disk = bdev->bd_disk;
-	struct vdc_port *port = disk->private_data;
+	sector_t nsect = get_capacity(disk);
+	sector_t cylinders = nsect;
 
-	geo->heads = (u8) port->geom.num_hd;
-	geo->sectors = (u8) port->geom.num_sec;
-	geo->cylinders = port->geom.num_cyl;
+	geo->heads = 0xff;
+	geo->sectors = 0x3f;
+	sector_div(cylinders, geo->heads * geo->sectors);
+	geo->cylinders = cylinders;
+	if ((sector_t)(geo->cylinders + 1) * geo->heads * geo->sectors < nsect)
+		geo->cylinders = 0xffff;
 
 	return 0;
 }
 
+/* Add ioctl/CDROM_GET_CAPABILITY to support cdrom_id in udev
+ * when vdisk_mtype is VD_MEDIA_TYPE_CD or VD_MEDIA_TYPE_DVD.
+ * Needed to be able to install inside an ldom from an iso image.
+ */
+static int vdc_ioctl(struct block_device *bdev, fmode_t mode,
+		     unsigned command, unsigned long argument)
+{
+	int i;
+	struct gendisk *disk;
+
+	switch (command) {
+	case CDROMMULTISESSION:
+		pr_debug(PFX "Multisession CDs not supported\n");
+		for (i = 0; i < sizeof(struct cdrom_multisession); i++)
+			if (put_user(0, (char __user *)(argument + i)))
+				return -EFAULT;
+		return 0;
+
+	case CDROM_GET_CAPABILITY:
+		disk = bdev->bd_disk;
+
+		if (bdev->bd_disk && (disk->flags & GENHD_FL_CD))
+			return 0;
+		return -EINVAL;
+
+	default:
+		pr_debug(PFX "ioctl %08x not supported\n", command);
+		return -EINVAL;
+	}
+}
+
 static const struct block_device_operations vdc_fops = {
 	.owner		= THIS_MODULE,
 	.getgeo		= vdc_getgeo,
+	.ioctl		= vdc_ioctl,
 };
 
 static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
@@ -165,9 +209,9 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
 	struct vio_disk_attr_info *pkt = arg;
 
 	viodbg(HS, "GOT ATTR stype[0x%x] ops[%llx] disk_size[%llu] disk_type[%x] "
-	       "xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
+	       "mtype[0x%x] xfer_mode[0x%x] blksz[%u] max_xfer[%llu]\n",
	       pkt->tag.stype, pkt->operations,
-	       pkt->vdisk_size, pkt->vdisk_type,
+	       pkt->vdisk_size, pkt->vdisk_type, pkt->vdisk_mtype,
	       pkt->xfer_mode, pkt->vdisk_block_size,
	       pkt->max_xfer_size);
 
@@ -192,8 +236,11 @@ static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
 	}
 
 	port->operations = pkt->operations;
-	port->vdisk_size = pkt->vdisk_size;
 	port->vdisk_type = pkt->vdisk_type;
+	if (vdc_version_supported(port, 1, 1)) {
+		port->vdisk_size = pkt->vdisk_size;
+		port->vdisk_mtype = pkt->vdisk_mtype;
+	}
 	if (pkt->max_xfer_size < port->max_xfer_size)
 		port->max_xfer_size = pkt->max_xfer_size;
 	port->vdisk_block_size = pkt->vdisk_block_size;
@@ -236,7 +283,9 @@ static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
 
 	__blk_end_request(req, (desc->status ? -EIO : 0), desc->size);
 
-	if (blk_queue_stopped(port->disk->queue))
+	/* restart blk queue when ring is half emptied */
+	if (blk_queue_stopped(port->disk->queue) &&
+	    vdc_tx_dring_avail(dr) * 100 / VDC_TX_RING_SIZE >= 50)
 		blk_start_queue(port->disk->queue);
 }
 
@@ -388,12 +437,6 @@ static int __send_request(struct request *req)
 	for (i = 0; i < nsg; i++)
 		len += sg[i].length;
 
-	if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
-		blk_stop_queue(port->disk->queue);
-		err = -ENOMEM;
-		goto out;
-	}
-
 	desc = vio_dring_cur(dr);
 
 	err = ldc_map_sg(port->vio.lp, sg, nsg,
@@ -433,21 +476,32 @@ static int __send_request(struct request *req)
 		port->req_id++;
 		dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
 	}
-out:
 
 	return err;
 }
 
-static void do_vdc_request(struct request_queue *q)
+static void do_vdc_request(struct request_queue *rq)
 {
-	while (1) {
-		struct request *req = blk_fetch_request(q);
+	struct request *req;
 
-		if (!req)
-			break;
+	while ((req = blk_peek_request(rq)) != NULL) {
+		struct vdc_port *port;
+		struct vio_dring_state *dr;
+
+		port = req->rq_disk->private_data;
+		dr = &port->vio.drings[VIO_DRIVER_TX_RING];
+		if (unlikely(vdc_tx_dring_avail(dr) < 1))
+			goto wait;
+
+		blk_start_request(req);
 
-		if (__send_request(req) < 0)
-			__blk_end_request_all(req, -EIO);
+		if (__send_request(req) < 0) {
+			blk_requeue_request(rq, req);
+wait:
+			/* Avoid pointless unplugs. */
+			blk_stop_queue(rq);
+			break;
+		}
 	}
 }
@@ -663,18 +717,27 @@ static int probe_disk(struct vdc_port *port)
 		return err;
 	}
 
-	err = generic_request(port, VD_OP_GET_DISKGEOM,
-			      &port->geom, sizeof(port->geom));
-	if (err < 0) {
-		printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
-		       "error %d\n", err);
-		return err;
+	if (vdc_version_supported(port, 1, 1)) {
+		/* vdisk_size should be set during the handshake, if it wasn't
+		 * then the underlying disk is reserved by another system
+		 */
+		if (port->vdisk_size == -1)
+			return -ENODEV;
+	} else {
+		struct vio_disk_geom geom;
+
+		err = generic_request(port, VD_OP_GET_DISKGEOM,
+				      &geom, sizeof(geom));
+		if (err < 0) {
+			printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
+			       "error %d\n", err);
+			return err;
+		}
+		port->vdisk_size = ((u64)geom.num_cyl *
+				    (u64)geom.num_hd *
+				    (u64)geom.num_sec);
 	}
 
-	port->vdisk_size = ((u64)port->geom.num_cyl *
-			    (u64)port->geom.num_hd *
-			    (u64)port->geom.num_sec);
-
 	q = blk_init_queue(do_vdc_request, &port->vio.lock);
 	if (!q) {
 		printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
@@ -691,6 +754,10 @@ static int probe_disk(struct vdc_port *port)
 
 	port->disk = g;
 
+	/* Each segment in a request is up to an aligned page in size. */
+	blk_queue_segment_boundary(q, PAGE_SIZE - 1);
+	blk_queue_max_segment_size(q, PAGE_SIZE);
+
 	blk_queue_max_segments(q, port->ring_cookies);
 	blk_queue_max_hw_sectors(q, port->max_xfer_size);
 	g->major = vdc_major;
@@ -704,9 +771,32 @@ static int probe_disk(struct vdc_port *port)
 
 	set_capacity(g, port->vdisk_size);
 
-	printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n",
+	if (vdc_version_supported(port, 1, 1)) {
+		switch (port->vdisk_mtype) {
+		case VD_MEDIA_TYPE_CD:
+			pr_info(PFX "Virtual CDROM %s\n", port->disk_name);
+			g->flags |= GENHD_FL_CD;
+			g->flags |= GENHD_FL_REMOVABLE;
+			set_disk_ro(g, 1);
+			break;
+
+		case VD_MEDIA_TYPE_DVD:
+			pr_info(PFX "Virtual DVD %s\n", port->disk_name);
+			g->flags |= GENHD_FL_CD;
+			g->flags |= GENHD_FL_REMOVABLE;
+			set_disk_ro(g, 1);
+			break;
+
+		case VD_MEDIA_TYPE_FIXED:
+			pr_info(PFX "Virtual Hard disk %s\n", port->disk_name);
+			break;
+		}
+	}
+
+	pr_info(PFX "%s: %u sectors (%u MB) protocol %d.%d\n",
	       g->disk_name,
-	       port->vdisk_size, (port->vdisk_size >> (20 - 9)));
+	       port->vdisk_size, (port->vdisk_size >> (20 - 9)),
+	       port->vio.ver.major, port->vio.ver.minor);
 
 	add_disk(g);
 
@@ -765,6 +855,7 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	else
 		snprintf(port->disk_name, sizeof(port->disk_name),
			 VDCBLK_NAME "%c", 'a' + ((int)vdev->dev_no % 26));
+	port->vdisk_size = -1;
 
 	err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
			      vdc_versions, ARRAY_SIZE(vdc_versions),
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0a581400de0f..c6a27d54ad62 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -41,12 +41,6 @@ struct virtio_blk
 	/* Process context for config space updates */
 	struct work_struct config_work;
 
-	/* Lock for config space updates */
-	struct mutex config_lock;
-
-	/* enable config space updates */
-	bool config_enable;
-
 	/* What host tells us, plus 2 for header & tailer. */
 	unsigned int sg_elems;
 
@@ -135,7 +129,7 @@ static inline void virtblk_request_done(struct request *req)
 		req->errors = (error != 0);
 	}
 
-	blk_mq_end_io(req, error);
+	blk_mq_end_request(req, error);
 }
 
 static void virtblk_done(struct virtqueue *vq)
@@ -164,14 +158,14 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
+		bool last)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
 	int qid = hctx->queue_num;
-	const bool last = (req->cmd_flags & REQ_END) != 0;
 	int err;
 	bool notify = false;
 
@@ -205,6 +199,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 		}
 	}
 
+	blk_mq_start_request(req);
+
 	num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
 	if (num) {
 		if (rq_data_dir(vbr->req) == WRITE)
@@ -347,10 +343,6 @@ static void virtblk_config_changed_work(struct work_struct *work)
 	char *envp[] = { "RESIZE=1", NULL };
 	u64 capacity, size;
 
-	mutex_lock(&vblk->config_lock);
-	if (!vblk->config_enable)
-		goto done;
-
 	/* Host must always specify the capacity. */
 	virtio_cread(vdev, struct virtio_blk_config, capacity, &capacity);
 
@@ -374,8 +366,6 @@ static void virtblk_config_changed_work(struct work_struct *work)
 	set_capacity(vblk->disk, capacity);
 	revalidate_disk(vblk->disk);
 	kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
-done:
-	mutex_unlock(&vblk->config_lock);
 }
 
 static void virtblk_config_changed(struct virtio_device *vdev)
@@ -606,10 +596,8 @@ static int virtblk_probe(struct virtio_device *vdev)
 
 	vblk->vdev = vdev;
 	vblk->sg_elems = sg_elems;
-	mutex_init(&vblk->config_lock);
 
 	INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
-	vblk->config_enable = true;
 
 	err = init_vq(vblk);
 	if (err)
@@ -733,6 +721,8 @@ static int virtblk_probe(struct virtio_device *vdev)
 	if (!err && opt_io_size)
 		blk_queue_io_opt(q, blk_size * opt_io_size);
 
+	virtio_device_ready(vdev);
+
 	add_disk(vblk->disk);
 	err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
 	if (err)
@@ -771,10 +761,8 @@ static void virtblk_remove(struct virtio_device *vdev)
 	int index = vblk->index;
 	int refc;
 
-	/* Prevent config work handler from accessing the device. */
-	mutex_lock(&vblk->config_lock);
-	vblk->config_enable = false;
-	mutex_unlock(&vblk->config_lock);
+	/* Make sure no work handler is accessing the device. */
+	flush_work(&vblk->config_work);
 
 	del_gendisk(vblk->disk);
 	blk_cleanup_queue(vblk->disk->queue);
@@ -784,8 +772,6 @@ static void virtblk_remove(struct virtio_device *vdev)
 	/* Stop all the virtqueues. */
 	vdev->config->reset(vdev);
 
-	flush_work(&vblk->config_work);
-
 	refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
 	put_disk(vblk->disk);
 	vdev->config->del_vqs(vdev);
@@ -805,11 +791,7 @@ static int virtblk_freeze(struct virtio_device *vdev)
 	/* Ensure we don't receive any more interrupts */
 	vdev->config->reset(vdev);
 
-	/* Prevent config work handler from accessing the device. */
-	mutex_lock(&vblk->config_lock);
-	vblk->config_enable = false;
-	mutex_unlock(&vblk->config_lock);
-
+	/* Make sure no work handler is accessing the device. */
 	flush_work(&vblk->config_work);
 
 	blk_mq_stop_hw_queues(vblk->disk->queue);
@@ -823,12 +805,14 @@ static int virtblk_restore(struct virtio_device *vdev)
 	struct virtio_blk *vblk = vdev->priv;
 	int ret;
 
-	vblk->config_enable = true;
 	ret = init_vq(vdev->priv);
-	if (!ret)
-		blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	if (ret)
+		return ret;
+
+	virtio_device_ready(vdev);
 
-	return ret;
+	blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
+	return 0;
 }
 #endif
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 54f408963201..630a489e757d 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -907,22 +907,17 @@ static int connect_ring(struct backend_info *be)
 	return 0;
 }
 
-
-/* ** Driver Registration ** */
-
-
 static const struct xenbus_device_id xen_blkbk_ids[] = {
 	{ "vbd" },
 	{ "" }
 };
 
-
-static DEFINE_XENBUS_DRIVER(xen_blkbk, ,
+static struct xenbus_driver xen_blkbk_driver = {
+	.ids  = xen_blkbk_ids,
 	.probe = xen_blkbk_probe,
 	.remove = xen_blkbk_remove,
 	.otherend_changed = frontend_changed
-);
-
+};
 
 int xen_blkif_xenbus_init(void)
 {
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 6fc056ed1efd..5ac312f6e0be 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2061,13 +2061,14 @@ static const struct xenbus_device_id blkfront_ids[] = {
 	{ "" }
 };
 
-static DEFINE_XENBUS_DRIVER(blkfront, ,
+static struct xenbus_driver blkfront_driver = {
+	.ids  = blkfront_ids,
 	.probe = blkfront_probe,
 	.remove = blkfront_remove,
 	.resume = blkfront_resume,
 	.otherend_changed = blkback_changed,
 	.is_ready = blkfront_is_ready,
-);
+};
 
 static int __init xlblk_init(void)
 {
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c
index ab3ea62e5dfc..c4328d9d9981 100644
--- a/drivers/block/xsysace.c
+++ b/drivers/block/xsysace.c
@@ -1203,7 +1203,6 @@ static struct platform_driver ace_platform_driver = {
 	.probe = ace_probe,
 	.remove = ace_remove,
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "xsysace",
 		.of_match_table = ace_of_match,
 	},
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 6dd2cef5b865..0e63e8aa8279 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -103,10 +103,10 @@ static ssize_t mem_used_total_show(struct device *dev,
 
 	down_read(&zram->init_lock);
 	if (init_done(zram))
-		val = zs_get_total_size_bytes(meta->mem_pool);
+		val = zs_get_total_pages(meta->mem_pool);
 	up_read(&zram->init_lock);
 
-	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
 }
 
 static ssize_t max_comp_streams_show(struct device *dev,
@@ -122,6 +122,72 @@ static ssize_t max_comp_streams_show(struct device *dev,
 	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
 }
 
+static ssize_t mem_limit_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u64 val;
+	struct zram *zram = dev_to_zram(dev);
+
+	down_read(&zram->init_lock);
+	val = zram->limit_pages;
+	up_read(&zram->init_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
+}
+
+static ssize_t mem_limit_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	u64 limit;
+	char *tmp;
+	struct zram *zram = dev_to_zram(dev);
+
+	limit = memparse(buf, &tmp);
+	if (buf == tmp) /* no chars parsed, invalid input */
+		return -EINVAL;
+
+	down_write(&zram->init_lock);
+	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
+	up_write(&zram->init_lock);
+
+	return len;
+}
+
+static ssize_t mem_used_max_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u64 val = 0;
+	struct zram *zram = dev_to_zram(dev);
+
+	down_read(&zram->init_lock);
+	if (init_done(zram))
+		val = atomic_long_read(&zram->stats.max_used_pages);
+	up_read(&zram->init_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
+}
+
+static ssize_t mem_used_max_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	int err;
+	unsigned long val;
+	struct zram *zram = dev_to_zram(dev);
+	struct zram_meta *meta = zram->meta;
+
+	err = kstrtoul(buf, 10, &val);
+	if (err || val != 0)
+		return -EINVAL;
+
+	down_read(&zram->init_lock);
+	if (init_done(zram))
+		atomic_long_set(&zram->stats.max_used_pages,
+				zs_get_total_pages(meta->mem_pool));
+	up_read(&zram->init_lock);
+
+	return len;
+}
+
 static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
 {
@@ -378,7 +444,6 @@ static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
 	/* Should NEVER happen. Return bio error if it does. */
 	if (unlikely(ret)) {
 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
-		atomic64_inc(&zram->stats.failed_reads);
 		return ret;
 	}
 
@@ -435,6 +500,21 @@ out_cleanup:
 	return ret;
 }
 
+static inline void update_used_max(struct zram *zram,
+					const unsigned long pages)
+{
+	int old_max, cur_max;
+
+	old_max = atomic_long_read(&zram->stats.max_used_pages);
+
+	do {
+		cur_max = old_max;
+		if (pages > cur_max)
+			old_max = atomic_long_cmpxchg(
+				&zram->stats.max_used_pages, cur_max, pages);
+	} while (old_max != cur_max);
+}
+
 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
 {
@@ -446,6 +526,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 	struct zram_meta *meta = zram->meta;
 	struct zcomp_strm *zstrm;
 	bool locked = false;
+	unsigned long alloced_pages;
 
 	page = bvec->bv_page;
 	if (is_partial_io(bvec)) {
@@ -514,6 +595,16 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
 		ret = -ENOMEM;
 		goto out;
 	}
+
+	alloced_pages = zs_get_total_pages(meta->mem_pool);
+	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
+		zs_free(meta->mem_pool, handle);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	update_used_max(zram, alloced_pages);
+
 	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
 
 	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
@@ -547,8 +638,6 @@ out:
 		zcomp_strm_release(zram->comp, zstrm);
 	if (is_partial_io(bvec))
 		kfree(uncmem);
-	if (ret)
-		atomic64_inc(&zram->stats.failed_writes);
 	return ret;
 }
 
@@ -566,6 +655,13 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 		ret = zram_bvec_write(zram, bvec, index, offset);
 	}
 
+	if (unlikely(ret)) {
+		if (rw == READ)
+			atomic64_inc(&zram->stats.failed_reads);
+		else
+			atomic64_inc(&zram->stats.failed_writes);
+	}
+
 	return ret;
 }
 
@@ -602,6 +698,7 @@ static void zram_bio_discard(struct zram *zram, u32 index,
 		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
 		zram_free_page(zram, index);
 		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
+		atomic64_inc(&zram->stats.notify_free);
 		index++;
 		n -= PAGE_SIZE;
 	}
@@ -613,6 +710,9 @@ static void zram_reset_device(struct zram *zram, bool reset_capacity)
 	struct zram_meta *meta;
 
 	down_write(&zram->init_lock);
+
+	zram->limit_pages = 0;
+
 	if (!init_done(zram)) {
 		up_write(&zram->init_lock);
 		return;
@@ -853,6 +953,10 @@ static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
 static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
 static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
 static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
+static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show,
+		mem_limit_store);
+static DEVICE_ATTR(mem_used_max, S_IRUGO | S_IWUSR, mem_used_max_show,
+		mem_used_max_store);
 static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR,
		max_comp_streams_show, max_comp_streams_store);
 static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR,
@@ -881,6 +985,8 @@ static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_orig_data_size.attr,
 	&dev_attr_compr_data_size.attr,
 	&dev_attr_mem_used_total.attr,
+	&dev_attr_mem_limit.attr,
+	&dev_attr_mem_used_max.attr,
 	&dev_attr_max_comp_streams.attr,
 	&dev_attr_comp_algorithm.attr,
 	NULL,
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 5b0afde729cd..c6ee271317f5 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -84,12 +84,13 @@ struct zram_stats {
 	atomic64_t compr_data_size;	/* compressed size of pages stored */
 	atomic64_t num_reads;	/* failed + successful */
 	atomic64_t num_writes;	/* --do-- */
-	atomic64_t failed_reads;	/* should NEVER! happen */
+	atomic64_t failed_reads;	/* can happen when memory is too low */
 	atomic64_t failed_writes;	/* can happen when memory is too low */
 	atomic64_t invalid_io;	/* non-page-aligned I/O requests */
 	atomic64_t notify_free;	/* no. of swap slot free notifications */
 	atomic64_t zero_pages;		/* no. of zero filled pages */
 	atomic64_t pages_stored;	/* no. of pages currently stored */
+	atomic_long_t max_used_pages;	/* no. of maximum pages stored */
 };
 
 struct zram_meta {
@@ -112,6 +113,11 @@ struct zram {
 	u64 disksize;	/* bytes */
 	int max_comp_streams;
 	struct zram_stats stats;
+	/*
+	 * the number of pages zram can consume for storing compressed data
+	 */
+	unsigned long limit_pages;
+
 	char compressor[10];
 };
 
#endif