summaryrefslogtreecommitdiff
path: root/drivers/block
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/Kconfig9
-rw-r--r--drivers/block/Makefile2
-rw-r--r--drivers/block/drbd/drbd_bitmap.c49
-rw-r--r--drivers/block/drbd/drbd_req.c2
-rw-r--r--drivers/block/nbd.c6
-rw-r--r--drivers/block/null_blk/main.c108
-rw-r--r--drivers/block/null_blk/null_blk.h2
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/ps3vram.c2
-rw-r--r--drivers/block/rnbd/rnbd-clt-sysfs.c2
-rw-r--r--drivers/block/rnbd/rnbd-clt.c201
-rw-r--r--drivers/block/rnbd/rnbd-clt.h18
-rw-r--r--drivers/block/rnbd/rnbd-srv.c20
-rw-r--r--drivers/block/rnbd/rnbd-srv.h4
-rw-r--r--drivers/block/sx8.c1582
-rw-r--r--drivers/block/ublk_drv.c348
16 files changed, 568 insertions, 1789 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index e19fcab016ba..db1b4b202646 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -248,15 +248,6 @@ config BLK_DEV_NBD
If unsure, say N.
-config BLK_DEV_SX8
- tristate "Promise SATA SX8 support"
- depends on PCI
- help
- Saying Y or M here will enable support for the
- Promise SATA SX8 controllers.
-
- Use devices /dev/sx8/$N and /dev/sx8/$Np$M.
-
config BLK_DEV_RAM
tristate "RAM block device support"
help
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index be631352567e..101612cba303 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -26,8 +26,6 @@ obj-$(CONFIG_SUNVDC) += sunvdc.o
obj-$(CONFIG_BLK_DEV_NBD) += nbd.o
obj-$(CONFIG_VIRTIO_BLK) += virtio_blk.o
-obj-$(CONFIG_BLK_DEV_SX8) += sx8.o
-
obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += xen-blkfront.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += xen-blkback/
obj-$(CONFIG_BLK_DEV_DRBD) += drbd/
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 603f6828dd79..7d9db33363de 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -974,25 +974,58 @@ static void drbd_bm_endio(struct bio *bio)
}
}
+/* For the layout, see comment above drbd_md_set_sector_offsets(). */
+static inline sector_t drbd_md_last_bitmap_sector(struct drbd_backing_dev *bdev)
+{
+ switch (bdev->md.meta_dev_idx) {
+ case DRBD_MD_INDEX_INTERNAL:
+ case DRBD_MD_INDEX_FLEX_INT:
+ return bdev->md.md_offset + bdev->md.al_offset -1;
+ case DRBD_MD_INDEX_FLEX_EXT:
+ default:
+ return bdev->md.md_offset + bdev->md.md_size_sect -1;
+ }
+}
+
static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_hold(local)
{
struct drbd_device *device = ctx->device;
enum req_op op = ctx->flags & BM_AIO_READ ? REQ_OP_READ : REQ_OP_WRITE;
- struct bio *bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op,
- GFP_NOIO, &drbd_md_io_bio_set);
struct drbd_bitmap *b = device->bitmap;
+ struct bio *bio;
struct page *page;
+ sector_t last_bm_sect;
+ sector_t first_bm_sect;
+ sector_t on_disk_sector;
unsigned int len;
- sector_t on_disk_sector =
- device->ldev->md.md_offset + device->ldev->md.bm_offset;
- on_disk_sector += ((sector_t)page_nr) << (PAGE_SHIFT-9);
+ first_bm_sect = device->ldev->md.md_offset + device->ldev->md.bm_offset;
+ on_disk_sector = first_bm_sect + (((sector_t)page_nr) << (PAGE_SHIFT-SECTOR_SHIFT));
/* this might happen with very small
* flexible external meta data device,
* or with PAGE_SIZE > 4k */
- len = min_t(unsigned int, PAGE_SIZE,
- (drbd_md_last_sector(device->ldev) - on_disk_sector + 1)<<9);
+ last_bm_sect = drbd_md_last_bitmap_sector(device->ldev);
+ if (first_bm_sect <= on_disk_sector && last_bm_sect >= on_disk_sector) {
+ sector_t len_sect = last_bm_sect - on_disk_sector + 1;
+ if (len_sect < PAGE_SIZE/SECTOR_SIZE)
+ len = (unsigned int)len_sect*SECTOR_SIZE;
+ else
+ len = PAGE_SIZE;
+ } else {
+ if (__ratelimit(&drbd_ratelimit_state)) {
+ drbd_err(device, "Invalid offset during on-disk bitmap access: "
+ "page idx %u, sector %llu\n", page_nr, on_disk_sector);
+ }
+ ctx->error = -EIO;
+ bm_set_page_io_err(b->bm_pages[page_nr]);
+ if (atomic_dec_and_test(&ctx->in_flight)) {
+ ctx->done = 1;
+ wake_up(&device->misc_wait);
+ kref_put(&ctx->kref, &drbd_bm_aio_ctx_destroy);
+ }
+ return;
+ }
/* serialize IO on this page */
bm_page_lock_io(device, page_nr);
@@ -1007,6 +1040,8 @@ static void bm_page_io_async(struct drbd_bm_aio_ctx *ctx, int page_nr) __must_ho
bm_store_page_idx(page, page_nr);
} else
page = b->bm_pages[page_nr];
+ bio = bio_alloc_bioset(device->ldev->md_bdev, 1, op, GFP_NOIO,
+ &drbd_md_io_bio_set);
bio->bi_iter.bi_sector = on_disk_sector;
/* bio_add_page of a single page to an empty bio will always succeed,
* according to api. Do we want to assert that? */
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c
index 6d8dd14458c6..8f7f144e54f3 100644
--- a/drivers/block/drbd/drbd_req.c
+++ b/drivers/block/drbd/drbd_req.c
@@ -1608,7 +1608,7 @@ void drbd_submit_bio(struct bio *bio)
{
struct drbd_device *device = bio->bi_bdev->bd_disk->private_data;
- blk_queue_split(&bio);
+ bio = bio_split_to_limits(bio);
/*
* what we "blindly" assume:
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index f5d098a148cb..2a709daefbc4 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -11,6 +11,8 @@
* (part of code stolen from loop.c)
*/
+#define pr_fmt(fmt) "nbd: " fmt
+
#include <linux/major.h>
#include <linux/blkdev.h>
@@ -1950,7 +1952,7 @@ again:
test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) ||
!refcount_inc_not_zero(&nbd->refs)) {
mutex_unlock(&nbd_index_mutex);
- pr_err("nbd: device at index %d is going down\n",
+ pr_err("device at index %d is going down\n",
index);
return -EINVAL;
}
@@ -1961,7 +1963,7 @@ again:
if (!nbd) {
nbd = nbd_dev_add(index, 2);
if (IS_ERR(nbd)) {
- pr_err("nbd: failed to add new device\n");
+ pr_err("failed to add new device\n");
return PTR_ERR(nbd);
}
}
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 8b224ede2e33..c451c477978f 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -201,6 +201,22 @@ static bool g_use_per_node_hctx;
module_param_named(use_per_node_hctx, g_use_per_node_hctx, bool, 0444);
MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: false");
+static bool g_memory_backed;
+module_param_named(memory_backed, g_memory_backed, bool, 0444);
+MODULE_PARM_DESC(memory_backed, "Create a memory-backed block device. Default: false");
+
+static bool g_discard;
+module_param_named(discard, g_discard, bool, 0444);
+MODULE_PARM_DESC(discard, "Support discard operations (requires memory-backed null_blk device). Default: false");
+
+static unsigned long g_cache_size;
+module_param_named(cache_size, g_cache_size, ulong, 0444);
+MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (none)");
+
+static unsigned int g_mbps;
+module_param_named(mbps, g_mbps, uint, 0444);
+MODULE_PARM_DESC(mbps, "Limit maximum bandwidth (in MiB/s). Default: 0 (no limit)");
+
static bool g_zoned;
module_param_named(zoned, g_zoned, bool, S_IRUGO);
MODULE_PARM_DESC(zoned, "Make device as a host-managed zoned block device. Default: false");
@@ -409,6 +425,8 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
+NULLB_DEVICE_ATTR(no_sched, bool, NULL);
+NULLB_DEVICE_ATTR(shared_tag_bitmap, bool, NULL);
static ssize_t nullb_device_power_show(struct config_item *item, char *page)
{
@@ -532,6 +550,8 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_max_open,
&nullb_device_attr_zone_max_active,
&nullb_device_attr_virt_boundary,
+ &nullb_device_attr_no_sched,
+ &nullb_device_attr_shared_tag_bitmap,
NULL,
};
@@ -588,7 +608,13 @@ nullb_group_drop_item(struct config_group *group, struct config_item *item)
static ssize_t memb_group_features_show(struct config_item *item, char *page)
{
return snprintf(page, PAGE_SIZE,
- "memory_backed,discard,bandwidth,cache,badblocks,zoned,zone_size,zone_capacity,zone_nr_conv,zone_max_open,zone_max_active,blocksize,max_sectors,virt_boundary\n");
+ "badblocks,blocking,blocksize,cache_size,"
+ "completion_nsec,discard,home_node,hw_queue_depth,"
+ "irqmode,max_sectors,mbps,memory_backed,no_sched,"
+ "poll_queues,power,queue_mode,shared_tag_bitmap,size,"
+ "submit_queues,use_per_node_hctx,virt_boundary,zoned,"
+ "zone_capacity,zone_max_active,zone_max_open,"
+ "zone_nr_conv,zone_size\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -650,6 +676,10 @@ static struct nullb_device *null_alloc_dev(void)
dev->irqmode = g_irqmode;
dev->hw_queue_depth = g_hw_queue_depth;
dev->blocking = g_blocking;
+ dev->memory_backed = g_memory_backed;
+ dev->discard = g_discard;
+ dev->cache_size = g_cache_size;
+ dev->mbps = g_mbps;
dev->use_per_node_hctx = g_use_per_node_hctx;
dev->zoned = g_zoned;
dev->zone_size = g_zone_size;
@@ -658,6 +688,8 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
dev->virt_boundary = g_virt_boundary;
+ dev->no_sched = g_no_sched;
+ dev->shared_tag_bitmap = g_shared_tag_bitmap;
return dev;
}
@@ -1655,7 +1687,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
static void cleanup_queue(struct nullb_queue *nq)
{
- kfree(nq->tag_map);
+ bitmap_free(nq->tag_map);
kfree(nq->cmds);
}
@@ -1782,14 +1814,13 @@ static const struct block_device_operations null_rq_ops = {
static int setup_commands(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
- int i, tag_size;
+ int i;
nq->cmds = kcalloc(nq->queue_depth, sizeof(*cmd), GFP_KERNEL);
if (!nq->cmds)
return -ENOMEM;
- tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG;
- nq->tag_map = kcalloc(tag_size, sizeof(unsigned long), GFP_KERNEL);
+ nq->tag_map = bitmap_zalloc(nq->queue_depth, GFP_KERNEL);
if (!nq->tag_map) {
kfree(nq->cmds);
return -ENOMEM;
@@ -1866,31 +1897,48 @@ static int null_gendisk_register(struct nullb *nullb)
static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
{
+ unsigned int flags = BLK_MQ_F_SHOULD_MERGE;
+ int hw_queues, numa_node;
+ unsigned int queue_depth;
int poll_queues;
+ if (nullb) {
+ hw_queues = nullb->dev->submit_queues;
+ poll_queues = nullb->dev->poll_queues;
+ queue_depth = nullb->dev->hw_queue_depth;
+ numa_node = nullb->dev->home_node;
+ if (nullb->dev->no_sched)
+ flags |= BLK_MQ_F_NO_SCHED;
+ if (nullb->dev->shared_tag_bitmap)
+ flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (nullb->dev->blocking)
+ flags |= BLK_MQ_F_BLOCKING;
+ } else {
+ hw_queues = g_submit_queues;
+ poll_queues = g_poll_queues;
+ queue_depth = g_hw_queue_depth;
+ numa_node = g_home_node;
+ if (g_no_sched)
+ flags |= BLK_MQ_F_NO_SCHED;
+ if (g_shared_tag_bitmap)
+ flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ if (g_blocking)
+ flags |= BLK_MQ_F_BLOCKING;
+ }
+
set->ops = &null_mq_ops;
- set->nr_hw_queues = nullb ? nullb->dev->submit_queues :
- g_submit_queues;
- poll_queues = nullb ? nullb->dev->poll_queues : g_poll_queues;
- if (poll_queues)
- set->nr_hw_queues += poll_queues;
- set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
- g_hw_queue_depth;
- set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
set->cmd_size = sizeof(struct nullb_cmd);
- set->flags = BLK_MQ_F_SHOULD_MERGE;
- if (g_no_sched)
- set->flags |= BLK_MQ_F_NO_SCHED;
- if (g_shared_tag_bitmap)
- set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;
+ set->flags = flags;
set->driver_data = nullb;
- if (poll_queues)
+ set->nr_hw_queues = hw_queues;
+ set->queue_depth = queue_depth;
+ set->numa_node = numa_node;
+ if (poll_queues) {
+ set->nr_hw_queues += poll_queues;
set->nr_maps = 3;
- else
+ } else {
set->nr_maps = 1;
-
- if ((nullb && nullb->dev->blocking) || g_blocking)
- set->flags |= BLK_MQ_F_BLOCKING;
+ }
return blk_mq_alloc_tag_set(set);
}
@@ -2042,8 +2090,13 @@ static int null_add_dev(struct nullb_device *dev)
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q);
mutex_lock(&lock);
- nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
- dev->index = nullb->index;
+ rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL);
+ if (rv < 0) {
+ mutex_unlock(&lock);
+ goto out_cleanup_zone;
+ }
+ nullb->index = rv;
+ dev->index = rv;
mutex_unlock(&lock);
blk_queue_logical_block_size(nullb->q, dev->blocksize);
@@ -2069,7 +2122,7 @@ static int null_add_dev(struct nullb_device *dev)
rv = null_gendisk_register(nullb);
if (rv)
- goto out_cleanup_zone;
+ goto out_ida_free;
mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
@@ -2078,6 +2131,9 @@ static int null_add_dev(struct nullb_device *dev)
pr_info("disk %s created\n", nullb->disk_name);
return 0;
+
+out_ida_free:
+ ida_free(&nullb_indexes, nullb->index);
out_cleanup_zone:
null_free_zoned_dev(dev);
out_cleanup_disk:
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 6fbf0a1b2622..94ff68052b1e 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -113,6 +113,8 @@ struct nullb_device {
bool discard; /* if support discard */
bool zoned; /* if device is zoned */
bool virt_boundary; /* virtual boundary on/off for the device */
+ bool no_sched; /* no IO scheduler for the device */
+ bool shared_tag_bitmap; /* use hostwide shared tags */
};
struct nullb {
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 01a15dbd9cde..4cea3b08087e 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2399,7 +2399,7 @@ static void pkt_submit_bio(struct bio *bio)
struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
struct bio *split;
- blk_queue_split(&bio);
+ bio = bio_split_to_limits(bio);
pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
(unsigned long long)bio->bi_iter.bi_sector,
diff --git a/drivers/block/ps3vram.c b/drivers/block/ps3vram.c
index d1e0fefec90b..e1d080f680ed 100644
--- a/drivers/block/ps3vram.c
+++ b/drivers/block/ps3vram.c
@@ -586,7 +586,7 @@ static void ps3vram_submit_bio(struct bio *bio)
dev_dbg(&dev->core, "%s\n", __func__);
- blk_queue_split(&bio);
+ bio = bio_split_to_limits(bio);
spin_lock_irq(&priv->lock);
busy = !bio_list_empty(&priv->list);
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index 2be5d87a3ca6..e7c7d9a68168 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -376,7 +376,7 @@ static ssize_t rnbd_clt_resize_dev_store(struct kobject *kobj,
if (ret)
return ret;
- ret = rnbd_clt_resize_disk(dev, (size_t)sectors);
+ ret = rnbd_clt_resize_disk(dev, sectors);
if (ret)
return ret;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index b8d9e2824d9c..04da33a22ef4 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -68,39 +68,18 @@ static inline bool rnbd_clt_get_dev(struct rnbd_clt_dev *dev)
return refcount_inc_not_zero(&dev->refcount);
}
-static int rnbd_clt_set_dev_attr(struct rnbd_clt_dev *dev,
- const struct rnbd_msg_open_rsp *rsp)
+static void rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
+ sector_t new_nsectors)
{
- struct rnbd_clt_session *sess = dev->sess;
-
- if (!rsp->logical_block_size)
- return -EINVAL;
-
- dev->device_id = le32_to_cpu(rsp->device_id);
- dev->nsectors = le64_to_cpu(rsp->nsectors);
- dev->logical_block_size = le16_to_cpu(rsp->logical_block_size);
- dev->physical_block_size = le16_to_cpu(rsp->physical_block_size);
- dev->max_discard_sectors = le32_to_cpu(rsp->max_discard_sectors);
- dev->discard_granularity = le32_to_cpu(rsp->discard_granularity);
- dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
- dev->secure_discard = le16_to_cpu(rsp->secure_discard);
- dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
- dev->fua = !!(rsp->cache_policy & RNBD_FUA);
-
- dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
- dev->max_segments = sess->max_segments;
-
- return 0;
-}
+ if (get_capacity(dev->gd) == new_nsectors)
+ return;
-static int rnbd_clt_change_capacity(struct rnbd_clt_dev *dev,
- size_t new_nsectors)
-{
- rnbd_clt_info(dev, "Device size changed from %zu to %zu sectors\n",
- dev->nsectors, new_nsectors);
- dev->nsectors = new_nsectors;
- set_capacity_and_notify(dev->gd, dev->nsectors);
- return 0;
+ /*
+ * If the size changed, we need to revalidate it
+ */
+ rnbd_clt_info(dev, "Device size changed from %llu to %llu sectors\n",
+ get_capacity(dev->gd), new_nsectors);
+ set_capacity_and_notify(dev->gd, new_nsectors);
}
static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
@@ -119,19 +98,16 @@ static int process_msg_open_rsp(struct rnbd_clt_dev *dev,
if (dev->dev_state == DEV_STATE_MAPPED_DISCONNECTED) {
u64 nsectors = le64_to_cpu(rsp->nsectors);
- /*
- * If the device was remapped and the size changed in the
- * meantime we need to revalidate it
- */
- if (dev->nsectors != nsectors)
- rnbd_clt_change_capacity(dev, nsectors);
+ rnbd_clt_change_capacity(dev, nsectors);
gd_kobj = &disk_to_dev(dev->gd)->kobj;
kobject_uevent(gd_kobj, KOBJ_ONLINE);
rnbd_clt_info(dev, "Device online, device remapped successfully\n");
}
- err = rnbd_clt_set_dev_attr(dev, rsp);
- if (err)
+ if (!rsp->logical_block_size) {
+ err = -EINVAL;
goto out;
+ }
+ dev->device_id = le32_to_cpu(rsp->device_id);
dev->dev_state = DEV_STATE_MAPPED;
out:
@@ -140,7 +116,7 @@ out:
return err;
}
-int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
+int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize)
{
int ret = 0;
@@ -150,7 +126,7 @@ int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize)
ret = -ENOENT;
goto out;
}
- ret = rnbd_clt_change_capacity(dev, newsize);
+ rnbd_clt_change_capacity(dev, newsize);
out:
mutex_unlock(&dev->lock);
@@ -507,6 +483,11 @@ static void msg_open_conf(struct work_struct *work)
struct rnbd_msg_open_rsp *rsp = iu->buf;
struct rnbd_clt_dev *dev = iu->dev;
int errno = iu->errno;
+ bool from_map = false;
+
+ /* INIT state is only triggered from rnbd_clt_map_device */
+ if (dev->dev_state == DEV_STATE_INIT)
+ from_map = true;
if (errno) {
rnbd_clt_err(dev,
@@ -523,7 +504,9 @@ static void msg_open_conf(struct work_struct *work)
send_msg_close(dev, device_id, RTRS_PERMIT_NOWAIT);
}
}
- kfree(rsp);
+ /* We free rsp in rnbd_clt_map_device for map scenario */
+ if (!from_map)
+ kfree(rsp);
wake_up_iu_comp(iu, errno);
rnbd_put_iu(dev->sess, iu);
rnbd_clt_put_dev(dev);
@@ -942,7 +925,7 @@ static int rnbd_client_open(struct block_device *block_device, fmode_t mode)
{
struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
- if (dev->read_only && (mode & FMODE_WRITE))
+ if (get_disk_ro(dev->gd) && (mode & FMODE_WRITE))
return -EPERM;
if (dev->dev_state == DEV_STATE_UNMAPPED ||
@@ -963,10 +946,10 @@ static int rnbd_client_getgeo(struct block_device *block_device,
struct hd_geometry *geo)
{
u64 size;
- struct rnbd_clt_dev *dev;
+ struct rnbd_clt_dev *dev = block_device->bd_disk->private_data;
+ struct queue_limits *limit = &dev->queue->limits;
- dev = block_device->bd_disk->private_data;
- size = dev->size * (dev->logical_block_size / SECTOR_SIZE);
+ size = dev->size * (limit->logical_block_size / SECTOR_SIZE);
geo->cylinders = size >> 6; /* size/64 */
geo->heads = 4;
geo->sectors = 16;
@@ -1350,11 +1333,15 @@ static void rnbd_init_mq_hw_queues(struct rnbd_clt_dev *dev)
}
}
-static void setup_request_queue(struct rnbd_clt_dev *dev)
+static void setup_request_queue(struct rnbd_clt_dev *dev,
+ struct rnbd_msg_open_rsp *rsp)
{
- blk_queue_logical_block_size(dev->queue, dev->logical_block_size);
- blk_queue_physical_block_size(dev->queue, dev->physical_block_size);
- blk_queue_max_hw_sectors(dev->queue, dev->max_hw_sectors);
+ blk_queue_logical_block_size(dev->queue,
+ le16_to_cpu(rsp->logical_block_size));
+ blk_queue_physical_block_size(dev->queue,
+ le16_to_cpu(rsp->physical_block_size));
+ blk_queue_max_hw_sectors(dev->queue,
+ dev->sess->max_io_size / SECTOR_SIZE);
/*
* we don't support discards to "discontiguous" segments
@@ -1362,21 +1349,27 @@ static void setup_request_queue(struct rnbd_clt_dev *dev)
*/
blk_queue_max_discard_segments(dev->queue, 1);
- blk_queue_max_discard_sectors(dev->queue, dev->max_discard_sectors);
- dev->queue->limits.discard_granularity = dev->discard_granularity;
- dev->queue->limits.discard_alignment = dev->discard_alignment;
- if (dev->secure_discard)
+ blk_queue_max_discard_sectors(dev->queue,
+ le32_to_cpu(rsp->max_discard_sectors));
+ dev->queue->limits.discard_granularity =
+ le32_to_cpu(rsp->discard_granularity);
+ dev->queue->limits.discard_alignment =
+ le32_to_cpu(rsp->discard_alignment);
+ if (le16_to_cpu(rsp->secure_discard))
blk_queue_max_secure_erase_sectors(dev->queue,
- dev->max_discard_sectors);
+ le32_to_cpu(rsp->max_discard_sectors));
blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
- blk_queue_max_segments(dev->queue, dev->max_segments);
+ blk_queue_max_segments(dev->queue, dev->sess->max_segments);
blk_queue_io_opt(dev->queue, dev->sess->max_io_size);
blk_queue_virt_boundary(dev->queue, SZ_4K - 1);
- blk_queue_write_cache(dev->queue, dev->wc, dev->fua);
+ blk_queue_write_cache(dev->queue,
+ !!(rsp->cache_policy & RNBD_WRITEBACK),
+ !!(rsp->cache_policy & RNBD_FUA));
}
-static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
+static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev,
+ struct rnbd_msg_open_rsp *rsp, int idx)
{
int err;
@@ -1388,19 +1381,15 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, sizeof(dev->gd->disk_name), "rnbd%d",
idx);
- pr_debug("disk_name=%s, capacity=%zu\n",
+ pr_debug("disk_name=%s, capacity=%llu\n",
dev->gd->disk_name,
- dev->nsectors * (dev->logical_block_size / SECTOR_SIZE)
- );
+ le64_to_cpu(rsp->nsectors) *
+ (le16_to_cpu(rsp->logical_block_size) / SECTOR_SIZE));
- set_capacity(dev->gd, dev->nsectors);
+ set_capacity(dev->gd, le64_to_cpu(rsp->nsectors));
- if (dev->access_mode == RNBD_ACCESS_RO) {
- dev->read_only = true;
+ if (dev->access_mode == RNBD_ACCESS_RO)
set_disk_ro(dev->gd, true);
- } else {
- dev->read_only = false;
- }
/*
* Network device does not need rotational
@@ -1413,11 +1402,13 @@ static int rnbd_clt_setup_gen_disk(struct rnbd_clt_dev *dev, int idx)
return err;
}
-static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
+static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
+ struct rnbd_msg_open_rsp *rsp)
{
int idx = dev->clt_device_id;
- dev->size = dev->nsectors * dev->logical_block_size;
+ dev->size = le64_to_cpu(rsp->nsectors) *
+ le16_to_cpu(rsp->logical_block_size);
dev->gd = blk_mq_alloc_disk(&dev->sess->tag_set, dev);
if (IS_ERR(dev->gd))
@@ -1425,8 +1416,8 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev)
dev->queue = dev->gd->queue;
rnbd_init_mq_hw_queues(dev);
- setup_request_queue(dev);
- return rnbd_clt_setup_gen_disk(dev, idx);
+ setup_request_queue(dev, rsp);
+ return rnbd_clt_setup_gen_disk(dev, rsp, idx);
}
static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
@@ -1562,7 +1553,14 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
{
struct rnbd_clt_session *sess;
struct rnbd_clt_dev *dev;
- int ret;
+ int ret, errno;
+ struct rnbd_msg_open_rsp *rsp;
+ struct rnbd_msg_open msg;
+ struct rnbd_iu *iu;
+ struct kvec vec = {
+ .iov_base = &msg,
+ .iov_len = sizeof(msg)
+ };
if (exists_devpath(pathname, sessname))
return ERR_PTR(-EEXIST);
@@ -1582,17 +1580,47 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
ret = -EEXIST;
goto put_dev;
}
- ret = send_msg_open(dev, RTRS_PERMIT_WAIT);
+
+ rsp = kzalloc(sizeof(*rsp), GFP_KERNEL);
+ if (!rsp) {
+ ret = -ENOMEM;
+ goto del_dev;
+ }
+
+ iu = rnbd_get_iu(sess, RTRS_ADMIN_CON, RTRS_PERMIT_WAIT);
+ if (!iu) {
+ ret = -ENOMEM;
+ kfree(rsp);
+ goto del_dev;
+ }
+ iu->buf = rsp;
+ iu->dev = dev;
+ sg_init_one(iu->sgt.sgl, rsp, sizeof(*rsp));
+
+ msg.hdr.type = cpu_to_le16(RNBD_MSG_OPEN);
+ msg.access_mode = dev->access_mode;
+ strscpy(msg.dev_name, dev->pathname, sizeof(msg.dev_name));
+
+ WARN_ON(!rnbd_clt_get_dev(dev));
+ ret = send_usr_msg(sess->rtrs, READ, iu,
+ &vec, sizeof(*rsp), iu->sgt.sgl, 1,
+ msg_open_conf, &errno, RTRS_PERMIT_WAIT);
+ if (ret) {
+ rnbd_clt_put_dev(dev);
+ rnbd_put_iu(sess, iu);
+ } else {
+ ret = errno;
+ }
if (ret) {
rnbd_clt_err(dev,
"map_device: failed, can't open remote device, err: %d\n",
ret);
- goto del_dev;
+ goto put_iu;
}
mutex_lock(&dev->lock);
pr_debug("Opened remote device: session=%s, path='%s'\n",
sess->sessname, pathname);
- ret = rnbd_client_setup_device(dev);
+ ret = rnbd_client_setup_device(dev, rsp);
if (ret) {
rnbd_clt_err(dev,
"map_device: Failed to configure device, err: %d\n",
@@ -1602,21 +1630,30 @@ struct rnbd_clt_dev *rnbd_clt_map_device(const char *sessname,
}
rnbd_clt_info(dev,
- "map_device: Device mapped as %s (nsectors: %zu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
- dev->gd->disk_name, dev->nsectors,
- dev->logical_block_size, dev->physical_block_size,
- dev->max_discard_sectors,
- dev->discard_granularity, dev->discard_alignment,
- dev->secure_discard, dev->max_segments,
- dev->max_hw_sectors, dev->wc, dev->fua);
+ "map_device: Device mapped as %s (nsectors: %llu, logical_block_size: %d, physical_block_size: %d, max_discard_sectors: %d, discard_granularity: %d, discard_alignment: %d, secure_discard: %d, max_segments: %d, max_hw_sectors: %d, wc: %d, fua: %d)\n",
+ dev->gd->disk_name, le64_to_cpu(rsp->nsectors),
+ le16_to_cpu(rsp->logical_block_size),
+ le16_to_cpu(rsp->physical_block_size),
+ le32_to_cpu(rsp->max_discard_sectors),
+ le32_to_cpu(rsp->discard_granularity),
+ le32_to_cpu(rsp->discard_alignment),
+ le16_to_cpu(rsp->secure_discard),
+ sess->max_segments, sess->max_io_size / SECTOR_SIZE,
+ !!(rsp->cache_policy & RNBD_WRITEBACK),
+ !!(rsp->cache_policy & RNBD_FUA));
mutex_unlock(&dev->lock);
+ kfree(rsp);
+ rnbd_put_iu(sess, iu);
rnbd_clt_put_sess(sess);
return dev;
send_close:
send_msg_close(dev, dev->device_id, RTRS_PERMIT_WAIT);
+put_iu:
+ kfree(rsp);
+ rnbd_put_iu(sess, iu);
del_dev:
delete_dev(dev);
put_dev:
diff --git a/drivers/block/rnbd/rnbd-clt.h b/drivers/block/rnbd/rnbd-clt.h
index 2e2e8c4a85c1..a48e040abe63 100644
--- a/drivers/block/rnbd/rnbd-clt.h
+++ b/drivers/block/rnbd/rnbd-clt.h
@@ -106,6 +106,7 @@ struct rnbd_queue {
};
struct rnbd_clt_dev {
+ struct kobject kobj;
struct rnbd_clt_session *sess;
struct request_queue *queue;
struct rnbd_queue *hw_queues;
@@ -114,27 +115,14 @@ struct rnbd_clt_dev {
u32 clt_device_id;
struct mutex lock;
enum rnbd_clt_dev_state dev_state;
+ refcount_t refcount;
char *pathname;
enum rnbd_access_mode access_mode;
u32 nr_poll_queues;
- bool read_only;
- bool wc;
- bool fua;
- u32 max_hw_sectors;
- u32 max_discard_sectors;
- u32 discard_granularity;
- u32 discard_alignment;
- u16 secure_discard;
- u16 physical_block_size;
- u16 logical_block_size;
- u16 max_segments;
- size_t nsectors;
u64 size; /* device size in bytes */
struct list_head list;
struct gendisk *gd;
- struct kobject kobj;
char *blk_symlink_name;
- refcount_t refcount;
struct work_struct unmap_on_rmmod_work;
};
@@ -150,7 +138,7 @@ int rnbd_clt_unmap_device(struct rnbd_clt_dev *dev, bool force,
const struct attribute *sysfs_self);
int rnbd_clt_remap_device(struct rnbd_clt_dev *dev);
-int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, size_t newsize);
+int rnbd_clt_resize_disk(struct rnbd_clt_dev *dev, sector_t newsize);
/* rnbd-clt-sysfs.c */
diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c
index 0713014bf423..5e08da277ddf 100644
--- a/drivers/block/rnbd/rnbd-srv.c
+++ b/drivers/block/rnbd/rnbd-srv.c
@@ -224,7 +224,6 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
wait_for_completion(&dc); /* wait for inflights to drop to zero */
rnbd_dev_close(sess_dev->rnbd_dev);
- list_del(&sess_dev->sess_list);
mutex_lock(&sess_dev->dev->lock);
list_del(&sess_dev->dev_list);
if (sess_dev->open_flags & FMODE_WRITE)
@@ -239,14 +238,14 @@ void rnbd_destroy_sess_dev(struct rnbd_srv_sess_dev *sess_dev, bool keep_id)
static void destroy_sess(struct rnbd_srv_session *srv_sess)
{
- struct rnbd_srv_sess_dev *sess_dev, *tmp;
+ struct rnbd_srv_sess_dev *sess_dev;
+ unsigned long index;
- if (list_empty(&srv_sess->sess_dev_list))
+ if (xa_empty(&srv_sess->index_idr))
goto out;
mutex_lock(&srv_sess->lock);
- list_for_each_entry_safe(sess_dev, tmp, &srv_sess->sess_dev_list,
- sess_list)
+ xa_for_each(&srv_sess->index_idr, index, sess_dev)
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
mutex_unlock(&srv_sess->lock);
@@ -281,7 +280,6 @@ static int create_sess(struct rtrs_srv_sess *rtrs)
srv_sess->queue_depth = rtrs_srv_get_queue_depth(rtrs);
xa_init_flags(&srv_sess->index_idr, XA_FLAGS_ALLOC);
- INIT_LIST_HEAD(&srv_sess->sess_dev_list);
mutex_init(&srv_sess->lock);
mutex_lock(&sess_lock);
list_add(&srv_sess->list, &sess_list);
@@ -323,10 +321,11 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev,
{
struct rnbd_srv_session *sess = sess_dev->sess;
- sess_dev->keep_id = true;
/* It is already started to close by client's close message. */
if (!mutex_trylock(&sess->lock))
return;
+
+ sess_dev->keep_id = true;
/* first remove sysfs itself to avoid deadlock */
sysfs_remove_file_self(&sess_dev->kobj, &attr->attr);
rnbd_srv_destroy_dev_session_sysfs(sess_dev);
@@ -666,11 +665,12 @@ static struct rnbd_srv_sess_dev *
find_srv_sess_dev(struct rnbd_srv_session *srv_sess, const char *dev_name)
{
struct rnbd_srv_sess_dev *sess_dev;
+ unsigned long index;
- if (list_empty(&srv_sess->sess_dev_list))
+ if (xa_empty(&srv_sess->index_idr))
return NULL;
- list_for_each_entry(sess_dev, &srv_sess->sess_dev_list, sess_list)
+ xa_for_each(&srv_sess->index_idr, index, sess_dev)
if (!strcmp(sess_dev->pathname, dev_name))
return sess_dev;
@@ -780,8 +780,6 @@ static int process_msg_open(struct rnbd_srv_session *srv_sess,
list_add(&srv_sess_dev->dev_list, &srv_dev->sess_dev_list);
mutex_unlock(&srv_dev->lock);
- list_add(&srv_sess_dev->sess_list, &srv_sess->sess_dev_list);
-
rnbd_srv_info(srv_sess_dev, "Opened device '%s'\n", srv_dev->id);
kfree(full_path);
diff --git a/drivers/block/rnbd/rnbd-srv.h b/drivers/block/rnbd/rnbd-srv.h
index 6926f9069dc4..081bceaf4ae9 100644
--- a/drivers/block/rnbd/rnbd-srv.h
+++ b/drivers/block/rnbd/rnbd-srv.h
@@ -25,8 +25,6 @@ struct rnbd_srv_session {
int queue_depth;
struct xarray index_idr;
- /* List of struct rnbd_srv_sess_dev */
- struct list_head sess_dev_list;
struct mutex lock;
u8 ver;
};
@@ -48,8 +46,6 @@ struct rnbd_srv_dev {
struct rnbd_srv_sess_dev {
/* Entry inside rnbd_srv_dev struct */
struct list_head dev_list;
- /* Entry inside rnbd_srv_session struct */
- struct list_head sess_list;
struct rnbd_dev *rnbd_dev;
struct rnbd_srv_session *sess;
struct rnbd_srv_dev *dev;
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
deleted file mode 100644
index 0e1a484cab0b..000000000000
--- a/drivers/block/sx8.c
+++ /dev/null
@@ -1,1582 +0,0 @@
-/*
- * sx8.c: Driver for Promise SATA SX8 looks-like-I2O hardware
- *
- * Copyright 2004-2005 Red Hat, Inc.
- *
- * Author/maintainer: Jeff Garzik <jgarzik@pobox.com>
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/blk-mq.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/compiler.h>
-#include <linux/workqueue.h>
-#include <linux/bitops.h>
-#include <linux/delay.h>
-#include <linux/ktime.h>
-#include <linux/hdreg.h>
-#include <linux/dma-mapping.h>
-#include <linux/completion.h>
-#include <linux/scatterlist.h>
-#include <asm/io.h>
-#include <linux/uaccess.h>
-
-#if 0
-#define CARM_DEBUG
-#define CARM_VERBOSE_DEBUG
-#else
-#undef CARM_DEBUG
-#undef CARM_VERBOSE_DEBUG
-#endif
-#undef CARM_NDEBUG
-
-#define DRV_NAME "sx8"
-#define DRV_VERSION "1.0"
-#define PFX DRV_NAME ": "
-
-MODULE_AUTHOR("Jeff Garzik");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Promise SATA SX8 block driver");
-MODULE_VERSION(DRV_VERSION);
-
-/*
- * SX8 hardware has a single message queue for all ATA ports.
- * When this driver was written, the hardware (firmware?) would
- * corrupt data eventually, if more than one request was outstanding.
- * As one can imagine, having 8 ports bottlenecking on a single
- * command hurts performance.
- *
- * Based on user reports, later versions of the hardware (firmware?)
- * seem to be able to survive with more than one command queued.
- *
- * Therefore, we default to the safe option -- 1 command -- but
- * allow the user to increase this.
- *
- * SX8 should be able to support up to ~60 queued commands (CARM_MAX_REQ),
- * but problems seem to occur when you exceed ~30, even on newer hardware.
- */
-static int max_queue = 1;
-module_param(max_queue, int, 0444);
-MODULE_PARM_DESC(max_queue, "Maximum number of queued commands. (min==1, max==30, safe==1)");
-
-
-#define NEXT_RESP(idx) ((idx + 1) % RMSG_Q_LEN)
-
-/* 0xf is just arbitrary, non-zero noise; this is sorta like poisoning */
-#define TAG_ENCODE(tag) (((tag) << 16) | 0xf)
-#define TAG_DECODE(tag) (((tag) >> 16) & 0x1f)
-#define TAG_VALID(tag) ((((tag) & 0xf) == 0xf) && (TAG_DECODE(tag) < 32))
-
-/* note: prints function name for you */
-#ifdef CARM_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#ifdef CARM_VERBOSE_DEBUG
-#define VPRINTK(fmt, args...) printk(KERN_ERR "%s: " fmt, __func__, ## args)
-#else
-#define VPRINTK(fmt, args...)
-#endif /* CARM_VERBOSE_DEBUG */
-#else
-#define DPRINTK(fmt, args...)
-#define VPRINTK(fmt, args...)
-#endif /* CARM_DEBUG */
-
-#ifdef CARM_NDEBUG
-#define assert(expr)
-#else
-#define assert(expr) \
- if(unlikely(!(expr))) { \
- printk(KERN_ERR "Assertion failed! %s,%s,%s,line=%d\n", \
- #expr, __FILE__, __func__, __LINE__); \
- }
-#endif
-
-/* defines only for the constants which don't work well as enums */
-struct carm_host;
-
-enum {
- /* adapter-wide limits */
- CARM_MAX_PORTS = 8,
- CARM_SHM_SIZE = (4096 << 7),
- CARM_MINORS_PER_MAJOR = 256 / CARM_MAX_PORTS,
- CARM_MAX_WAIT_Q = CARM_MAX_PORTS + 1,
-
- /* command message queue limits */
- CARM_MAX_REQ = 64, /* max command msgs per host */
- CARM_MSG_LOW_WATER = (CARM_MAX_REQ / 4), /* refill mark */
-
- /* S/G limits, host-wide and per-request */
- CARM_MAX_REQ_SG = 32, /* max s/g entries per request */
- CARM_MAX_HOST_SG = 600, /* max s/g entries per host */
- CARM_SG_LOW_WATER = (CARM_MAX_HOST_SG / 4), /* re-fill mark */
-
- /* hardware registers */
- CARM_IHQP = 0x1c,
- CARM_INT_STAT = 0x10, /* interrupt status */
- CARM_INT_MASK = 0x14, /* interrupt mask */
- CARM_HMUC = 0x18, /* host message unit control */
- RBUF_ADDR_LO = 0x20, /* response msg DMA buf low 32 bits */
- RBUF_ADDR_HI = 0x24, /* response msg DMA buf high 32 bits */
- RBUF_BYTE_SZ = 0x28,
- CARM_RESP_IDX = 0x2c,
- CARM_CMS0 = 0x30, /* command message size reg 0 */
- CARM_LMUC = 0x48,
- CARM_HMPHA = 0x6c,
- CARM_INITC = 0xb5,
-
- /* bits in CARM_INT_{STAT,MASK} */
- INT_RESERVED = 0xfffffff0,
- INT_WATCHDOG = (1 << 3), /* watchdog timer */
- INT_Q_OVERFLOW = (1 << 2), /* cmd msg q overflow */
- INT_Q_AVAILABLE = (1 << 1), /* cmd msg q has free space */
- INT_RESPONSE = (1 << 0), /* response msg available */
- INT_ACK_MASK = INT_WATCHDOG | INT_Q_OVERFLOW,
- INT_DEF_MASK = INT_RESERVED | INT_Q_OVERFLOW |
- INT_RESPONSE,
-
- /* command messages, and related register bits */
- CARM_HAVE_RESP = 0x01,
- CARM_MSG_READ = 1,
- CARM_MSG_WRITE = 2,
- CARM_MSG_VERIFY = 3,
- CARM_MSG_GET_CAPACITY = 4,
- CARM_MSG_FLUSH = 5,
- CARM_MSG_IOCTL = 6,
- CARM_MSG_ARRAY = 8,
- CARM_MSG_MISC = 9,
- CARM_CME = (1 << 2),
- CARM_RME = (1 << 1),
- CARM_WZBC = (1 << 0),
- CARM_RMI = (1 << 0),
- CARM_Q_FULL = (1 << 3),
- CARM_MSG_SIZE = 288,
- CARM_Q_LEN = 48,
-
- /* CARM_MSG_IOCTL messages */
- CARM_IOC_SCAN_CHAN = 5, /* scan channels for devices */
- CARM_IOC_GET_TCQ = 13, /* get tcq/ncq depth */
- CARM_IOC_SET_TCQ = 14, /* set tcq/ncq depth */
-
- IOC_SCAN_CHAN_NODEV = 0x1f,
- IOC_SCAN_CHAN_OFFSET = 0x40,
-
- /* CARM_MSG_ARRAY messages */
- CARM_ARRAY_INFO = 0,
-
- ARRAY_NO_EXIST = (1 << 31),
-
- /* response messages */
- RMSG_SZ = 8, /* sizeof(struct carm_response) */
- RMSG_Q_LEN = 48, /* resp. msg list length */
- RMSG_OK = 1, /* bit indicating msg was successful */
- /* length of entire resp. msg buffer */
- RBUF_LEN = RMSG_SZ * RMSG_Q_LEN,
-
- PDC_SHM_SIZE = (4096 << 7), /* length of entire h/w buffer */
-
- /* CARM_MSG_MISC messages */
- MISC_GET_FW_VER = 2,
- MISC_ALLOC_MEM = 3,
- MISC_SET_TIME = 5,
-
- /* MISC_GET_FW_VER feature bits */
- FW_VER_4PORT = (1 << 2), /* 1=4 ports, 0=8 ports */
- FW_VER_NON_RAID = (1 << 1), /* 1=non-RAID firmware, 0=RAID */
- FW_VER_ZCR = (1 << 0), /* zero channel RAID (whatever that is) */
-
- /* carm_host flags */
- FL_NON_RAID = FW_VER_NON_RAID,
- FL_4PORT = FW_VER_4PORT,
- FL_FW_VER_MASK = (FW_VER_NON_RAID | FW_VER_4PORT),
- FL_DYN_MAJOR = (1 << 17),
-};
-
-enum {
- CARM_SG_BOUNDARY = 0xffffUL, /* s/g segment boundary */
-};
-
-enum scatter_gather_types {
- SGT_32BIT = 0,
- SGT_64BIT = 1,
-};
-
-enum host_states {
- HST_INVALID, /* invalid state; never used */
- HST_ALLOC_BUF, /* setting up master SHM area */
- HST_ERROR, /* we never leave here */
- HST_PORT_SCAN, /* start dev scan */
- HST_DEV_SCAN_START, /* start per-device probe */
- HST_DEV_SCAN, /* continue per-device probe */
- HST_DEV_ACTIVATE, /* activate devices we found */
- HST_PROBE_FINISHED, /* probe is complete */
- HST_PROBE_START, /* initiate probe */
- HST_SYNC_TIME, /* tell firmware what time it is */
- HST_GET_FW_VER, /* get firmware version, adapter port cnt */
-};
-
-#ifdef CARM_DEBUG
-static const char *state_name[] = {
- "HST_INVALID",
- "HST_ALLOC_BUF",
- "HST_ERROR",
- "HST_PORT_SCAN",
- "HST_DEV_SCAN_START",
- "HST_DEV_SCAN",
- "HST_DEV_ACTIVATE",
- "HST_PROBE_FINISHED",
- "HST_PROBE_START",
- "HST_SYNC_TIME",
- "HST_GET_FW_VER",
-};
-#endif
-
-struct carm_port {
- unsigned int port_no;
- struct gendisk *disk;
- struct carm_host *host;
-
- /* attached device characteristics */
- u64 capacity;
- char name[41];
- u16 dev_geom_head;
- u16 dev_geom_sect;
- u16 dev_geom_cyl;
-};
-
-struct carm_request {
- int n_elem;
- unsigned int msg_type;
- unsigned int msg_subtype;
- unsigned int msg_bucket;
- struct scatterlist sg[CARM_MAX_REQ_SG];
-};
-
-struct carm_host {
- unsigned long flags;
- void __iomem *mmio;
- void *shm;
- dma_addr_t shm_dma;
-
- int major;
- int id;
- char name[32];
-
- spinlock_t lock;
- struct pci_dev *pdev;
- unsigned int state;
- u32 fw_ver;
-
- struct blk_mq_tag_set tag_set;
- struct request_queue *oob_q;
- unsigned int n_oob;
-
- unsigned int hw_sg_used;
-
- unsigned int resp_idx;
-
- unsigned int wait_q_prod;
- unsigned int wait_q_cons;
- struct request_queue *wait_q[CARM_MAX_WAIT_Q];
-
- void *msg_base;
- dma_addr_t msg_dma;
-
- int cur_scan_dev;
- unsigned long dev_active;
- unsigned long dev_present;
- struct carm_port port[CARM_MAX_PORTS];
-
- struct work_struct fsm_task;
-
- int probe_err;
- struct completion probe_comp;
-};
-
-struct carm_response {
- __le32 ret_handle;
- __le32 status;
-} __attribute__((packed));
-
-struct carm_msg_sg {
- __le32 start;
- __le32 len;
-} __attribute__((packed));
-
-struct carm_msg_rw {
- u8 type;
- u8 id;
- u8 sg_count;
- u8 sg_type;
- __le32 handle;
- __le32 lba;
- __le16 lba_count;
- __le16 lba_high;
- struct carm_msg_sg sg[32];
-} __attribute__((packed));
-
-struct carm_msg_allocbuf {
- u8 type;
- u8 subtype;
- u8 n_sg;
- u8 sg_type;
- __le32 handle;
- __le32 addr;
- __le32 len;
- __le32 evt_pool;
- __le32 n_evt;
- __le32 rbuf_pool;
- __le32 n_rbuf;
- __le32 msg_pool;
- __le32 n_msg;
- struct carm_msg_sg sg[8];
-} __attribute__((packed));
-
-struct carm_msg_ioctl {
- u8 type;
- u8 subtype;
- u8 array_id;
- u8 reserved1;
- __le32 handle;
- __le32 data_addr;
- u32 reserved2;
-} __attribute__((packed));
-
-struct carm_msg_sync_time {
- u8 type;
- u8 subtype;
- u16 reserved1;
- __le32 handle;
- u32 reserved2;
- __le32 timestamp;
-} __attribute__((packed));
-
-struct carm_msg_get_fw_ver {
- u8 type;
- u8 subtype;
- u16 reserved1;
- __le32 handle;
- __le32 data_addr;
- u32 reserved2;
-} __attribute__((packed));
-
-struct carm_fw_ver {
- __le32 version;
- u8 features;
- u8 reserved1;
- u16 reserved2;
-} __attribute__((packed));
-
-struct carm_array_info {
- __le32 size;
-
- __le16 size_hi;
- __le16 stripe_size;
-
- __le32 mode;
-
- __le16 stripe_blk_sz;
- __le16 reserved1;
-
- __le16 cyl;
- __le16 head;
-
- __le16 sect;
- u8 array_id;
- u8 reserved2;
-
- char name[40];
-
- __le32 array_status;
-
- /* device list continues beyond this point? */
-} __attribute__((packed));
-
-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent);
-static void carm_remove_one (struct pci_dev *pdev);
-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo);
-
-static const struct pci_device_id carm_pci_tbl[] = {
- { PCI_VENDOR_ID_PROMISE, 0x8000, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
- { PCI_VENDOR_ID_PROMISE, 0x8002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, },
- { } /* terminate list */
-};
-MODULE_DEVICE_TABLE(pci, carm_pci_tbl);
-
-static struct pci_driver carm_driver = {
- .name = DRV_NAME,
- .id_table = carm_pci_tbl,
- .probe = carm_init_one,
- .remove = carm_remove_one,
-};
-
-static const struct block_device_operations carm_bd_ops = {
- .owner = THIS_MODULE,
- .getgeo = carm_bdev_getgeo,
-};
-
-static unsigned int carm_host_id;
-static unsigned long carm_major_alloc;
-
-
-
-static int carm_bdev_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
- struct carm_port *port = bdev->bd_disk->private_data;
-
- geo->heads = (u8) port->dev_geom_head;
- geo->sectors = (u8) port->dev_geom_sect;
- geo->cylinders = port->dev_geom_cyl;
- return 0;
-}
-
-static const u32 msg_sizes[] = { 32, 64, 128, CARM_MSG_SIZE };
-
-static inline int carm_lookup_bucket(u32 msg_size)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
- if (msg_size <= msg_sizes[i])
- return i;
-
- return -ENOENT;
-}
-
-static void carm_init_buckets(void __iomem *mmio)
-{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(msg_sizes); i++)
- writel(msg_sizes[i], mmio + CARM_CMS0 + (4 * i));
-}
-
-static inline void *carm_ref_msg(struct carm_host *host,
- unsigned int msg_idx)
-{
- return host->msg_base + (msg_idx * CARM_MSG_SIZE);
-}
-
-static inline dma_addr_t carm_ref_msg_dma(struct carm_host *host,
- unsigned int msg_idx)
-{
- return host->msg_dma + (msg_idx * CARM_MSG_SIZE);
-}
-
-static int carm_send_msg(struct carm_host *host,
- struct carm_request *crq, unsigned tag)
-{
- void __iomem *mmio = host->mmio;
- u32 msg = (u32) carm_ref_msg_dma(host, tag);
- u32 cm_bucket = crq->msg_bucket;
- u32 tmp;
- int rc = 0;
-
- VPRINTK("ENTER\n");
-
- tmp = readl(mmio + CARM_HMUC);
- if (tmp & CARM_Q_FULL) {
-#if 0
- tmp = readl(mmio + CARM_INT_MASK);
- tmp |= INT_Q_AVAILABLE;
- writel(tmp, mmio + CARM_INT_MASK);
- readl(mmio + CARM_INT_MASK); /* flush */
-#endif
- DPRINTK("host msg queue full\n");
- rc = -EBUSY;
- } else {
- writel(msg | (cm_bucket << 1), mmio + CARM_IHQP);
- readl(mmio + CARM_IHQP); /* flush */
- }
-
- return rc;
-}
-
-static int carm_array_info (struct carm_host *host, unsigned int array_idx)
-{
- struct carm_msg_ioctl *ioc;
- u32 msg_data;
- dma_addr_t msg_dma;
- struct carm_request *crq;
- struct request *rq;
- int rc;
-
- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq)) {
- rc = -ENOMEM;
- goto err_out;
- }
- crq = blk_mq_rq_to_pdu(rq);
-
- ioc = carm_ref_msg(host, rq->tag);
- msg_dma = carm_ref_msg_dma(host, rq->tag);
- msg_data = (u32) (msg_dma + sizeof(struct carm_array_info));
-
- crq->msg_type = CARM_MSG_ARRAY;
- crq->msg_subtype = CARM_ARRAY_INFO;
- rc = carm_lookup_bucket(sizeof(struct carm_msg_ioctl) +
- sizeof(struct carm_array_info));
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_ARRAY;
- ioc->subtype = CARM_ARRAY_INFO;
- ioc->array_id = (u8) array_idx;
- ioc->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- spin_lock_irq(&host->lock);
- assert(host->state == HST_DEV_SCAN_START ||
- host->state == HST_DEV_SCAN);
- spin_unlock_irq(&host->lock);
-
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
- blk_execute_rq_nowait(rq, true);
-
- return 0;
-
-err_out:
- spin_lock_irq(&host->lock);
- host->state = HST_ERROR;
- spin_unlock_irq(&host->lock);
- return rc;
-}
-
-typedef unsigned int (*carm_sspc_t)(struct carm_host *, unsigned int, void *);
-
-static int carm_send_special (struct carm_host *host, carm_sspc_t func)
-{
- struct request *rq;
- struct carm_request *crq;
- struct carm_msg_ioctl *ioc;
- void *mem;
- unsigned int msg_size;
- int rc;
-
- rq = blk_mq_alloc_request(host->oob_q, REQ_OP_DRV_OUT, 0);
- if (IS_ERR(rq))
- return -ENOMEM;
- crq = blk_mq_rq_to_pdu(rq);
-
- mem = carm_ref_msg(host, rq->tag);
-
- msg_size = func(host, rq->tag, mem);
-
- ioc = mem;
- crq->msg_type = ioc->type;
- crq->msg_subtype = ioc->subtype;
- rc = carm_lookup_bucket(msg_size);
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-
- DPRINTK("blk_execute_rq_nowait, tag == %u\n", rq->tag);
- blk_execute_rq_nowait(rq, true);
-
- return 0;
-}
-
-static unsigned int carm_fill_sync_time(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_sync_time *st = mem;
-
- time64_t tv = ktime_get_real_seconds();
-
- memset(st, 0, sizeof(*st));
- st->type = CARM_MSG_MISC;
- st->subtype = MISC_SET_TIME;
- st->handle = cpu_to_le32(TAG_ENCODE(idx));
- st->timestamp = cpu_to_le32(tv);
-
- return sizeof(struct carm_msg_sync_time);
-}
-
-static unsigned int carm_fill_alloc_buf(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_allocbuf *ab = mem;
-
- memset(ab, 0, sizeof(*ab));
- ab->type = CARM_MSG_MISC;
- ab->subtype = MISC_ALLOC_MEM;
- ab->handle = cpu_to_le32(TAG_ENCODE(idx));
- ab->n_sg = 1;
- ab->sg_type = SGT_32BIT;
- ab->addr = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
- ab->len = cpu_to_le32(PDC_SHM_SIZE >> 1);
- ab->evt_pool = cpu_to_le32(host->shm_dma + (16 * 1024));
- ab->n_evt = cpu_to_le32(1024);
- ab->rbuf_pool = cpu_to_le32(host->shm_dma);
- ab->n_rbuf = cpu_to_le32(RMSG_Q_LEN);
- ab->msg_pool = cpu_to_le32(host->shm_dma + RBUF_LEN);
- ab->n_msg = cpu_to_le32(CARM_Q_LEN);
- ab->sg[0].start = cpu_to_le32(host->shm_dma + (PDC_SHM_SIZE >> 1));
- ab->sg[0].len = cpu_to_le32(65536);
-
- return sizeof(struct carm_msg_allocbuf);
-}
-
-static unsigned int carm_fill_scan_channels(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_ioctl *ioc = mem;
- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) +
- IOC_SCAN_CHAN_OFFSET);
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_IOCTL;
- ioc->subtype = CARM_IOC_SCAN_CHAN;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- /* fill output data area with "no device" default values */
- mem += IOC_SCAN_CHAN_OFFSET;
- memset(mem, IOC_SCAN_CHAN_NODEV, CARM_MAX_PORTS);
-
- return IOC_SCAN_CHAN_OFFSET + CARM_MAX_PORTS;
-}
-
-static unsigned int carm_fill_get_fw_ver(struct carm_host *host,
- unsigned int idx, void *mem)
-{
- struct carm_msg_get_fw_ver *ioc = mem;
- u32 msg_data = (u32) (carm_ref_msg_dma(host, idx) + sizeof(*ioc));
-
- memset(ioc, 0, sizeof(*ioc));
- ioc->type = CARM_MSG_MISC;
- ioc->subtype = MISC_GET_FW_VER;
- ioc->handle = cpu_to_le32(TAG_ENCODE(idx));
- ioc->data_addr = cpu_to_le32(msg_data);
-
- return sizeof(struct carm_msg_get_fw_ver) +
- sizeof(struct carm_fw_ver);
-}
-
-static inline void carm_push_q (struct carm_host *host, struct request_queue *q)
-{
- unsigned int idx = host->wait_q_prod % CARM_MAX_WAIT_Q;
-
- blk_mq_stop_hw_queues(q);
- VPRINTK("STOPPED QUEUE %p\n", q);
-
- host->wait_q[idx] = q;
- host->wait_q_prod++;
- BUG_ON(host->wait_q_prod == host->wait_q_cons); /* overrun */
-}
-
-static inline struct request_queue *carm_pop_q(struct carm_host *host)
-{
- unsigned int idx;
-
- if (host->wait_q_prod == host->wait_q_cons)
- return NULL;
-
- idx = host->wait_q_cons % CARM_MAX_WAIT_Q;
- host->wait_q_cons++;
-
- return host->wait_q[idx];
-}
-
-static inline void carm_round_robin(struct carm_host *host)
-{
- struct request_queue *q = carm_pop_q(host);
- if (q) {
- blk_mq_start_hw_queues(q);
- VPRINTK("STARTED QUEUE %p\n", q);
- }
-}
-
-static inline enum dma_data_direction carm_rq_dir(struct request *rq)
-{
- return op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-}
-
-static blk_status_t carm_queue_rq(struct blk_mq_hw_ctx *hctx,
- const struct blk_mq_queue_data *bd)
-{
- struct request_queue *q = hctx->queue;
- struct request *rq = bd->rq;
- struct carm_port *port = q->queuedata;
- struct carm_host *host = port->host;
- struct carm_request *crq = blk_mq_rq_to_pdu(rq);
- struct carm_msg_rw *msg;
- struct scatterlist *sg;
- int i, n_elem = 0, rc;
- unsigned int msg_size;
- u32 tmp;
-
- crq->n_elem = 0;
- sg_init_table(crq->sg, CARM_MAX_REQ_SG);
-
- blk_mq_start_request(rq);
-
- spin_lock_irq(&host->lock);
- if (req_op(rq) == REQ_OP_DRV_OUT)
- goto send_msg;
-
- /* get scatterlist from block layer */
- sg = &crq->sg[0];
- n_elem = blk_rq_map_sg(q, rq, sg);
- if (n_elem <= 0)
- goto out_ioerr;
-
- /* map scatterlist to PCI bus addresses */
- n_elem = dma_map_sg(&host->pdev->dev, sg, n_elem, carm_rq_dir(rq));
- if (n_elem <= 0)
- goto out_ioerr;
-
- /* obey global hardware limit on S/G entries */
- if (host->hw_sg_used >= CARM_MAX_HOST_SG - n_elem)
- goto out_resource;
-
- crq->n_elem = n_elem;
- host->hw_sg_used += n_elem;
-
- /*
- * build read/write message
- */
-
- VPRINTK("build msg\n");
- msg = (struct carm_msg_rw *) carm_ref_msg(host, rq->tag);
-
- if (rq_data_dir(rq) == WRITE) {
- msg->type = CARM_MSG_WRITE;
- crq->msg_type = CARM_MSG_WRITE;
- } else {
- msg->type = CARM_MSG_READ;
- crq->msg_type = CARM_MSG_READ;
- }
-
- msg->id = port->port_no;
- msg->sg_count = n_elem;
- msg->sg_type = SGT_32BIT;
- msg->handle = cpu_to_le32(TAG_ENCODE(rq->tag));
- msg->lba = cpu_to_le32(blk_rq_pos(rq) & 0xffffffff);
- tmp = (blk_rq_pos(rq) >> 16) >> 16;
- msg->lba_high = cpu_to_le16( (u16) tmp );
- msg->lba_count = cpu_to_le16(blk_rq_sectors(rq));
-
- msg_size = sizeof(struct carm_msg_rw) - sizeof(msg->sg);
- for (i = 0; i < n_elem; i++) {
- struct carm_msg_sg *carm_sg = &msg->sg[i];
- carm_sg->start = cpu_to_le32(sg_dma_address(&crq->sg[i]));
- carm_sg->len = cpu_to_le32(sg_dma_len(&crq->sg[i]));
- msg_size += sizeof(struct carm_msg_sg);
- }
-
- rc = carm_lookup_bucket(msg_size);
- BUG_ON(rc < 0);
- crq->msg_bucket = (u32) rc;
-send_msg:
- /*
- * queue read/write message to hardware
- */
- VPRINTK("send msg, tag == %u\n", rq->tag);
- rc = carm_send_msg(host, crq, rq->tag);
- if (rc) {
- host->hw_sg_used -= n_elem;
- goto out_resource;
- }
-
- spin_unlock_irq(&host->lock);
- return BLK_STS_OK;
-out_resource:
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], n_elem, carm_rq_dir(rq));
- carm_push_q(host, q);
- spin_unlock_irq(&host->lock);
- return BLK_STS_DEV_RESOURCE;
-out_ioerr:
- carm_round_robin(host);
- spin_unlock_irq(&host->lock);
- return BLK_STS_IOERR;
-}
-
-static void carm_handle_array_info(struct carm_host *host,
- struct carm_request *crq, u8 *mem,
- blk_status_t error)
-{
- struct carm_port *port;
- u8 *msg_data = mem + sizeof(struct carm_array_info);
- struct carm_array_info *desc = (struct carm_array_info *) msg_data;
- u64 lo, hi;
- int cur_port;
- size_t slen;
-
- DPRINTK("ENTER\n");
-
- if (error)
- goto out;
- if (le32_to_cpu(desc->array_status) & ARRAY_NO_EXIST)
- goto out;
-
- cur_port = host->cur_scan_dev;
-
- /* should never occur */
- if ((cur_port < 0) || (cur_port >= CARM_MAX_PORTS)) {
- printk(KERN_ERR PFX "BUG: cur_scan_dev==%d, array_id==%d\n",
- cur_port, (int) desc->array_id);
- goto out;
- }
-
- port = &host->port[cur_port];
-
- lo = (u64) le32_to_cpu(desc->size);
- hi = (u64) le16_to_cpu(desc->size_hi);
-
- port->capacity = lo | (hi << 32);
- port->dev_geom_head = le16_to_cpu(desc->head);
- port->dev_geom_sect = le16_to_cpu(desc->sect);
- port->dev_geom_cyl = le16_to_cpu(desc->cyl);
-
- host->dev_active |= (1 << cur_port);
-
- strncpy(port->name, desc->name, sizeof(port->name));
- port->name[sizeof(port->name) - 1] = 0;
- slen = strlen(port->name);
- while (slen && (port->name[slen - 1] == ' ')) {
- port->name[slen - 1] = 0;
- slen--;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): port %u device %Lu sectors\n",
- pci_name(host->pdev), port->port_no,
- (unsigned long long) port->capacity);
- printk(KERN_INFO DRV_NAME "(%s): port %u device \"%s\"\n",
- pci_name(host->pdev), port->port_no, port->name);
-
-out:
- assert(host->state == HST_DEV_SCAN);
- schedule_work(&host->fsm_task);
-}
-
-static void carm_handle_scan_chan(struct carm_host *host,
- struct carm_request *crq, u8 *mem,
- blk_status_t error)
-{
- u8 *msg_data = mem + IOC_SCAN_CHAN_OFFSET;
- unsigned int i, dev_count = 0;
- int new_state = HST_DEV_SCAN_START;
-
- DPRINTK("ENTER\n");
-
- if (error) {
- new_state = HST_ERROR;
- goto out;
- }
-
- /* TODO: scan and support non-disk devices */
- for (i = 0; i < 8; i++)
- if (msg_data[i] == 0) { /* direct-access device (disk) */
- host->dev_present |= (1 << i);
- dev_count++;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): found %u interesting devices\n",
- pci_name(host->pdev), dev_count);
-
-out:
- assert(host->state == HST_PORT_SCAN);
- host->state = new_state;
- schedule_work(&host->fsm_task);
-}
-
-static void carm_handle_generic(struct carm_host *host,
- struct carm_request *crq, blk_status_t error,
- int cur_state, int next_state)
-{
- DPRINTK("ENTER\n");
-
- assert(host->state == cur_state);
- if (error)
- host->state = HST_ERROR;
- else
- host->state = next_state;
- schedule_work(&host->fsm_task);
-}
-
-static inline void carm_handle_resp(struct carm_host *host,
- __le32 ret_handle_le, u32 status)
-{
- u32 handle = le32_to_cpu(ret_handle_le);
- unsigned int msg_idx;
- struct request *rq;
- struct carm_request *crq;
- blk_status_t error = (status == RMSG_OK) ? 0 : BLK_STS_IOERR;
- u8 *mem;
-
- VPRINTK("ENTER, handle == 0x%x\n", handle);
-
- if (unlikely(!TAG_VALID(handle))) {
- printk(KERN_ERR DRV_NAME "(%s): BUG: invalid tag 0x%x\n",
- pci_name(host->pdev), handle);
- return;
- }
-
- msg_idx = TAG_DECODE(handle);
- VPRINTK("tag == %u\n", msg_idx);
-
- rq = blk_mq_tag_to_rq(host->tag_set.tags[0], msg_idx);
- crq = blk_mq_rq_to_pdu(rq);
-
- /* fast path */
- if (likely(crq->msg_type == CARM_MSG_READ ||
- crq->msg_type == CARM_MSG_WRITE)) {
- dma_unmap_sg(&host->pdev->dev, &crq->sg[0], crq->n_elem,
- carm_rq_dir(rq));
- goto done;
- }
-
- mem = carm_ref_msg(host, msg_idx);
-
- switch (crq->msg_type) {
- case CARM_MSG_IOCTL: {
- switch (crq->msg_subtype) {
- case CARM_IOC_SCAN_CHAN:
- carm_handle_scan_chan(host, crq, mem, error);
- goto done;
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- case CARM_MSG_MISC: {
- switch (crq->msg_subtype) {
- case MISC_ALLOC_MEM:
- carm_handle_generic(host, crq, error,
- HST_ALLOC_BUF, HST_SYNC_TIME);
- goto done;
- case MISC_SET_TIME:
- carm_handle_generic(host, crq, error,
- HST_SYNC_TIME, HST_GET_FW_VER);
- goto done;
- case MISC_GET_FW_VER: {
- struct carm_fw_ver *ver = (struct carm_fw_ver *)
- (mem + sizeof(struct carm_msg_get_fw_ver));
- if (!error) {
- host->fw_ver = le32_to_cpu(ver->version);
- host->flags |= (ver->features & FL_FW_VER_MASK);
- }
- carm_handle_generic(host, crq, error,
- HST_GET_FW_VER, HST_PORT_SCAN);
- goto done;
- }
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- case CARM_MSG_ARRAY: {
- switch (crq->msg_subtype) {
- case CARM_ARRAY_INFO:
- carm_handle_array_info(host, crq, mem, error);
- break;
- default:
- /* unknown / invalid response */
- goto err_out;
- }
- break;
- }
-
- default:
- /* unknown / invalid response */
- goto err_out;
- }
-
- return;
-
-err_out:
- printk(KERN_WARNING DRV_NAME "(%s): BUG: unhandled message type %d/%d\n",
- pci_name(host->pdev), crq->msg_type, crq->msg_subtype);
- error = BLK_STS_IOERR;
-done:
- host->hw_sg_used -= crq->n_elem;
- blk_mq_end_request(blk_mq_rq_from_pdu(crq), error);
-
- if (host->hw_sg_used <= CARM_SG_LOW_WATER)
- carm_round_robin(host);
-}
-
-static inline void carm_handle_responses(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- struct carm_response *resp = (struct carm_response *) host->shm;
- unsigned int work = 0;
- unsigned int idx = host->resp_idx % RMSG_Q_LEN;
-
- while (1) {
- u32 status = le32_to_cpu(resp[idx].status);
-
- if (status == 0xffffffff) {
- VPRINTK("ending response on index %u\n", idx);
- writel(idx << 3, mmio + CARM_RESP_IDX);
- break;
- }
-
- /* response to a message we sent */
- else if ((status & (1 << 31)) == 0) {
- VPRINTK("handling msg response on index %u\n", idx);
- carm_handle_resp(host, resp[idx].ret_handle, status);
- resp[idx].status = cpu_to_le32(0xffffffff);
- }
-
- /* asynchronous events the hardware throws our way */
- else if ((status & 0xff000000) == (1 << 31)) {
- u8 *evt_type_ptr = (u8 *) &resp[idx];
- u8 evt_type = *evt_type_ptr;
- printk(KERN_WARNING DRV_NAME "(%s): unhandled event type %d\n",
- pci_name(host->pdev), (int) evt_type);
- resp[idx].status = cpu_to_le32(0xffffffff);
- }
-
- idx = NEXT_RESP(idx);
- work++;
- }
-
- VPRINTK("EXIT, work==%u\n", work);
- host->resp_idx += work;
-}
-
-static irqreturn_t carm_interrupt(int irq, void *__host)
-{
- struct carm_host *host = __host;
- void __iomem *mmio;
- u32 mask;
- int handled = 0;
- unsigned long flags;
-
- if (!host) {
- VPRINTK("no host\n");
- return IRQ_NONE;
- }
-
- spin_lock_irqsave(&host->lock, flags);
-
- mmio = host->mmio;
-
- /* reading should also clear interrupts */
- mask = readl(mmio + CARM_INT_STAT);
-
- if (mask == 0 || mask == 0xffffffff) {
- VPRINTK("no work, mask == 0x%x\n", mask);
- goto out;
- }
-
- if (mask & INT_ACK_MASK)
- writel(mask, mmio + CARM_INT_STAT);
-
- if (unlikely(host->state == HST_INVALID)) {
- VPRINTK("not initialized yet, mask = 0x%x\n", mask);
- goto out;
- }
-
- if (mask & CARM_HAVE_RESP) {
- handled = 1;
- carm_handle_responses(host);
- }
-
-out:
- spin_unlock_irqrestore(&host->lock, flags);
- VPRINTK("EXIT\n");
- return IRQ_RETVAL(handled);
-}
-
-static void carm_fsm_task (struct work_struct *work)
-{
- struct carm_host *host =
- container_of(work, struct carm_host, fsm_task);
- unsigned long flags;
- unsigned int state;
- int rc, i, next_dev;
- int reschedule = 0;
- int new_state = HST_INVALID;
-
- spin_lock_irqsave(&host->lock, flags);
- state = host->state;
- spin_unlock_irqrestore(&host->lock, flags);
-
- DPRINTK("ENTER, state == %s\n", state_name[state]);
-
- switch (state) {
- case HST_PROBE_START:
- new_state = HST_ALLOC_BUF;
- reschedule = 1;
- break;
-
- case HST_ALLOC_BUF:
- rc = carm_send_special(host, carm_fill_alloc_buf);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_SYNC_TIME:
- rc = carm_send_special(host, carm_fill_sync_time);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_GET_FW_VER:
- rc = carm_send_special(host, carm_fill_get_fw_ver);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_PORT_SCAN:
- rc = carm_send_special(host, carm_fill_scan_channels);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- break;
-
- case HST_DEV_SCAN_START:
- host->cur_scan_dev = -1;
- new_state = HST_DEV_SCAN;
- reschedule = 1;
- break;
-
- case HST_DEV_SCAN:
- next_dev = -1;
- for (i = host->cur_scan_dev + 1; i < CARM_MAX_PORTS; i++)
- if (host->dev_present & (1 << i)) {
- next_dev = i;
- break;
- }
-
- if (next_dev >= 0) {
- host->cur_scan_dev = next_dev;
- rc = carm_array_info(host, next_dev);
- if (rc) {
- new_state = HST_ERROR;
- reschedule = 1;
- }
- } else {
- new_state = HST_DEV_ACTIVATE;
- reschedule = 1;
- }
- break;
-
- case HST_DEV_ACTIVATE: {
- int activated = 0;
- for (i = 0; i < CARM_MAX_PORTS; i++)
- if (host->dev_active & (1 << i)) {
- struct carm_port *port = &host->port[i];
- struct gendisk *disk = port->disk;
-
- set_capacity(disk, port->capacity);
- host->probe_err = add_disk(disk);
- if (!host->probe_err)
- activated++;
- else
- break;
- }
-
- printk(KERN_INFO DRV_NAME "(%s): %d ports activated\n",
- pci_name(host->pdev), activated);
-
- new_state = HST_PROBE_FINISHED;
- reschedule = 1;
- break;
- }
- case HST_PROBE_FINISHED:
- complete(&host->probe_comp);
- break;
- case HST_ERROR:
- /* FIXME: TODO */
- break;
-
- default:
- /* should never occur */
- printk(KERN_ERR PFX "BUG: unknown state %d\n", state);
- assert(0);
- break;
- }
-
- if (new_state != HST_INVALID) {
- spin_lock_irqsave(&host->lock, flags);
- host->state = new_state;
- spin_unlock_irqrestore(&host->lock, flags);
- }
- if (reschedule)
- schedule_work(&host->fsm_task);
-}
-
-static int carm_init_wait(void __iomem *mmio, u32 bits, unsigned int test_bit)
-{
- unsigned int i;
-
- for (i = 0; i < 50000; i++) {
- u32 tmp = readl(mmio + CARM_LMUC);
- udelay(100);
-
- if (test_bit) {
- if ((tmp & bits) == bits)
- return 0;
- } else {
- if ((tmp & bits) == 0)
- return 0;
- }
-
- cond_resched();
- }
-
- printk(KERN_ERR PFX "carm_init_wait timeout, bits == 0x%x, test_bit == %s\n",
- bits, test_bit ? "yes" : "no");
- return -EBUSY;
-}
-
-static void carm_init_responses(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- unsigned int i;
- struct carm_response *resp = (struct carm_response *) host->shm;
-
- for (i = 0; i < RMSG_Q_LEN; i++)
- resp[i].status = cpu_to_le32(0xffffffff);
-
- writel(0, mmio + CARM_RESP_IDX);
-}
-
-static int carm_init_host(struct carm_host *host)
-{
- void __iomem *mmio = host->mmio;
- u32 tmp;
- u8 tmp8;
- int rc;
-
- DPRINTK("ENTER\n");
-
- writel(0, mmio + CARM_INT_MASK);
-
- tmp8 = readb(mmio + CARM_INITC);
- if (tmp8 & 0x01) {
- tmp8 &= ~0x01;
- writeb(tmp8, mmio + CARM_INITC);
- readb(mmio + CARM_INITC); /* flush */
-
- DPRINTK("snooze...\n");
- msleep(5000);
- }
-
- tmp = readl(mmio + CARM_HMUC);
- if (tmp & CARM_CME) {
- DPRINTK("CME bit present, waiting\n");
- rc = carm_init_wait(mmio, CARM_CME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 1 failed\n");
- return rc;
- }
- }
- if (tmp & CARM_RME) {
- DPRINTK("RME bit present, waiting\n");
- rc = carm_init_wait(mmio, CARM_RME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 2 failed\n");
- return rc;
- }
- }
-
- tmp &= ~(CARM_RME | CARM_CME);
- writel(tmp, mmio + CARM_HMUC);
- readl(mmio + CARM_HMUC); /* flush */
-
- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 0);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 3 failed\n");
- return rc;
- }
-
- carm_init_buckets(mmio);
-
- writel(host->shm_dma & 0xffffffff, mmio + RBUF_ADDR_LO);
- writel((host->shm_dma >> 16) >> 16, mmio + RBUF_ADDR_HI);
- writel(RBUF_LEN, mmio + RBUF_BYTE_SZ);
-
- tmp = readl(mmio + CARM_HMUC);
- tmp |= (CARM_RME | CARM_CME | CARM_WZBC);
- writel(tmp, mmio + CARM_HMUC);
- readl(mmio + CARM_HMUC); /* flush */
-
- rc = carm_init_wait(mmio, CARM_RME | CARM_CME, 1);
- if (rc) {
- DPRINTK("EXIT, carm_init_wait 4 failed\n");
- return rc;
- }
-
- writel(0, mmio + CARM_HMPHA);
- writel(INT_DEF_MASK, mmio + CARM_INT_MASK);
-
- carm_init_responses(host);
-
- /* start initialization, probing state machine */
- spin_lock_irq(&host->lock);
- assert(host->state == HST_INVALID);
- host->state = HST_PROBE_START;
- spin_unlock_irq(&host->lock);
- schedule_work(&host->fsm_task);
-
- DPRINTK("EXIT\n");
- return 0;
-}
-
-static const struct blk_mq_ops carm_mq_ops = {
- .queue_rq = carm_queue_rq,
-};
-
-static int carm_init_disk(struct carm_host *host, unsigned int port_no)
-{
- struct carm_port *port = &host->port[port_no];
- struct gendisk *disk;
-
- port->host = host;
- port->port_no = port_no;
-
- disk = blk_mq_alloc_disk(&host->tag_set, port);
- if (IS_ERR(disk))
- return PTR_ERR(disk);
-
- port->disk = disk;
- sprintf(disk->disk_name, DRV_NAME "/%u",
- (unsigned int)host->id * CARM_MAX_PORTS + port_no);
- disk->major = host->major;
- disk->first_minor = port_no * CARM_MINORS_PER_MAJOR;
- disk->minors = CARM_MINORS_PER_MAJOR;
- disk->fops = &carm_bd_ops;
- disk->private_data = port;
-
- blk_queue_max_segments(disk->queue, CARM_MAX_REQ_SG);
- blk_queue_segment_boundary(disk->queue, CARM_SG_BOUNDARY);
- return 0;
-}
-
-static void carm_free_disk(struct carm_host *host, unsigned int port_no)
-{
- struct carm_port *port = &host->port[port_no];
- struct gendisk *disk = port->disk;
-
- if (!disk)
- return;
-
- if (host->state > HST_DEV_ACTIVATE)
- del_gendisk(disk);
- put_disk(disk);
-}
-
-static int carm_init_shm(struct carm_host *host)
-{
- host->shm = dma_alloc_coherent(&host->pdev->dev, CARM_SHM_SIZE,
- &host->shm_dma, GFP_KERNEL);
- if (!host->shm)
- return -ENOMEM;
-
- host->msg_base = host->shm + RBUF_LEN;
- host->msg_dma = host->shm_dma + RBUF_LEN;
-
- memset(host->shm, 0xff, RBUF_LEN);
- memset(host->msg_base, 0, PDC_SHM_SIZE - RBUF_LEN);
-
- return 0;
-}
-
-static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
-{
- struct carm_host *host;
- int rc;
- struct request_queue *q;
- unsigned int i;
-
- printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
-
- rc = pci_enable_device(pdev);
- if (rc)
- return rc;
-
- rc = pci_request_regions(pdev, DRV_NAME);
- if (rc)
- goto err_out;
-
- rc = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): DMA mask failure\n",
- pci_name(pdev));
- goto err_out_regions;
- }
-
- host = kzalloc(sizeof(*host), GFP_KERNEL);
- if (!host) {
- rc = -ENOMEM;
- goto err_out_regions;
- }
-
- host->pdev = pdev;
- spin_lock_init(&host->lock);
- INIT_WORK(&host->fsm_task, carm_fsm_task);
- init_completion(&host->probe_comp);
-
- host->mmio = ioremap(pci_resource_start(pdev, 0),
- pci_resource_len(pdev, 0));
- if (!host->mmio) {
- printk(KERN_ERR DRV_NAME "(%s): MMIO alloc failure\n",
- pci_name(pdev));
- rc = -ENOMEM;
- goto err_out_kfree;
- }
-
- rc = carm_init_shm(host);
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): DMA SHM alloc failure\n",
- pci_name(pdev));
- goto err_out_iounmap;
- }
-
- memset(&host->tag_set, 0, sizeof(host->tag_set));
- host->tag_set.ops = &carm_mq_ops;
- host->tag_set.cmd_size = sizeof(struct carm_request);
- host->tag_set.nr_hw_queues = 1;
- host->tag_set.nr_maps = 1;
- host->tag_set.queue_depth = max_queue;
- host->tag_set.numa_node = NUMA_NO_NODE;
- host->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
-
- rc = blk_mq_alloc_tag_set(&host->tag_set);
- if (rc)
- goto err_out_dma_free;
-
- q = blk_mq_init_queue(&host->tag_set);
- if (IS_ERR(q)) {
- rc = PTR_ERR(q);
- blk_mq_free_tag_set(&host->tag_set);
- goto err_out_dma_free;
- }
-
- host->oob_q = q;
- q->queuedata = host;
-
- /*
- * Figure out which major to use: 160, 161, or dynamic
- */
- if (!test_and_set_bit(0, &carm_major_alloc))
- host->major = 160;
- else if (!test_and_set_bit(1, &carm_major_alloc))
- host->major = 161;
- else
- host->flags |= FL_DYN_MAJOR;
-
- host->id = carm_host_id;
- sprintf(host->name, DRV_NAME "%d", carm_host_id);
-
- rc = register_blkdev(host->major, host->name);
- if (rc < 0)
- goto err_out_free_majors;
- if (host->flags & FL_DYN_MAJOR)
- host->major = rc;
-
- for (i = 0; i < CARM_MAX_PORTS; i++) {
- rc = carm_init_disk(host, i);
- if (rc)
- goto err_out_blkdev_disks;
- }
-
- pci_set_master(pdev);
-
- rc = request_irq(pdev->irq, carm_interrupt, IRQF_SHARED, DRV_NAME, host);
- if (rc) {
- printk(KERN_ERR DRV_NAME "(%s): irq alloc failure\n",
- pci_name(pdev));
- goto err_out_blkdev_disks;
- }
-
- rc = carm_init_host(host);
- if (rc)
- goto err_out_free_irq;
-
- DPRINTK("waiting for probe_comp\n");
- host->probe_err = -ENODEV;
- wait_for_completion(&host->probe_comp);
- if (host->probe_err) {
- rc = host->probe_err;
- goto err_out_free_irq;
- }
-
- printk(KERN_INFO "%s: pci %s, ports %d, io %llx, irq %u, major %d\n",
- host->name, pci_name(pdev), (int) CARM_MAX_PORTS,
- (unsigned long long)pci_resource_start(pdev, 0),
- pdev->irq, host->major);
-
- carm_host_id++;
- pci_set_drvdata(pdev, host);
- return 0;
-
-err_out_free_irq:
- free_irq(pdev->irq, host);
-err_out_blkdev_disks:
- for (i = 0; i < CARM_MAX_PORTS; i++)
- carm_free_disk(host, i);
- unregister_blkdev(host->major, host->name);
-err_out_free_majors:
- if (host->major == 160)
- clear_bit(0, &carm_major_alloc);
- else if (host->major == 161)
- clear_bit(1, &carm_major_alloc);
- blk_mq_destroy_queue(host->oob_q);
- blk_mq_free_tag_set(&host->tag_set);
-err_out_dma_free:
- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
-err_out_iounmap:
- iounmap(host->mmio);
-err_out_kfree:
- kfree(host);
-err_out_regions:
- pci_release_regions(pdev);
-err_out:
- pci_disable_device(pdev);
- return rc;
-}
-
-static void carm_remove_one (struct pci_dev *pdev)
-{
- struct carm_host *host = pci_get_drvdata(pdev);
- unsigned int i;
-
- if (!host) {
- printk(KERN_ERR PFX "BUG: no host data for PCI(%s)\n",
- pci_name(pdev));
- return;
- }
-
- free_irq(pdev->irq, host);
- for (i = 0; i < CARM_MAX_PORTS; i++)
- carm_free_disk(host, i);
- unregister_blkdev(host->major, host->name);
- if (host->major == 160)
- clear_bit(0, &carm_major_alloc);
- else if (host->major == 161)
- clear_bit(1, &carm_major_alloc);
- blk_mq_destroy_queue(host->oob_q);
- blk_mq_free_tag_set(&host->tag_set);
- dma_free_coherent(&pdev->dev, CARM_SHM_SIZE, host->shm, host->shm_dma);
- iounmap(host->mmio);
- kfree(host);
- pci_release_regions(pdev);
- pci_disable_device(pdev);
-}
-
-module_pci_driver(carm_driver);
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 3f1906965ac8..2b7d1db5c4a7 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -47,7 +47,12 @@
#define UBLK_MINORS (1U << MINORBITS)
/* All UBLK_F_* have to be included into UBLK_F_ALL */
-#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_URING_CMD_COMP_IN_TASK)
+#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
+ | UBLK_F_URING_CMD_COMP_IN_TASK \
+ | UBLK_F_NEED_GET_DATA)
+
+/* All UBLK_PARAM_TYPE_* should be included here */
+#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
struct ublk_rq_data {
struct callback_head work;
@@ -86,6 +91,15 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_IO_FLAG_ABORTED 0x04
+/*
+ * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
+ * get data buffer address from ublksrv.
+ *
+ * Then, bio data could be copied into this data buffer for a WRITE request
+ * after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is unset.
+ */
+#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+
struct ublk_io {
/* userspace buffer address from io cmd */
__u64 addr;
@@ -119,7 +133,6 @@ struct ublk_device {
char *__queues;
unsigned short queue_size;
- unsigned short bs_shift;
struct ublksrv_ctrl_dev_info dev_info;
struct blk_mq_tag_set tag_set;
@@ -137,6 +150,8 @@ struct ublk_device {
spinlock_t mm_lock;
struct mm_struct *mm;
+ struct ublk_params params;
+
struct completion completion;
unsigned int nr_queues_ready;
atomic_t nr_aborted_queues;
@@ -149,6 +164,12 @@ struct ublk_device {
struct work_struct stop_work;
};
+/* header of ublk_params */
+struct ublk_params_header {
+ __u32 len;
+ __u32 types;
+};
+
static dev_t ublk_chr_devt;
static struct class *ublk_chr_class;
@@ -160,6 +181,90 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
static struct miscdevice ublk_misc;
+static void ublk_dev_param_basic_apply(struct ublk_device *ub)
+{
+ struct request_queue *q = ub->ub_disk->queue;
+ const struct ublk_param_basic *p = &ub->params.basic;
+
+ blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
+ blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
+ blk_queue_io_min(q, 1 << p->io_min_shift);
+ blk_queue_io_opt(q, 1 << p->io_opt_shift);
+
+ blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
+ p->attrs & UBLK_ATTR_FUA);
+ if (p->attrs & UBLK_ATTR_ROTATIONAL)
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+ else
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+
+ blk_queue_max_hw_sectors(q, p->max_sectors);
+ blk_queue_chunk_sectors(q, p->chunk_sectors);
+ blk_queue_virt_boundary(q, p->virt_boundary_mask);
+
+ if (p->attrs & UBLK_ATTR_READ_ONLY)
+ set_disk_ro(ub->ub_disk, true);
+
+ set_capacity(ub->ub_disk, p->dev_sectors);
+}
+
+static void ublk_dev_param_discard_apply(struct ublk_device *ub)
+{
+ struct request_queue *q = ub->ub_disk->queue;
+ const struct ublk_param_discard *p = &ub->params.discard;
+
+ q->limits.discard_alignment = p->discard_alignment;
+ q->limits.discard_granularity = p->discard_granularity;
+ blk_queue_max_discard_sectors(q, p->max_discard_sectors);
+ blk_queue_max_write_zeroes_sectors(q,
+ p->max_write_zeroes_sectors);
+ blk_queue_max_discard_segments(q, p->max_discard_segments);
+}
+
+static int ublk_validate_params(const struct ublk_device *ub)
+{
+ /* basic param is the only one which must be set */
+ if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
+ const struct ublk_param_basic *p = &ub->params.basic;
+
+ if (p->logical_bs_shift > PAGE_SHIFT)
+ return -EINVAL;
+
+ if (p->logical_bs_shift > p->physical_bs_shift)
+ return -EINVAL;
+
+ if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
+ const struct ublk_param_discard *p = &ub->params.discard;
+
+ /* So far, only support single segment discard */
+ if (p->max_discard_sectors && p->max_discard_segments != 1)
+ return -EINVAL;
+
+ if (!p->discard_granularity)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ublk_apply_params(struct ublk_device *ub)
+{
+ if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
+ return -EINVAL;
+
+ ublk_dev_param_basic_apply(ub);
+
+ if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
+ ublk_dev_param_discard_apply(ub);
+
+ return 0;
+}
+
static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
{
if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
@@ -168,6 +273,13 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
return false;
}
+static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
+{
+ if (ubq->flags & UBLK_F_NEED_GET_DATA)
+ return true;
+ return false;
+}
+
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
{
if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
@@ -509,6 +621,21 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req)
}
}
+static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+{
+ /* mark this cmd owned by ublksrv */
+ io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
+
+ /*
+ * clear ACTIVE since we are done with this sqe/cmd slot
+ * We can only accept io cmd in case of being not active.
+ */
+ io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+
+ /* tell ublksrv one io request is coming */
+ io_uring_cmd_done(io->cmd, res, 0);
+}
+
#define UBLK_REQUEUE_DELAY_MS 3
static inline void __ublk_rq_task_work(struct request *req)
@@ -531,6 +658,30 @@ static inline void __ublk_rq_task_work(struct request *req)
return;
}
+ if (ublk_need_get_data(ubq) &&
+ (req_op(req) == REQ_OP_WRITE ||
+ req_op(req) == REQ_OP_FLUSH)) {
+ /*
+ * We have not handled UBLK_IO_NEED_GET_DATA command yet,
+ * so immepdately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
+ * and notify it.
+ */
+ if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
+ io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
+ pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
+ __func__, io->cmd->cmd_op, ubq->q_id,
+ req->tag, io->flags);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+ return;
+ }
+ /*
+ * We have handled UBLK_IO_NEED_GET_DATA command,
+ * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
+ * do the copy work.
+ */
+ io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
+ }
+
mapped_bytes = ublk_map_io(ubq, req, io);
/* partially mapped, update io descriptor */
@@ -553,17 +704,7 @@ static inline void __ublk_rq_task_work(struct request *req)
mapped_bytes >> 9;
}
- /* mark this cmd owned by ublksrv */
- io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
-
- /*
- * clear ACTIVE since we are done with this sqe/cmd slot
- * We can only accept io cmd in case of being not active.
- */
- io->flags &= ~UBLK_IO_FLAG_ACTIVE;
-
- /* tell ublksrv one io request is coming */
- io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
}
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
@@ -788,16 +929,27 @@ static void ublk_daemon_monitor_work(struct work_struct *work)
UBLK_DAEMON_MONITOR_PERIOD);
}
+static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+{
+ return ubq->nr_io_ready == ubq->q_depth;
+}
+
static void ublk_cancel_queue(struct ublk_queue *ubq)
{
int i;
+ if (!ublk_queue_ready(ubq))
+ return;
+
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
if (io->flags & UBLK_IO_FLAG_ACTIVE)
io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
}
+
+ /* all io commands are canceled */
+ ubq->nr_io_ready = 0;
}
/* Cancel all pending commands, must be called after del_gendisk() returns */
@@ -818,19 +970,14 @@ static void ublk_stop_dev(struct ublk_device *ub)
del_gendisk(ub->ub_disk);
ub->dev_info.state = UBLK_S_DEV_DEAD;
ub->dev_info.ublksrv_pid = -1;
- ublk_cancel_dev(ub);
put_disk(ub->ub_disk);
ub->ub_disk = NULL;
unlock:
+ ublk_cancel_dev(ub);
mutex_unlock(&ub->mutex);
cancel_delayed_work_sync(&ub->monitor_work);
}
-static inline bool ublk_queue_ready(struct ublk_queue *ubq)
-{
- return ubq->nr_io_ready == ubq->q_depth;
-}
-
/* device can only be started after all IOs are ready */
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
{
@@ -846,6 +993,25 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
mutex_unlock(&ub->mutex);
}
+static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
+ int tag, struct io_uring_cmd *cmd)
+{
+ struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+ struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
+
+ if (ublk_can_use_task_work(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ /* should not fail since we call it just in ubq->ubq_daemon */
+ task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
+ } else {
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->req = req;
+ io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+ }
+}
+
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
@@ -884,6 +1050,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
goto out;
}
+ /*
+ * ensure that the user issues UBLK_IO_NEED_GET_DATA
+ * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
+ */
+ if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
+ ^ (cmd_op == UBLK_IO_NEED_GET_DATA))
+ goto out;
+
switch (cmd_op) {
case UBLK_IO_FETCH_REQ:
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
@@ -917,6 +1091,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
io->cmd = cmd;
ublk_commit_completion(ub, ub_cmd);
break;
+ case UBLK_IO_NEED_GET_DATA:
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+ goto out;
+ io->addr = ub_cmd->addr;
+ io->cmd = cmd;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+ break;
default:
goto out;
}
@@ -1083,13 +1265,13 @@ static void ublk_stop_work_fn(struct work_struct *work)
ublk_stop_dev(ub);
}
-/* align maximum I/O size to PAGE_SIZE */
+/* align max io buffer size with PAGE_SIZE */
static void ublk_align_max_io_size(struct ublk_device *ub)
{
- unsigned int max_rq_bytes = ub->dev_info.rq_max_blocks << ub->bs_shift;
+ unsigned int max_io_bytes = ub->dev_info.max_io_buf_bytes;
- ub->dev_info.rq_max_blocks =
- round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift;
+ ub->dev_info.max_io_buf_bytes =
+ round_down(max_io_bytes, PAGE_SIZE);
}
static int ublk_add_tag_set(struct ublk_device *ub)
@@ -1132,7 +1314,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
int ublksrv_pid = (int)header->data[0];
- unsigned long dev_blocks = header->data[1];
struct ublk_device *ub;
struct gendisk *disk;
int ret = -EINVAL;
@@ -1155,10 +1336,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
goto out_unlock;
}
- /* We may get disk size updated */
- if (dev_blocks)
- ub->dev_info.dev_blocks = dev_blocks;
-
disk = blk_mq_alloc_disk(&ub->tag_set, ub);
if (IS_ERR(disk)) {
ret = PTR_ERR(disk);
@@ -1168,27 +1345,28 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
disk->fops = &ub_fops;
disk->private_data = ub;
- blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size);
- blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size);
- blk_queue_io_min(disk->queue, ub->dev_info.block_size);
- blk_queue_max_hw_sectors(disk->queue,
- ub->dev_info.rq_max_blocks << (ub->bs_shift - 9));
- disk->queue->limits.discard_granularity = PAGE_SIZE;
- blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9);
- blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9);
-
- set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9));
-
ub->dev_info.ublksrv_pid = ublksrv_pid;
ub->ub_disk = disk;
+
+ ret = ublk_apply_params(ub);
+ if (ret)
+ goto out_put_disk;
+
get_device(&ub->cdev_dev);
ret = add_disk(disk);
if (ret) {
- put_disk(disk);
- goto out_unlock;
+ /*
+ * Has to drop the reference since ->free_disk won't be
+ * called in case of add_disk failure.
+ */
+ ublk_put_device(ub);
+ goto out_put_disk;
}
set_bit(UB_STATE_USED, &ub->state);
ub->dev_info.state = UBLK_S_DEV_LIVE;
+out_put_disk:
+ if (ret)
+ put_disk(disk);
out_unlock:
mutex_unlock(&ub->mutex);
ublk_put_device(ub);
@@ -1250,9 +1428,8 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
{
pr_devel("%s: dev id %d flags %llx\n", __func__,
info->dev_id, info->flags);
- pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
- info->nr_hw_queues, info->queue_depth,
- info->block_size, info->dev_blocks);
+ pr_devel("\t nr_hw_queues %d queue_depth %d\n",
+ info->nr_hw_queues, info->queue_depth);
}
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
@@ -1312,7 +1489,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
- ub->bs_shift = ilog2(ub->dev_info.block_size);
ub->dev_info.nr_hw_queues = min_t(unsigned int,
ub->dev_info.nr_hw_queues, nr_cpu_ids);
ublk_align_max_io_size(ub);
@@ -1436,6 +1612,82 @@ static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
return ret;
}
+static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_params_header ph;
+ struct ublk_device *ub;
+ int ret;
+
+ if (header->len <= sizeof(ph) || !header->addr)
+ return -EINVAL;
+
+ if (copy_from_user(&ph, argp, sizeof(ph)))
+ return -EFAULT;
+
+ if (ph.len > header->len || !ph.len)
+ return -EINVAL;
+
+ if (ph.len > sizeof(struct ublk_params))
+ ph.len = sizeof(struct ublk_params);
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ mutex_lock(&ub->mutex);
+ if (copy_to_user(argp, &ub->params, ph.len))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ mutex_unlock(&ub->mutex);
+
+ ublk_put_device(ub);
+ return ret;
+}
+
+static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_params_header ph;
+ struct ublk_device *ub;
+ int ret = -EFAULT;
+
+ if (header->len <= sizeof(ph) || !header->addr)
+ return -EINVAL;
+
+ if (copy_from_user(&ph, argp, sizeof(ph)))
+ return -EFAULT;
+
+ if (ph.len > header->len || !ph.len || !ph.types)
+ return -EINVAL;
+
+ if (ph.len > sizeof(struct ublk_params))
+ ph.len = sizeof(struct ublk_params);
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ /* parameters can only be changed when device isn't live */
+ mutex_lock(&ub->mutex);
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
+ ret = -EACCES;
+ } else if (copy_from_user(&ub->params, argp, ph.len)) {
+ ret = -EFAULT;
+ } else {
+ /* clear all we don't support yet */
+ ub->params.types &= UBLK_PARAM_TYPE_ALL;
+ ret = ublk_validate_params(ub);
+ }
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+
+ return ret;
+}
+
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
@@ -1471,6 +1723,12 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_CMD_GET_QUEUE_AFFINITY:
ret = ublk_ctrl_get_queue_affinity(cmd);
break;
+ case UBLK_CMD_GET_PARAMS:
+ ret = ublk_ctrl_get_params(cmd);
+ break;
+ case UBLK_CMD_SET_PARAMS:
+ ret = ublk_ctrl_set_params(cmd);
+ break;
default:
break;
}