diff options
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/Kconfig | 5 | ||||
-rw-r--r-- | drivers/block/aoe/aoecmd.c | 41 | ||||
-rw-r--r-- | drivers/block/brd.c | 39 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_actlog.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 2 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 16 | ||||
-rw-r--r-- | drivers/block/floppy.c | 10 | ||||
-rw-r--r-- | drivers/block/loop.c | 2 | ||||
-rw-r--r-- | drivers/block/mtip32xx/mtip32xx.c | 18 | ||||
-rw-r--r-- | drivers/block/nbd.c | 443 | ||||
-rw-r--r-- | drivers/block/null_blk.c | 1 | ||||
-rw-r--r-- | drivers/block/pktcdvd.c | 49 | ||||
-rw-r--r-- | drivers/block/skd_main.c | 238 | ||||
-rw-r--r-- | drivers/block/umem.c | 2 | ||||
-rw-r--r-- | drivers/block/xen-blkback/blkback.c | 10 | ||||
-rw-r--r-- | drivers/block/xen-blkfront.c | 3 | ||||
-rw-r--r-- | drivers/block/zram/zcomp.c | 76 | ||||
-rw-r--r-- | drivers/block/zram/zcomp.h | 5 | ||||
-rw-r--r-- | drivers/block/zram/zram_drv.c | 20 |
19 files changed, 437 insertions, 545 deletions
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 39dd30b6ef86..223ff2fcae7e 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -384,9 +384,12 @@ config BLK_DEV_RAM_DAX allocated from highmem (only a problem for highmem systems). config CDROM_PKTCDVD - tristate "Packet writing on CD/DVD media" + tristate "Packet writing on CD/DVD media (DEPRECATED)" depends on !UML help + Note: This driver is deprecated and will be removed from the + kernel in the near future! + If you have a CDROM/DVD drive that supports packet writing, say Y to include support. It should work with any MMC/Mt Fuji compliant ATAPI or SCSI drive, which is just about any newer diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index ab19adb07a12..3c606c09fd5a 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -853,45 +853,6 @@ rqbiocnt(struct request *r) return n; } -/* This can be removed if we are certain that no users of the block - * layer will ever use zero-count pages in bios. Otherwise we have to - * protect against the put_page sometimes done by the network layer. - * - * See http://oss.sgi.com/archives/xfs/2007-01/msg00594.html for - * discussion. - * - * We cannot use get_page in the workaround, because it insists on a - * positive page count as a precondition. So we use _refcount directly. - */ -static void -bio_pageinc(struct bio *bio) -{ - struct bio_vec bv; - struct page *page; - struct bvec_iter iter; - - bio_for_each_segment(bv, bio, iter) { - /* Non-zero page count for non-head members of - * compound pages is no longer allowed by the kernel. - */ - page = compound_head(bv.bv_page); - page_ref_inc(page); - } -} - -static void -bio_pagedec(struct bio *bio) -{ - struct page *page; - struct bio_vec bv; - struct bvec_iter iter; - - bio_for_each_segment(bv, bio, iter) { - page = compound_head(bv.bv_page); - page_ref_dec(page); - } -} - static void bufinit(struct buf *buf, struct request *rq, struct bio *bio) { @@ -899,7 +860,6 @@ bufinit(struct buf *buf, struct request *rq, struct bio *bio) buf->rq = rq; buf->bio = bio; buf->iter = bio->bi_iter; - bio_pageinc(bio); } static struct buf * @@ -1127,7 +1087,6 @@ aoe_end_buf(struct aoedev *d, struct buf *buf) if (buf == d->ip.buf) d->ip.buf = NULL; rq = buf->rq; - bio_pagedec(buf->bio); mempool_free(buf, d->bufpool); n = (unsigned long) rq->special; rq->special = (void *) --n; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 0c76d4016eeb..ad793f35632c 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -395,44 +395,9 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector, #define brd_direct_access NULL #endif -static int brd_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) -{ - int error; - struct brd_device *brd = bdev->bd_disk->private_data; - - if (cmd != BLKFLSBUF) - return -ENOTTY; - - /* - * ram device BLKFLSBUF has special semantics, we want to actually - * release and destroy the ramdisk data. - */ - mutex_lock(&brd_mutex); - mutex_lock(&bdev->bd_mutex); - error = -EBUSY; - if (bdev->bd_openers <= 1) { - /* - * Kill the cache first, so it isn't written back to the - * device. - * - * Another thread might instantiate more buffercache here, - * but there is not much we can do to close that race. - */ - kill_bdev(bdev); - brd_free_pages(brd); - error = 0; - } - mutex_unlock(&bdev->bd_mutex); - mutex_unlock(&brd_mutex); - - return error; -} - static const struct block_device_operations brd_fops = { .owner = THIS_MODULE, .rw_page = brd_rw_page, - .ioctl = brd_ioctl, .direct_access = brd_direct_access, }; @@ -443,8 +408,8 @@ static int rd_nr = CONFIG_BLK_DEV_RAM_COUNT; module_param(rd_nr, int, S_IRUGO); MODULE_PARM_DESC(rd_nr, "Maximum number of brd devices"); -int rd_size = CONFIG_BLK_DEV_RAM_SIZE; -module_param(rd_size, int, S_IRUGO); +unsigned long rd_size = CONFIG_BLK_DEV_RAM_SIZE; +module_param(rd_size, ulong, S_IRUGO); MODULE_PARM_DESC(rd_size, "Size of each RAM disk in kbytes."); static int max_part = 1; diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 2d3d50ab74bf..8d7bcfa49c12 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c @@ -148,7 +148,7 @@ static int _drbd_md_sync_page_io(struct drbd_device *device, if ((op == REQ_OP_WRITE) && !test_bit(MD_NO_FUA, &device->flags)) op_flags |= REQ_FUA | REQ_PREFLUSH; - op_flags |= REQ_SYNC | REQ_NOIDLE; + op_flags |= REQ_SYNC; bio = bio_alloc_drbd(GFP_NOIO); bio->bi_bdev = bdev->md_bdev; diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 100be556e613..83482721bc01 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -1871,7 +1871,7 @@ int drbd_send(struct drbd_connection *connection, struct socket *sock, drbd_update_congested(connection); } do { - rv = kernel_sendmsg(sock, &msg, &iov, 1, size); + rv = kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len); if (rv == -EAGAIN) { if (we_should_drop_the_connection(connection, sock)) break; diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 942384f34e22..c7728dd77230 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1266,7 +1266,7 @@ static void submit_one_flush(struct drbd_device *device, struct issue_flush_cont bio->bi_bdev = device->ldev->backing_bdev; bio->bi_private = octx; bio->bi_end_io = one_flush_endio; - bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH); + bio->bi_opf = REQ_OP_FLUSH | REQ_PREFLUSH; device->flush_jif = jiffies; set_bit(FLUSH_PENDING, &device->flags); @@ -1648,20 +1648,8 @@ next_bio: page_chain_for_each(page) { unsigned len = min_t(unsigned, data_size, PAGE_SIZE); - if (!bio_add_page(bio, page, len, 0)) { - /* A single page must always be possible! - * But in case it fails anyways, - * we deal with it, and complain (below). */ - if (bio->bi_vcnt == 0) { - drbd_err(device, - "bio_add_page failed for len=%u, " - "bi_vcnt=0 (bi_sector=%llu)\n", - len, (uint64_t)bio->bi_iter.bi_sector); - err = -ENOSPC; - goto fail; - } + if (!bio_add_page(bio, page, len, 0)) goto next_bio; - } data_size -= len; sector += len >> 9; --nr_pages; diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index e3d8e4ced4a2..a391a3cfb3fe 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3806,14 +3806,10 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) cbdata.drive = drive; - bio_init(&bio); - bio.bi_io_vec = &bio_vec; - bio_vec.bv_page = page; - bio_vec.bv_len = size; - bio_vec.bv_offset = 0; - bio.bi_vcnt = 1; - bio.bi_iter.bi_size = size; + bio_init(&bio, &bio_vec, 1); bio.bi_bdev = bdev; + bio_add_page(&bio, page, size, 0); + bio.bi_iter.bi_sector = 0; bio.bi_flags |= (1 << BIO_QUIET); bio.bi_private = &cbdata; diff --git a/drivers/block/loop.c b/drivers/block/loop.c index fa1b7a90ba11..4af818766797 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1646,7 +1646,7 @@ static int loop_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(bd->rq); if (lo->lo_state != Lo_bound) - return -EIO; + return BLK_MQ_RQ_QUEUE_ERROR; switch (req_op(cmd->rq)) { case REQ_OP_FLUSH: diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 3cfd879267b2..f96ab717534c 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -2035,18 +2035,14 @@ static int exec_drive_taskfile(struct driver_data *dd, taskout = req_task->out_size; taskin = req_task->in_size; /* 130560 = 512 * 0xFF*/ - if (taskin > 130560 || taskout > 130560) { - err = -EINVAL; - goto abort; - } + if (taskin > 130560 || taskout > 130560) + return -EINVAL; if (taskout) { outbuf = memdup_user(buf + outtotal, taskout); - if (IS_ERR(outbuf)) { - err = PTR_ERR(outbuf); - outbuf = NULL; - goto abort; - } + if (IS_ERR(outbuf)) + return PTR_ERR(outbuf); + outbuf_dma = pci_map_single(dd->pdev, outbuf, taskout, @@ -3937,8 +3933,10 @@ static int mtip_block_initialize(struct driver_data *dd) /* Generate the disk name, implemented same as in sd.c */ do { - if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL)) + if (!ida_pre_get(&rssd_index_ida, GFP_KERNEL)) { + rv = -ENOMEM; goto ida_get_error; + } spin_lock(&rssd_index_lock); rv = ida_get_new(&rssd_index_ida, &index); diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 19a16b2dbb91..99c84468f154 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -41,26 +41,34 @@ #include <linux/nbd.h> +struct nbd_sock { + struct socket *sock; + struct mutex tx_lock; +}; + #define NBD_TIMEDOUT 0 #define NBD_DISCONNECT_REQUESTED 1 +#define NBD_DISCONNECTED 2 +#define NBD_RUNNING 3 struct nbd_device { u32 flags; unsigned long runtime_flags; - struct socket * sock; /* If == NULL, device is not ready, yet */ + struct nbd_sock **socks; int magic; struct blk_mq_tag_set tag_set; - struct mutex tx_lock; + struct mutex config_lock; struct gendisk *disk; - int blksize; + int num_connections; + atomic_t recv_threads; + wait_queue_head_t recv_wq; + loff_t blksize; loff_t bytesize; - /* protects initialization and shutdown of the socket */ - spinlock_t sock_lock; struct task_struct *task_recv; - struct task_struct *task_send; + struct task_struct *task_setup; #if IS_ENABLED(CONFIG_DEBUG_FS) struct dentry *dbg_dir; @@ -69,7 +77,7 @@ struct nbd_device { struct nbd_cmd { struct nbd_device *nbd; - struct list_head list; + struct completion send_complete; }; #if IS_ENABLED(CONFIG_DEBUG_FS) @@ -126,7 +134,7 @@ static void nbd_size_update(struct nbd_device *nbd, struct block_device *bdev) } static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, - int blocksize, int nr_blocks) + loff_t blocksize, loff_t nr_blocks) { int ret; @@ -135,7 +143,7 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev, return ret; nbd->blksize = blocksize; - nbd->bytesize = (loff_t)blocksize * (loff_t)nr_blocks; + nbd->bytesize = blocksize * nr_blocks; nbd_size_update(nbd, bdev); @@ -159,22 +167,20 @@ static void nbd_end_request(struct nbd_cmd *cmd) */ static void sock_shutdown(struct nbd_device *nbd) { - struct socket *sock; - - spin_lock(&nbd->sock_lock); + int i; - if (!nbd->sock) { - spin_unlock(&nbd->sock_lock); + if (nbd->num_connections == 0) + return; + if (test_and_set_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) return; - } - - sock = nbd->sock; - dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n"); - nbd->sock = NULL; - spin_unlock(&nbd->sock_lock); - kernel_sock_shutdown(sock, SHUT_RDWR); - sockfd_put(sock); + for (i = 0; i < nbd->num_connections; i++) { + struct nbd_sock *nsock = nbd->socks[i]; + mutex_lock(&nsock->tx_lock); + kernel_sock_shutdown(nsock->sock, SHUT_RDWR); + mutex_unlock(&nsock->tx_lock); + } + dev_warn(disk_to_dev(nbd->disk), "shutting down sockets\n"); } static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, @@ -182,42 +188,38 @@ static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req); struct nbd_device *nbd = cmd->nbd; - struct socket *sock = NULL; - - spin_lock(&nbd->sock_lock); + dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); set_bit(NBD_TIMEDOUT, &nbd->runtime_flags); - - if (nbd->sock) { - sock = nbd->sock; - get_file(sock->file); - } - - spin_unlock(&nbd->sock_lock); - if (sock) { - kernel_sock_shutdown(sock, SHUT_RDWR); - sockfd_put(sock); - } - req->errors++; - dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n"); + + /* + * If our disconnect packet times out then we're already holding the + * config_lock and could deadlock here, so just set an error and return, + * we'll handle shutting everything down later. + */ + if (req->cmd_type == REQ_TYPE_DRV_PRIV) + return BLK_EH_HANDLED; + mutex_lock(&nbd->config_lock); + sock_shutdown(nbd); + mutex_unlock(&nbd->config_lock); return BLK_EH_HANDLED; } /* * Send or receive packet. */ -static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, - int msg_flags) +static int sock_xmit(struct nbd_device *nbd, int index, int send, void *buf, + int size, int msg_flags) { - struct socket *sock = nbd->sock; + struct socket *sock = nbd->socks[index]->sock; int result; struct msghdr msg; struct kvec iov; unsigned long pflags = current->flags; if (unlikely(!sock)) { - dev_err(disk_to_dev(nbd->disk), + dev_err_ratelimited(disk_to_dev(nbd->disk), "Attempted %s on closed socket in sock_xmit\n", (send ? "send" : "recv")); return -EINVAL; @@ -254,29 +256,29 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size, return result; } -static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec, - int flags) +static inline int sock_send_bvec(struct nbd_device *nbd, int index, + struct bio_vec *bvec, int flags) { int result; void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, 1, kaddr + bvec->bv_offset, + result = sock_xmit(nbd, index, 1, kaddr + bvec->bv_offset, bvec->bv_len, flags); kunmap(bvec->bv_page); return result; } /* always call with the tx_lock held */ -static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) +static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); int result, flags; struct nbd_request request; unsigned long size = blk_rq_bytes(req); + struct bio *bio; u32 type; + u32 tag = blk_mq_unique_tag(req); - if (req->cmd_type == REQ_TYPE_DRV_PRIV) - type = NBD_CMD_DISC; - else if (req_op(req) == REQ_OP_DISCARD) + if (req_op(req) == REQ_OP_DISCARD) type = NBD_CMD_TRIM; else if (req_op(req) == REQ_OP_FLUSH) type = NBD_CMD_FLUSH; @@ -288,73 +290,89 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd) memset(&request, 0, sizeof(request)); request.magic = htonl(NBD_REQUEST_MAGIC); request.type = htonl(type); - if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) { + if (type != NBD_CMD_FLUSH) { request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9); request.len = htonl(size); } - memcpy(request.handle, &req->tag, sizeof(req->tag)); + memcpy(request.handle, &tag, sizeof(tag)); dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n", cmd, nbdcmd_to_ascii(type), (unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req)); - result = sock_xmit(nbd, 1, &request, sizeof(request), + result = sock_xmit(nbd, index, 1, &request, sizeof(request), (type == NBD_CMD_WRITE) ? MSG_MORE : 0); if (result <= 0) { - dev_err(disk_to_dev(nbd->disk), + dev_err_ratelimited(disk_to_dev(nbd->disk), "Send control failed (result %d)\n", result); return -EIO; } - if (type == NBD_CMD_WRITE) { - struct req_iterator iter; + if (type != NBD_CMD_WRITE) + return 0; + + flags = 0; + bio = req->bio; + while (bio) { + struct bio *next = bio->bi_next; + struct bvec_iter iter; struct bio_vec bvec; - /* - * we are really probing at internals to determine - * whether to set MSG_MORE or not... - */ - rq_for_each_segment(bvec, req, iter) { - flags = 0; - if (!rq_iter_last(bvec, iter)) + + bio_for_each_segment(bvec, bio, iter) { + bool is_last = !next && bio_iter_last(bvec, iter); + + if (is_last) flags = MSG_MORE; dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n", cmd, bvec.bv_len); - result = sock_send_bvec(nbd, &bvec, flags); + result = sock_send_bvec(nbd, index, &bvec, flags); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Send data failed (result %d)\n", result); return -EIO; } + /* + * The completion might already have come in, + * so break for the last one instead of letting + * the iterator do it. This prevents use-after-free + * of the bio. + */ + if (is_last) + break; } + bio = next; } return 0; } -static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec) +static inline int sock_recv_bvec(struct nbd_device *nbd, int index, + struct bio_vec *bvec) { int result; void *kaddr = kmap(bvec->bv_page); - result = sock_xmit(nbd, 0, kaddr + bvec->bv_offset, bvec->bv_len, - MSG_WAITALL); + result = sock_xmit(nbd, index, 0, kaddr + bvec->bv_offset, + bvec->bv_len, MSG_WAITALL); kunmap(bvec->bv_page); return result; } /* NULL returned = something went wrong, inform userspace */ -static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) +static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index) { int result; struct nbd_reply reply; struct nbd_cmd *cmd; struct request *req = NULL; u16 hwq; - int tag; + u32 tag; reply.magic = 0; - result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL); + result = sock_xmit(nbd, index, 0, &reply, sizeof(reply), MSG_WAITALL); if (result <= 0) { - dev_err(disk_to_dev(nbd->disk), - "Receive control failed (result %d)\n", result); + if (!test_bit(NBD_DISCONNECTED, &nbd->runtime_flags) && + !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + dev_err(disk_to_dev(nbd->disk), + "Receive control failed (result %d)\n", result); return ERR_PTR(result); } @@ -364,7 +382,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) return ERR_PTR(-EPROTO); } - memcpy(&tag, reply.handle, sizeof(int)); + memcpy(&tag, reply.handle, sizeof(u32)); hwq = blk_mq_unique_tag_to_hwq(tag); if (hwq < nbd->tag_set.nr_hw_queues) @@ -376,7 +394,6 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) return ERR_PTR(-ENOENT); } cmd = blk_mq_rq_to_pdu(req); - if (ntohl(reply.error)) { dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n", ntohl(reply.error)); @@ -390,7 +407,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) struct bio_vec bvec; rq_for_each_segment(bvec, req, iter) { - result = sock_recv_bvec(nbd, &bvec); + result = sock_recv_bvec(nbd, index, &bvec); if (result <= 0) { dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n", result); @@ -400,6 +417,9 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd) dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n", cmd, bvec.bv_len); } + } else { + /* See the comment in nbd_queue_rq. */ + wait_for_completion(&cmd->send_complete); } return cmd; } @@ -418,25 +438,24 @@ static struct device_attribute pid_attr = { .show = pid_show, }; -static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) +struct recv_thread_args { + struct work_struct work; + struct nbd_device *nbd; + int index; +}; + +static void recv_work(struct work_struct *work) { + struct recv_thread_args *args = container_of(work, + struct recv_thread_args, + work); + struct nbd_device *nbd = args->nbd; struct nbd_cmd *cmd; - int ret; + int ret = 0; BUG_ON(nbd->magic != NBD_MAGIC); - - sk_set_memalloc(nbd->sock->sk); - - ret = device_create_file(disk_to_dev(nbd->disk), &pid_attr); - if (ret) { - dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); - return ret; - } - - nbd_size_update(nbd, bdev); - while (1) { - cmd = nbd_read_stat(nbd); + cmd = nbd_read_stat(nbd, args->index); if (IS_ERR(cmd)) { ret = PTR_ERR(cmd); break; @@ -445,10 +464,14 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev) nbd_end_request(cmd); } - nbd_size_clear(nbd, bdev); - - device_remove_file(disk_to_dev(nbd->disk), &pid_attr); - return ret; + /* + * We got an error, shut everybody down if this wasn't the result of a + * disconnect request. + */ + if (ret && !test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags)) + sock_shutdown(nbd); + atomic_dec(&nbd->recv_threads); + wake_up(&nbd->recv_wq); } static void nbd_clear_req(struct request *req, void *data, bool reserved) @@ -466,51 +489,60 @@ static void nbd_clear_que(struct nbd_device *nbd) { BUG_ON(nbd->magic != NBD_MAGIC); - /* - * Because we have set nbd->sock to NULL under the tx_lock, all - * modifications to the list must have completed by now. - */ - BUG_ON(nbd->sock); - blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL); dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n"); } -static void nbd_handle_cmd(struct nbd_cmd *cmd) +static void nbd_handle_cmd(struct nbd_cmd *cmd, int index) { struct request *req = blk_mq_rq_from_pdu(cmd); struct nbd_device *nbd = cmd->nbd; + struct nbd_sock *nsock; + + if (index >= nbd->num_connections) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on invalid socket\n"); + goto error_out; + } + + if (test_bit(NBD_DISCONNECTED, &nbd->runtime_flags)) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on closed socket\n"); + goto error_out; + } - if (req->cmd_type != REQ_TYPE_FS) + if (req->cmd_type != REQ_TYPE_FS && + req->cmd_type != REQ_TYPE_DRV_PRIV) goto error_out; - if (rq_data_dir(req) == WRITE && + if (req->cmd_type == REQ_TYPE_FS && + rq_data_dir(req) == WRITE && (nbd->flags & NBD_FLAG_READ_ONLY)) { - dev_err(disk_to_dev(nbd->disk), - "Write on read-only\n"); + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Write on read-only\n"); goto error_out; } req->errors = 0; - mutex_lock(&nbd->tx_lock); - nbd->task_send = current; - if (unlikely(!nbd->sock)) { - mutex_unlock(&nbd->tx_lock); - dev_err(disk_to_dev(nbd->disk), - "Attempted send on closed socket\n"); + nsock = nbd->socks[index]; + mutex_lock(&nsock->tx_lock); + if (unlikely(!nsock->sock)) { + mutex_unlock(&nsock->tx_lock); + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Attempted send on closed socket\n"); goto error_out; } - if (nbd_send_cmd(nbd, cmd) != 0) { - dev_err(disk_to_dev(nbd->disk), "Request send failed\n"); + if (nbd_send_cmd(nbd, cmd, index) != 0) { + dev_err_ratelimited(disk_to_dev(nbd->disk), + "Request send failed\n"); req->errors++; nbd_end_request(cmd); } - nbd->task_send = NULL; - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nsock->tx_lock); return; @@ -524,39 +556,70 @@ static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx, { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq); + /* + * Since we look at the bio's to send the request over the network we + * need to make sure the completion work doesn't mark this request done + * before we are done doing our send. This keeps us from dereferencing + * freed data if we have particularly fast completions (ie we get the + * completion before we exit sock_xmit on the last bvec) or in the case + * that the server is misbehaving (or there was an error) before we're + * done sending everything over the wire. + */ + init_completion(&cmd->send_complete); blk_mq_start_request(bd->rq); - nbd_handle_cmd(cmd); + nbd_handle_cmd(cmd, hctx->queue_num); + complete(&cmd->send_complete); + return BLK_MQ_RQ_QUEUE_OK; } -static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock) +static int nbd_add_socket(struct nbd_device *nbd, struct socket *sock) { - int ret = 0; - - spin_lock_irq(&nbd->sock_lock); + struct nbd_sock **socks; + struct nbd_sock *nsock; - if (nbd->sock) { - ret = -EBUSY; - goto out; + if (!nbd->task_setup) + nbd->task_setup = current; + if (nbd->task_setup != current) { + dev_err(disk_to_dev(nbd->disk), + "Device being setup by another task"); + return -EINVAL; } - nbd->sock = sock; + socks = krealloc(nbd->socks, (nbd->num_connections + 1) * + sizeof(struct nbd_sock *), GFP_KERNEL); + if (!socks) + return -ENOMEM; + nsock = kzalloc(sizeof(struct nbd_sock), GFP_KERNEL); + if (!nsock) + return -ENOMEM; -out: - spin_unlock_irq(&nbd->sock_lock); + nbd->socks = socks; + + mutex_init(&nsock->tx_lock); + nsock->sock = sock; + socks[nbd->num_connections++] = nsock; - return ret; + return 0; } /* Reset all properties of an NBD device */ static void nbd_reset(struct nbd_device *nbd) { + int i; + + for (i = 0; i < nbd->num_connections; i++) + kfree(nbd->socks[i]); + kfree(nbd->socks); + nbd->socks = NULL; nbd->runtime_flags = 0; nbd->blksize = 1024; nbd->bytesize = 0; set_capacity(nbd->disk, 0); nbd->flags = 0; nbd->tag_set.timeout = 0; + nbd->num_connections = 0; + nbd->task_setup = NULL; queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue); } @@ -582,48 +645,68 @@ static void nbd_parse_flags(struct nbd_device *nbd, struct block_device *bdev) blk_queue_write_cache(nbd->disk->queue, false, false); } +static void send_disconnects(struct nbd_device *nbd) +{ + struct nbd_request request = {}; + int i, ret; + + request.magic = htonl(NBD_REQUEST_MAGIC); + request.type = htonl(NBD_CMD_DISC); + + for (i = 0; i < nbd->num_connections; i++) { + ret = sock_xmit(nbd, i, 1, &request, sizeof(request), 0); + if (ret <= 0) + dev_err(disk_to_dev(nbd->disk), + "Send disconnect failed %d\n", ret); + } +} + static int nbd_dev_dbg_init(struct nbd_device *nbd); static void nbd_dev_dbg_close(struct nbd_device *nbd); -/* Must be called with tx_lock held */ - +/* Must be called with config_lock held */ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, unsigned int cmd, unsigned long arg) { switch (cmd) { case NBD_DISCONNECT: { - struct request *sreq; - dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n"); - if (!nbd->sock) + if (!nbd->socks) return -EINVAL; - sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0); - if (!sreq) - return -ENOMEM; - - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nbd->config_lock); fsync_bdev(bdev); - mutex_lock(&nbd->tx_lock); - sreq->cmd_type = REQ_TYPE_DRV_PRIV; + mutex_lock(&nbd->config_lock); /* Check again after getting mutex back. */ - if (!nbd->sock) { - blk_mq_free_request(sreq); + if (!nbd->socks) return -EINVAL; - } - set_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags); - - nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq)); - blk_mq_free_request(sreq); + if (!test_and_set_bit(NBD_DISCONNECT_REQUESTED, + &nbd->runtime_flags)) + send_disconnects(nbd); return 0; } - + case NBD_CLEAR_SOCK: sock_shutdown(nbd); nbd_clear_que(nbd); kill_bdev(bdev); + nbd_bdev_reset(bdev); + /* + * We want to give the run thread a chance to wait for everybody + * to clean up and then do it's own cleanup. + */ + if (!test_bit(NBD_RUNNING, &nbd->runtime_flags)) { + int i; + + for (i = 0; i < nbd->num_connections; i++) + kfree(nbd->socks[i]); + kfree(nbd->socks); + nbd->socks = NULL; + nbd->num_connections = 0; + nbd->task_setup = NULL; + } return 0; case NBD_SET_SOCK: { @@ -633,7 +716,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, if (!sock) return err; - err = nbd_set_socket(nbd, sock); + err = nbd_add_socket(nbd, sock); if (!err && max_part) bdev->bd_invalidated = 1; @@ -648,7 +731,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, case NBD_SET_SIZE: return nbd_size_set(nbd, bdev, nbd->blksize, - arg / nbd->blksize); + div_s64(arg, nbd->blksize)); case NBD_SET_SIZE_BLOCKS: return nbd_size_set(nbd, bdev, nbd->blksize, arg); @@ -662,26 +745,61 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, return 0; case NBD_DO_IT: { - int error; + struct recv_thread_args *args; + int num_connections = nbd->num_connections; + int error = 0, i; if (nbd->task_recv) return -EBUSY; - if (!nbd->sock) + if (!nbd->socks) return -EINVAL; + if (num_connections > 1 && + !(nbd->flags & NBD_FLAG_CAN_MULTI_CONN)) { + dev_err(disk_to_dev(nbd->disk), "server does not support multiple connections per device.\n"); + error = -EINVAL; + goto out_err; + } - /* We have to claim the device under the lock */ + set_bit(NBD_RUNNING, &nbd->runtime_flags); + blk_mq_update_nr_hw_queues(&nbd->tag_set, nbd->num_connections); + args = kcalloc(num_connections, sizeof(*args), GFP_KERNEL); + if (!args) { + error = -ENOMEM; + goto out_err; + } nbd->task_recv = current; - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nbd->config_lock); nbd_parse_flags(nbd, bdev); + error = device_create_file(disk_to_dev(nbd->disk), &pid_attr); + if (error) { + dev_err(disk_to_dev(nbd->disk), "device_create_file failed!\n"); + goto out_recv; + } + + nbd_size_update(nbd, bdev); + nbd_dev_dbg_init(nbd); - error = nbd_thread_recv(nbd, bdev); + for (i = 0; i < num_connections; i++) { + sk_set_memalloc(nbd->socks[i]->sock->sk); + atomic_inc(&nbd->recv_threads); + INIT_WORK(&args[i].work, recv_work); + args[i].nbd = nbd; + args[i].index = i; + queue_work(system_long_wq, &args[i].work); + } + wait_event_interruptible(nbd->recv_wq, + atomic_read(&nbd->recv_threads) == 0); + for (i = 0; i < num_connections; i++) + flush_work(&args[i].work); nbd_dev_dbg_close(nbd); - - mutex_lock(&nbd->tx_lock); + nbd_size_clear(nbd, bdev); + device_remove_file(disk_to_dev(nbd->disk), &pid_attr); +out_recv: + mutex_lock(&nbd->config_lock); nbd->task_recv = NULL; - +out_err: sock_shutdown(nbd); nbd_clear_que(nbd); kill_bdev(bdev); @@ -694,7 +812,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd, error = -ETIMEDOUT; nbd_reset(nbd); - return error; } @@ -726,9 +843,9 @@ static int nbd_ioctl(struct block_device *bdev, fmode_t mode, BUG_ON(nbd->magic != NBD_MAGIC); - mutex_lock(&nbd->tx_lock); + mutex_lock(&nbd->config_lock); error = __nbd_ioctl(bdev, nbd, cmd, arg); - mutex_unlock(&nbd->tx_lock); + mutex_unlock(&nbd->config_lock); return error; } @@ -748,8 +865,6 @@ static int nbd_dbg_tasks_show(struct seq_file *s, void *unused) if (nbd->task_recv) seq_printf(s, "recv: %d\n", task_pid_nr(nbd->task_recv)); - if (nbd->task_send) - seq_printf(s, "send: %d\n", task_pid_nr(nbd->task_send)); return 0; } @@ -817,7 +932,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd) debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops); debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize); debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout); - debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize); + debugfs_create_u64("blocksize", 0444, dir, &nbd->blksize); debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops); return 0; @@ -873,9 +988,7 @@ static int nbd_init_request(void *data, struct request *rq, unsigned int numa_node) { struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq); - cmd->nbd = data; - INIT_LIST_HEAD(&cmd->list); return 0; } @@ -985,13 +1098,13 @@ static int __init nbd_init(void) for (i = 0; i < nbds_max; i++) { struct gendisk *disk = nbd_dev[i].disk; nbd_dev[i].magic = NBD_MAGIC; - spin_lock_init(&nbd_dev[i].sock_lock); - mutex_init(&nbd_dev[i].tx_lock); + mutex_init(&nbd_dev[i].config_lock); disk->major = NBD_MAJOR; disk->first_minor = i << part_shift; disk->fops = &nbd_fops; disk->private_data = &nbd_dev[i]; sprintf(disk->disk_name, "nbd%d", i); + init_waitqueue_head(&nbd_dev[i].recv_wq); nbd_reset(&nbd_dev[i]); add_disk(disk); } diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index ba6f4a2e73db..4943ee22716e 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -577,6 +577,7 @@ static void null_nvm_unregister(struct nullb *nullb) #else static int null_nvm_register(struct nullb *nullb) { + pr_err("null_blk: CONFIG_NVM needs to be enabled for LightNVM\n"); return -EINVAL; } static void null_nvm_unregister(struct nullb *nullb) {} diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 90fa4ac149db..95c98de92971 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -721,7 +721,7 @@ static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command * rq->timeout = 60*HZ; if (cgc->quiet) - rq->cmd_flags |= REQ_QUIET; + rq->rq_flags |= RQF_QUIET; blk_execute_rq(rq->q, pd->bdev->bd_disk, rq, 0); if (rq->errors) @@ -944,39 +944,6 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que } } -/* - * Copy all data for this packet to pkt->pages[], so that - * a) The number of required segments for the write bio is minimized, which - * is necessary for some scsi controllers. - * b) The data can be used as cache to avoid read requests if we receive a - * new write request for the same zone. - */ -static void pkt_make_local_copy(struct packet_data *pkt, struct bio_vec *bvec) -{ - int f, p, offs; - - /* Copy all data to pkt->pages[] */ - p = 0; - offs = 0; - for (f = 0; f < pkt->frames; f++) { - if (bvec[f].bv_page != pkt->pages[p]) { - void *vfrom = kmap_atomic(bvec[f].bv_page) + bvec[f].bv_offset; - void *vto = page_address(pkt->pages[p]) + offs; - memcpy(vto, vfrom, CD_FRAMESIZE); - kunmap_atomic(vfrom); - bvec[f].bv_page = pkt->pages[p]; - bvec[f].bv_offset = offs; - } else { - BUG_ON(bvec[f].bv_offset != offs); - } - offs += CD_FRAMESIZE; - if (offs >= PAGE_SIZE) { - offs = 0; - p++; - } - } -} - static void pkt_end_io_read(struct bio *bio) { struct packet_data *pkt = bio->bi_private; @@ -1298,7 +1265,6 @@ try_next_bio: static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) { int f; - struct bio_vec *bvec = pkt->w_bio->bi_io_vec; bio_reset(pkt->w_bio); pkt->w_bio->bi_iter.bi_sector = pkt->sector; @@ -1308,9 +1274,10 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) /* XXX: locking? */ for (f = 0; f < pkt->frames; f++) { - bvec[f].bv_page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; - bvec[f].bv_offset = (f * CD_FRAMESIZE) % PAGE_SIZE; - if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) + struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; + unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + + if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset)) BUG(); } pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt); @@ -1327,12 +1294,10 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) pkt_dbg(2, pd, "Writing %d frames for zone %llx\n", pkt->write_size, (unsigned long long)pkt->sector); - if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) { - pkt_make_local_copy(pkt, bvec); + if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) pkt->cache_valid = 1; - } else { + else pkt->cache_valid = 0; - } /* Start the write request */ atomic_set(&pkt->io_wait, 1); diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index 3822eae102db..abf805e332e2 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c @@ -36,7 +36,6 @@ #include <linux/scatterlist.h> #include <linux/version.h> #include <linux/err.h> -#include <linux/scatterlist.h> #include <linux/aer.h> #include <linux/ctype.h> #include <linux/wait.h> @@ -270,8 +269,6 @@ struct skd_device { resource_size_t mem_phys[SKD_MAX_BARS]; u32 mem_size[SKD_MAX_BARS]; - skd_irq_type_t irq_type; - u32 msix_count; struct skd_msix_entry *msix_entries; struct pci_dev *pdev; @@ -2138,12 +2135,8 @@ static void skd_send_fitmsg(struct skd_device *skdev, u8 *bp = (u8 *)skmsg->msg_buf; int i; for (i = 0; i < skmsg->length; i += 8) { - pr_debug("%s:%s:%d msg[%2d] %02x %02x %02x %02x " - "%02x %02x %02x %02x\n", - skdev->name, __func__, __LINE__, - i, bp[i + 0], bp[i + 1], bp[i + 2], - bp[i + 3], bp[i + 4], bp[i + 5], - bp[i + 6], bp[i + 7]); + pr_debug("%s:%s:%d msg[%2d] %8ph\n", + skdev->name, __func__, __LINE__, i, &bp[i]); if (i == 0) i = 64 - 8; } @@ -2164,7 +2157,6 @@ static void skd_send_fitmsg(struct skd_device *skdev, qcmd |= FIT_QCMD_MSGSIZE_64; SKD_WRITEQ(skdev, qcmd, FIT_Q_COMMAND); - } static void skd_send_special_fitmsg(struct skd_device *skdev, @@ -2177,11 +2169,8 @@ static void skd_send_special_fitmsg(struct skd_device *skdev, int i; for (i = 0; i < SKD_N_SPECIAL_FITMSG_BYTES; i += 8) { - pr_debug("%s:%s:%d spcl[%2d] %02x %02x %02x %02x " - "%02x %02x %02x %02x\n", - skdev->name, __func__, __LINE__, i, - bp[i + 0], bp[i + 1], bp[i + 2], bp[i + 3], - bp[i + 4], bp[i + 5], bp[i + 6], bp[i + 7]); + pr_debug("%s:%s:%d spcl[%2d] %8ph\n", + skdev->name, __func__, __LINE__, i, &bp[i]); if (i == 0) i = 64 - 8; } @@ -2955,8 +2944,8 @@ static void skd_completion_worker(struct work_struct *work) static void skd_isr_msg_from_dev(struct skd_device *skdev); -irqreturn_t -static skd_isr(int irq, void *ptr) +static irqreturn_t +skd_isr(int irq, void *ptr) { struct skd_device *skdev; u32 intstat; @@ -3821,10 +3810,6 @@ static irqreturn_t skd_qfull_isr(int irq, void *skd_host_data) */ struct skd_msix_entry { - int have_irq; - u32 vector; - u32 entry; - struct skd_device *rsp; char isr_name[30]; }; @@ -3853,193 +3838,121 @@ static struct skd_init_msix_entry msix_entries[SKD_MAX_MSIX_COUNT] = { { "(Queue Full 3)", skd_qfull_isr }, }; -static void skd_release_msix(struct skd_device *skdev) -{ - struct skd_msix_entry *qentry; - int i; - - if (skdev->msix_entries) { - for (i = 0; i < skdev->msix_count; i++) { - qentry = &skdev->msix_entries[i]; - skdev = qentry->rsp; - - if (qentry->have_irq) - devm_free_irq(&skdev->pdev->dev, - qentry->vector, qentry->rsp); - } - - kfree(skdev->msix_entries); - } - - if (skdev->msix_count) - pci_disable_msix(skdev->pdev); - - skdev->msix_count = 0; - skdev->msix_entries = NULL; -} - static int skd_acquire_msix(struct skd_device *skdev) { int i, rc; struct pci_dev *pdev = skdev->pdev; - struct msix_entry *entries; - struct skd_msix_entry *qentry; - - entries = kzalloc(sizeof(struct msix_entry) * SKD_MAX_MSIX_COUNT, - GFP_KERNEL); - if (!entries) - return -ENOMEM; - - for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) - entries[i].entry = i; - rc = pci_enable_msix_exact(pdev, entries, SKD_MAX_MSIX_COUNT); - if (rc) { + rc = pci_alloc_irq_vectors(pdev, SKD_MAX_MSIX_COUNT, SKD_MAX_MSIX_COUNT, + PCI_IRQ_MSIX); + if (rc < 0) { pr_err("(%s): failed to enable MSI-X %d\n", skd_name(skdev), rc); - goto msix_out; + goto out; } - skdev->msix_count = SKD_MAX_MSIX_COUNT; - skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) * - skdev->msix_count, GFP_KERNEL); + skdev->msix_entries = kcalloc(SKD_MAX_MSIX_COUNT, + sizeof(struct skd_msix_entry), GFP_KERNEL); if (!skdev->msix_entries) { rc = -ENOMEM; pr_err("(%s): msix table allocation error\n", skd_name(skdev)); - goto msix_out; - } - - for (i = 0; i < skdev->msix_count; i++) { - qentry = &skdev->msix_entries[i]; - qentry->vector = entries[i].vector; - qentry->entry = entries[i].entry; - qentry->rsp = NULL; - qentry->have_irq = 0; - pr_debug("%s:%s:%d %s: <%s> msix (%d) vec %d, entry %x\n", - skdev->name, __func__, __LINE__, - pci_name(pdev), skdev->name, - i, qentry->vector, qentry->entry); + goto out; } /* Enable MSI-X vectors for the base queue */ - for (i = 0; i < skdev->msix_count; i++) { - qentry = &skdev->msix_entries[i]; + for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) { + struct skd_msix_entry *qentry = &skdev->msix_entries[i]; + snprintf(qentry->isr_name, sizeof(qentry->isr_name), "%s%d-msix %s", DRV_NAME, skdev->devno, msix_entries[i].name); - rc = devm_request_irq(&skdev->pdev->dev, qentry->vector, - msix_entries[i].handler, 0, - qentry->isr_name, skdev); + + rc = devm_request_irq(&skdev->pdev->dev, + pci_irq_vector(skdev->pdev, i), + msix_entries[i].handler, 0, + qentry->isr_name, skdev); if (rc) { pr_err("(%s): Unable to register(%d) MSI-X " "handler %d: %s\n", skd_name(skdev), rc, i, qentry->isr_name); goto msix_out; - } else { - qentry->have_irq = 1; - qentry->rsp = skdev; } } + pr_debug("%s:%s:%d %s: <%s> msix %d irq(s) enabled\n", skdev->name, __func__, __LINE__, - pci_name(pdev), skdev->name, skdev->msix_count); + pci_name(pdev), skdev->name, SKD_MAX_MSIX_COUNT); return 0; msix_out: - if (entries) - kfree(entries); - skd_release_msix(skdev); + while (--i >= 0) + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), skdev); +out: + kfree(skdev->msix_entries); + skdev->msix_entries = NULL; return rc; } static int skd_acquire_irq(struct skd_device *skdev) { + struct pci_dev *pdev = skdev->pdev; + unsigned int irq_flag = PCI_IRQ_LEGACY; int rc; - struct pci_dev *pdev; - - pdev = skdev->pdev; - skdev->msix_count = 0; -RETRY_IRQ_TYPE: - switch (skdev->irq_type) { - case SKD_IRQ_MSIX: + if (skd_isr_type == SKD_IRQ_MSIX) { rc = skd_acquire_msix(skdev); if (!rc) - pr_info("(%s): MSI-X %d irqs enabled\n", - skd_name(skdev), skdev->msix_count); - else { - pr_err( - "(%s): failed to enable MSI-X, re-trying with MSI %d\n", - skd_name(skdev), rc); - skdev->irq_type = SKD_IRQ_MSI; - goto RETRY_IRQ_TYPE; - } - break; - case SKD_IRQ_MSI: - snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d-msi", - DRV_NAME, skdev->devno); - rc = pci_enable_msi_range(pdev, 1, 1); - if (rc > 0) { - rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, 0, - skdev->isr_name, skdev); - if (rc) { - pci_disable_msi(pdev); - pr_err( - "(%s): failed to allocate the MSI interrupt %d\n", - skd_name(skdev), rc); - goto RETRY_IRQ_LEGACY; - } - pr_info("(%s): MSI irq %d enabled\n", - skd_name(skdev), pdev->irq); - } else { -RETRY_IRQ_LEGACY: - pr_err( - "(%s): failed to enable MSI, re-trying with LEGACY %d\n", - skd_name(skdev), rc); - skdev->irq_type = SKD_IRQ_LEGACY; - goto RETRY_IRQ_TYPE; - } - break; - case SKD_IRQ_LEGACY: - snprintf(skdev->isr_name, sizeof(skdev->isr_name), - "%s%d-legacy", DRV_NAME, skdev->devno); - rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, - IRQF_SHARED, skdev->isr_name, skdev); - if (!rc) - pr_info("(%s): LEGACY irq %d enabled\n", - skd_name(skdev), pdev->irq); - else - pr_err("(%s): request LEGACY irq error %d\n", - skd_name(skdev), rc); - break; - default: - pr_info("(%s): irq_type %d invalid, re-set to %d\n", - skd_name(skdev), skdev->irq_type, SKD_IRQ_DEFAULT); - skdev->irq_type = SKD_IRQ_LEGACY; - goto RETRY_IRQ_TYPE; + return 0; + + pr_err("(%s): failed to enable MSI-X, re-trying with MSI %d\n", + skd_name(skdev), rc); } - return rc; + + snprintf(skdev->isr_name, sizeof(skdev->isr_name), "%s%d", DRV_NAME, + skdev->devno); + + if (skd_isr_type != SKD_IRQ_LEGACY) + irq_flag |= PCI_IRQ_MSI; + rc = pci_alloc_irq_vectors(pdev, 1, 1, irq_flag); + if (rc < 0) { + pr_err("(%s): failed to allocate the MSI interrupt %d\n", + skd_name(skdev), rc); + return rc; + } + + rc = devm_request_irq(&pdev->dev, pdev->irq, skd_isr, + pdev->msi_enabled ? 0 : IRQF_SHARED, + skdev->isr_name, skdev); + if (rc) { + pci_free_irq_vectors(pdev); + pr_err("(%s): failed to allocate interrupt %d\n", + skd_name(skdev), rc); + return rc; + } + + return 0; } static void skd_release_irq(struct skd_device *skdev) { - switch (skdev->irq_type) { - case SKD_IRQ_MSIX: - skd_release_msix(skdev); - break; - case SKD_IRQ_MSI: - devm_free_irq(&skdev->pdev->dev, skdev->pdev->irq, skdev); - pci_disable_msi(skdev->pdev); - break; - case SKD_IRQ_LEGACY: - devm_free_irq(&skdev->pdev->dev, skdev->pdev->irq, skdev); - break; - default: - pr_err("(%s): wrong irq type %d!", - skd_name(skdev), skdev->irq_type); - break; + struct pci_dev *pdev = skdev->pdev; + + if (skdev->msix_entries) { + int i; + + for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) { + devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), + skdev); + } + + kfree(skdev->msix_entries); + skdev->msix_entries = NULL; + } else { + devm_free_irq(&pdev->dev, pdev->irq, skdev); } + + pci_free_irq_vectors(pdev); } /* @@ -4402,7 +4315,6 @@ static struct skd_device *skd_construct(struct pci_dev *pdev) skdev->pdev = pdev; skdev->devno = skd_next_devno++; skdev->major = blk_major; - skdev->irq_type = skd_isr_type; sprintf(skdev->name, DRV_NAME "%d", skdev->devno); skdev->dev_max_queue_depth = 0; diff --git a/drivers/block/umem.c b/drivers/block/umem.c index be90e15854ed..46f4c719fed9 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -535,7 +535,7 @@ static blk_qc_t mm_make_request(struct request_queue *q, struct bio *bio) *card->biotail = bio; bio->bi_next = NULL; card->biotail = &bio->bi_next; - if (bio->bi_opf & REQ_SYNC || !mm_check_plugged(card)) + if (op_is_sync(bio->bi_opf) || !mm_check_plugged(card)) activate(card); spin_unlock_irq(&card->lock); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 4a80ee752597..726c32e35db9 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -1253,14 +1253,14 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, case BLKIF_OP_WRITE: ring->st_wr_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_ODIRECT; + operation_flags = REQ_SYNC | REQ_IDLE; break; case BLKIF_OP_WRITE_BARRIER: drain = true; case BLKIF_OP_FLUSH_DISKCACHE: ring->st_f_req++; operation = REQ_OP_WRITE; - operation_flags = WRITE_FLUSH; + operation_flags = REQ_PREFLUSH; break; default: operation = 0; /* make gcc happy */ @@ -1272,7 +1272,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, nseg = req->operation == BLKIF_OP_INDIRECT ? req->u.indirect.nr_segments : req->u.rw.nr_segments; - if (unlikely(nseg == 0 && operation_flags != WRITE_FLUSH) || + if (unlikely(nseg == 0 && operation_flags != REQ_PREFLUSH) || unlikely((req->operation != BLKIF_OP_INDIRECT) && (nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) || unlikely((req->operation == BLKIF_OP_INDIRECT) && @@ -1334,7 +1334,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, } /* Wait on all outstanding I/O's and once that has been completed - * issue the WRITE_FLUSH. + * issue the flush. */ if (drain) xen_blk_drain_io(pending_req->ring); @@ -1380,7 +1380,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring, /* This will be hit if the operation was a flush or discard. */ if (!bio) { - BUG_ON(operation_flags != WRITE_FLUSH); + BUG_ON(operation_flags != REQ_PREFLUSH); bio = bio_alloc(GFP_KERNEL, 0); if (unlikely(bio == NULL)) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 2ee9646d8a5f..b2bdfa81f929 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2035,8 +2035,9 @@ static int blkif_recover(struct blkfront_info *info) /* Requeue pending requests (flush or discard) */ list_del_init(&req->queuelist); BUG_ON(req->nr_phys_segments > segs); - blk_mq_requeue_request(req); + blk_mq_requeue_request(req, false); } + blk_mq_start_stopped_hw_queues(info->rq, true); blk_mq_kick_requeue_list(info->rq); while ((bio = bio_list_pop(&info->bio_list)) != NULL) { diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c index 4b5cd3a7b2b6..12046f4f00e4 100644 --- a/drivers/block/zram/zcomp.c +++ b/drivers/block/zram/zcomp.c @@ -160,82 +160,56 @@ int zcomp_decompress(struct zcomp_strm *zstrm, dst, &dst_len); } -static int __zcomp_cpu_notifier(struct zcomp *comp, - unsigned long action, unsigned long cpu) +int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) { + struct zcomp *comp = hlist_entry(node, struct zcomp, node); struct zcomp_strm *zstrm; - switch (action) { - case CPU_UP_PREPARE: - if (WARN_ON(*per_cpu_ptr(comp->stream, cpu))) - break; - zstrm = zcomp_strm_alloc(comp); - if (IS_ERR_OR_NULL(zstrm)) { - pr_err("Can't allocate a compression stream\n"); - return NOTIFY_BAD; - } - *per_cpu_ptr(comp->stream, cpu) = zstrm; - break; - case CPU_DEAD: - case CPU_UP_CANCELED: - zstrm = *per_cpu_ptr(comp->stream, cpu); - if (!IS_ERR_OR_NULL(zstrm)) - zcomp_strm_free(zstrm); - *per_cpu_ptr(comp->stream, cpu) = NULL; - break; - default: - break; + if (WARN_ON(*per_cpu_ptr(comp->stream, cpu))) + return 0; + + zstrm = zcomp_strm_alloc(comp); + if (IS_ERR_OR_NULL(zstrm)) { + pr_err("Can't allocate a compression stream\n"); + return -ENOMEM; } - return NOTIFY_OK; + *per_cpu_ptr(comp->stream, cpu) = zstrm; + return 0; } -static int zcomp_cpu_notifier(struct notifier_block *nb, - unsigned long action, void *pcpu) +int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node) { - unsigned long cpu = (unsigned long)pcpu; - struct zcomp *comp = container_of(nb, typeof(*comp), notifier); + struct zcomp *comp = hlist_entry(node, struct zcomp, node); + struct zcomp_strm *zstrm; - return __zcomp_cpu_notifier(comp, action, cpu); + zstrm = *per_cpu_ptr(comp->stream, cpu); + if (!IS_ERR_OR_NULL(zstrm)) + zcomp_strm_free(zstrm); + *per_cpu_ptr(comp->stream, cpu) = NULL; + return 0; } static int zcomp_init(struct zcomp *comp) { - unsigned long cpu; int ret; - comp->notifier.notifier_call = zcomp_cpu_notifier; - comp->stream = alloc_percpu(struct zcomp_strm *); if (!comp->stream) return -ENOMEM; - cpu_notifier_register_begin(); - for_each_online_cpu(cpu) { - ret = __zcomp_cpu_notifier(comp, CPU_UP_PREPARE, cpu); - if (ret == NOTIFY_BAD) - goto cleanup; - } - __register_cpu_notifier(&comp->notifier); - cpu_notifier_register_done(); + ret = cpuhp_state_add_instance(CPUHP_ZCOMP_PREPARE, &comp->node); + if (ret < 0) + goto cleanup; return 0; cleanup: - for_each_online_cpu(cpu) - __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu); - cpu_notifier_register_done(); - return -ENOMEM; + free_percpu(comp->stream); + return ret; } void zcomp_destroy(struct zcomp *comp) { - unsigned long cpu; - - cpu_notifier_register_begin(); - for_each_online_cpu(cpu) - __zcomp_cpu_notifier(comp, CPU_UP_CANCELED, cpu); - __unregister_cpu_notifier(&comp->notifier); - cpu_notifier_register_done(); - + cpuhp_state_remove_instance(CPUHP_ZCOMP_PREPARE, &comp->node); free_percpu(comp->stream); kfree(comp); } diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h index 478cac2ed465..41c1002a7d7d 100644 --- a/drivers/block/zram/zcomp.h +++ b/drivers/block/zram/zcomp.h @@ -19,11 +19,12 @@ struct zcomp_strm { /* dynamic per-device compression frontend */ struct zcomp { struct zcomp_strm * __percpu *stream; - struct notifier_block notifier; - const char *name; + struct hlist_node node; }; +int zcomp_cpu_up_prepare(unsigned int cpu, struct hlist_node *node); +int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node); ssize_t zcomp_available_show(const char *comp, char *buf); bool zcomp_available_algorithm(const char *comp); diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 04365b17ee67..15f58ab44d0b 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -30,6 +30,7 @@ #include <linux/err.h> #include <linux/idr.h> #include <linux/sysfs.h> +#include <linux/cpuhotplug.h> #include "zram_drv.h" @@ -1403,7 +1404,8 @@ static ssize_t hot_remove_store(struct class *class, zram = idr_find(&zram_index_idr, dev_id); if (zram) { ret = zram_remove(zram); - idr_remove(&zram_index_idr, dev_id); + if (!ret) + idr_remove(&zram_index_idr, dev_id); } else { ret = -ENODEV; } @@ -1412,8 +1414,14 @@ static ssize_t hot_remove_store(struct class *class, return ret ? ret : count; } +/* + * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a + * sense that reading from this file does alter the state of your system -- it + * creates a new un-initialized zram device and returns back this device's + * device_id (or an error code if it fails to create a new device). + */ static struct class_attribute zram_control_class_attrs[] = { - __ATTR_RO(hot_add), + __ATTR(hot_add, 0400, hot_add_show, NULL), __ATTR_WO(hot_remove), __ATTR_NULL, }; @@ -1436,15 +1444,22 @@ static void destroy_devices(void) idr_for_each(&zram_index_idr, &zram_remove_cb, NULL); idr_destroy(&zram_index_idr); unregister_blkdev(zram_major, "zram"); + cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); } static int __init zram_init(void) { int ret; + ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare", + zcomp_cpu_up_prepare, zcomp_cpu_dead); + if (ret < 0) + return ret; + ret = class_register(&zram_control_class); if (ret) { pr_err("Unable to register zram-control class\n"); + cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); return ret; } @@ -1452,6 +1467,7 @@ static int __init zram_init(void) if (zram_major <= 0) { pr_err("Unable to get major number\n"); class_unregister(&zram_control_class); + cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE); return -EBUSY; } |