author      Linus Torvalds <torvalds@linux-foundation.org>   2018-12-29 01:57:10 +0300
committer   Linus Torvalds <torvalds@linux-foundation.org>   2018-12-29 01:57:10 +0300
commit      5d24ae67a961c51beb255a28c9c417d9710247c2 (patch)
tree        c23c71b2f17f4502554c80b84be476e4c08f7160 /drivers/infiniband/ulp/srp
parent      938edb8a31b976c9a92eb0cd4ff481e93f76c1f1 (diff)
parent      f617e5ffe04fd46010b618c9eeadaa04588704c9 (diff)
download    linux-5d24ae67a961c51beb255a28c9c417d9710247c2.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe:
"This has been a fairly typical cycle, with the usual sorts of driver
updates. Several series continue to come through which improve and
modernize various parts of the core code, and we finally are starting
to get the uAPI command interface cleaned up.
- Various driver fixes for bnxt_re, cxgb3/4, hfi1, hns, i40iw, mlx4,
mlx5, qib, rxe, usnic
- Rework the entire syscall flow for uverbs to be able to run over
ioctl(). Finally getting past the historic bad choice to use
write() for command execution
- More functional coverage with the mlx5 'devx' user API
- Start of the HFI1 series for 'TID RDMA'
- SRQ support in the hns driver
- Support for new IBTA defined 2x lane widths
- A big series to consolidate all the driver function pointers into a
big struct and have drivers provide a 'static const' version of the
struct instead of open coding initialization (see the sketch after
this message)
- New 'advise_mr' uAPI to control device caching/loading of page
tables
- Support for inline data in SRPT
- Modernize how umad uses the driver core and creates cdev's and
sysfs files
- First steps toward removing 'uobject' from the view of the drivers"
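The "static const" consolidation mentioned in the list above is sketched below. This is only an illustrative fragment, assuming the ib_device_ops / ib_set_device_ops interface introduced during this cycle; the mydrv_* identifiers are invented for illustration and do not come from any in-tree driver.

/*
 * Hedged sketch of the "static const ops struct" pattern: the driver fills
 * one table once instead of open-coding dozens of ibdev->xxx assignments.
 */
#include <rdma/ib_verbs.h>

static int mydrv_query_port(struct ib_device *ibdev, u8 port_num,
                            struct ib_port_attr *attr)
{
        /* A real driver would fill *attr from hardware state. */
        attr->max_mtu = IB_MTU_4096;
        return 0;
}

static const struct ib_device_ops mydrv_dev_ops = {
        .query_port = mydrv_query_port,
        /* ... remaining callbacks, previously assigned one by one ... */
};

/* Called from the driver's device registration path. */
static void mydrv_set_ops(struct ib_device *ibdev)
{
        ib_set_device_ops(ibdev, &mydrv_dev_ops);
}

Compared with open-coded per-member assignments, the const table lets the core copy every callback in one place, which is what allows hunks like the srp_add_one() change below to look at device->ops.* instead of device->*.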
* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (193 commits)
RDMA/srpt: Use kmem_cache_free() instead of kfree()
RDMA/mlx5: Signedness bug in UVERBS_HANDLER()
IB/uverbs: Signedness bug in UVERBS_HANDLER()
IB/mlx5: Allocate the per-port Q counter shared when DEVX is supported
IB/umad: Start using dev_groups of class
IB/umad: Use class_groups and let core create class file
IB/umad: Refactor code to use cdev_device_add()
IB/umad: Avoid destroying device while it is accessed
IB/umad: Simplify and avoid dynamic allocation of class
IB/mlx5: Fix wrong error unwind
IB/mlx4: Remove set but not used variable 'pd'
RDMA/iwcm: Don't copy past the end of dev_name() string
IB/mlx5: Fix long EEH recover time with NVMe offloads
IB/mlx5: Simplify netdev unbinding
IB/core: Move query port to ioctl
RDMA/nldev: Expose port_cap_flags2
IB/core: uverbs copy to struct or zero helper
IB/rxe: Reuse code which sets port state
IB/rxe: Make counters thread safe
IB/mlx5: Use the correct commands for UMEM and UCTX allocation
...
Diffstat (limited to 'drivers/infiniband/ulp/srp')
-rw-r--r--   drivers/infiniband/ulp/srp/ib_srp.c   159
-rw-r--r--   drivers/infiniband/ulp/srp/ib_srp.h    20
2 files changed, 147 insertions, 32 deletions
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index d27fe970ceba..31d91538bbf4 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -132,6 +132,15 @@ MODULE_PARM_DESC(dev_loss_tmo,
                 " if fast_io_fail_tmo has not been set. \"off\" means that"
                 " this functionality is disabled.");
 
+static bool srp_use_imm_data = true;
+module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
+MODULE_PARM_DESC(use_imm_data,
+                "Whether or not to request permission to use immediate data during SRP login.");
+
+static unsigned int srp_max_imm_data = 8 * 1024;
+module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
+MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
+
 static unsigned ch_count;
 module_param(ch_count, uint, 0444);
 MODULE_PARM_DESC(ch_count,
@@ -573,7 +582,7 @@ static int srp_create_ch_ib(struct srp_rdma_ch *ch)
        init_attr->cap.max_send_wr     = m * target->queue_size;
        init_attr->cap.max_recv_wr     = target->queue_size + 1;
        init_attr->cap.max_recv_sge    = 1;
-       init_attr->cap.max_send_sge    = 1;
+       init_attr->cap.max_send_sge    = SRP_MAX_SGE;
        init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
        init_attr->qp_type             = IB_QPT_RC;
        init_attr->send_cq             = send_cq;
@@ -823,7 +832,8 @@ static u8 srp_get_subnet_timeout(struct srp_host *host)
        return subnet_timeout;
 }
 
-static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
+static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
+                       bool multich)
 {
        struct srp_target_port *target = ch->target;
        struct {
@@ -852,11 +862,15 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
 
        req->ib_req.opcode = SRP_LOGIN_REQ;
        req->ib_req.tag = 0;
-       req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
+       req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
        req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
                                              SRP_BUF_FORMAT_INDIRECT);
        req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
                                 SRP_MULTICHAN_SINGLE);
+       if (srp_use_imm_data) {
+               req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
+               req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
+       }
 
        if (target->using_rdma_cm) {
                req->rdma_param.flow_control = req->ib_param.flow_control;
@@ -873,6 +887,7 @@ static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
                req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
                req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
                req->rdma_req.req_flags = req->ib_req.req_flags;
+               req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
 
                ipi = req->rdma_req.initiator_port_id;
                tpi = req->rdma_req.target_port_id;
@@ -1145,7 +1160,8 @@ static int srp_connected_ch(struct srp_target_port *target)
        return c;
 }
 
-static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
+static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
+                         bool multich)
 {
        struct srp_target_port *target = ch->target;
        int ret;
@@ -1158,7 +1174,7 @@ static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
 
        while (1) {
                init_completion(&ch->done);
-               ret = srp_send_req(ch, multich);
+               ret = srp_send_req(ch, max_iu_len, multich);
                if (ret)
                        goto out;
                ret = wait_for_completion_interruptible(&ch->done);
@@ -1344,6 +1360,20 @@ static void srp_terminate_io(struct srp_rport *rport)
        }
 }
 
+/* Calculate maximum initiator to target information unit length. */
+static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data)
+{
+       uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
+               sizeof(struct srp_indirect_buf) +
+               cmd_sg_cnt * sizeof(struct srp_direct_buf);
+
+       if (use_imm_data)
+               max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
+                                srp_max_imm_data);
+
+       return max_iu_len;
+}
+
 /*
  * It is up to the caller to ensure that srp_rport_reconnect() calls are
  * serialized and that no concurrent srp_queuecommand(), srp_abort(),
@@ -1357,6 +1387,8 @@ static int srp_rport_reconnect(struct srp_rport *rport)
 {
        struct srp_target_port *target = rport->lld_data;
        struct srp_rdma_ch *ch;
+       uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
+                                               srp_use_imm_data);
        int i, j, ret = 0;
        bool multich = false;
 
@@ -1402,7 +1434,7 @@ static int srp_rport_reconnect(struct srp_rport *rport)
                ch = &target->ch[i];
                if (ret)
                        break;
-               ret = srp_connect_ch(ch, multich);
+               ret = srp_connect_ch(ch, max_iu_len, multich);
                multich = true;
        }
 
@@ -1764,25 +1796,29 @@ static void srp_check_mapping(struct srp_map_state *state,
  * @req: SRP request
  *
  * Returns the length in bytes of the SRP_CMD IU or a negative value if
- * mapping failed.
+ * mapping failed. The size of any immediate data is not included in the
+ * return value.
  */
 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
                        struct srp_request *req)
 {
        struct srp_target_port *target = ch->target;
-       struct scatterlist *scat;
+       struct scatterlist *scat, *sg;
        struct srp_cmd *cmd = req->cmd->buf;
-       int len, nents, count, ret;
+       int i, len, nents, count, ret;
        struct srp_device *dev;
        struct ib_device *ibdev;
        struct srp_map_state state;
        struct srp_indirect_buf *indirect_hdr;
+       u64 data_len;
        u32 idb_len, table_len;
        __be32 idb_rkey;
        u8 fmt;
 
+       req->cmd->num_sge = 1;
+
        if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
-               return sizeof (struct srp_cmd);
+               return sizeof(struct srp_cmd) + cmd->add_cdb_len;
 
        if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
            scmnd->sc_data_direction != DMA_TO_DEVICE) {
@@ -1794,6 +1830,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
        nents = scsi_sg_count(scmnd);
        scat  = scsi_sglist(scmnd);
+       data_len = scsi_bufflen(scmnd);
 
        dev = target->srp_host->srp_dev;
        ibdev = dev->dev;
 
@@ -1802,8 +1839,31 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
        if (unlikely(count == 0))
                return -EIO;
 
+       if (ch->use_imm_data &&
+           count <= SRP_MAX_IMM_SGE &&
+           SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
+           scmnd->sc_data_direction == DMA_TO_DEVICE) {
+               struct srp_imm_buf *buf;
+               struct ib_sge *sge = &req->cmd->sge[1];
+
+               fmt = SRP_DATA_DESC_IMM;
+               len = SRP_IMM_DATA_OFFSET;
+               req->nmdesc = 0;
+               buf = (void *)cmd->add_data + cmd->add_cdb_len;
+               buf->len = cpu_to_be32(data_len);
+               WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
+               for_each_sg(scat, sg, count, i) {
+                       sge[i].addr   = ib_sg_dma_address(ibdev, sg);
+                       sge[i].length = ib_sg_dma_len(ibdev, sg);
+                       sge[i].lkey   = target->lkey;
+               }
+               req->cmd->num_sge += count;
+               goto map_complete;
+       }
+
        fmt = SRP_DATA_DESC_DIRECT;
-       len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
+       len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
+               sizeof(struct srp_direct_buf);
 
        if (count == 1 && target->global_rkey) {
                /*
@@ -1812,8 +1872,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
                 * single entry. So a direct descriptor along with
                 * the DMA MR suffices.
                 */
-               struct srp_direct_buf *buf = (void *) cmd->add_data;
+               struct srp_direct_buf *buf;
 
+               buf = (void *)cmd->add_data + cmd->add_cdb_len;
                buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
                buf->key = cpu_to_be32(target->global_rkey);
                buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
@@ -1826,7 +1887,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
         * We have more than one scatter/gather entry, so build our indirect
         * descriptor table, trying to merge as many entries as we can.
         */
-       indirect_hdr = (void *) cmd->add_data;
+       indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
 
        ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
                                   target->indirect_size, DMA_TO_DEVICE);
@@ -1861,8 +1922,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
                 * Memory registration collapsed the sg-list into one entry,
                 * so use a direct descriptor.
                 */
-               struct srp_direct_buf *buf = (void *) cmd->add_data;
+               struct srp_direct_buf *buf;
 
+               buf = (void *)cmd->add_data + cmd->add_cdb_len;
                *buf = req->indirect_desc[0];
                goto map_complete;
        }
@@ -1880,7 +1942,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
        idb_len = sizeof(struct srp_indirect_buf) + table_len;
 
        fmt = SRP_DATA_DESC_INDIRECT;
-       len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
+       len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
+               sizeof(struct srp_indirect_buf);
        len += count * sizeof (struct srp_direct_buf);
 
        memcpy(indirect_hdr->desc_list, req->indirect_desc,
@@ -2001,22 +2064,30 @@ static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
        list_add(&iu->list, &ch->free_tx);
 }
 
+/**
+ * srp_post_send() - send an SRP information unit
+ * @ch: RDMA channel over which to send the information unit.
+ * @iu: Information unit to send.
+ * @len: Length of the information unit excluding immediate data.
+ */
 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
 {
        struct srp_target_port *target = ch->target;
-       struct ib_sge list;
        struct ib_send_wr wr;
 
-       list.addr   = iu->dma;
-       list.length = len;
-       list.lkey   = target->lkey;
+       if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
+               return -EINVAL;
+
+       iu->sge[0].addr   = iu->dma;
+       iu->sge[0].length = len;
+       iu->sge[0].lkey   = target->lkey;
 
        iu->cqe.done = srp_send_done;
 
        wr.next       = NULL;
        wr.wr_cqe     = &iu->cqe;
-       wr.sg_list    = &list;
-       wr.num_sge    = 1;
+       wr.sg_list    = &iu->sge[0];
+       wr.num_sge    = iu->num_sge;
        wr.opcode     = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;
 
@@ -2129,6 +2200,7 @@ static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
                return 1;
        }
 
+       iu->num_sge = 1;
        ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
        memcpy(iu->buf, rsp, len);
        ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
@@ -2312,7 +2384,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
        req = &ch->req_ring[idx];
        dev = target->srp_host->srp_dev->dev;
-       ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
+       ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
                                   DMA_TO_DEVICE);
 
        scmnd->host_scribble = (void *) req;
 
@@ -2324,6 +2396,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
        int_to_scsilun(scmnd->device->lun, &cmd->lun);
        cmd->tag = tag;
        memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
+       if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
+               cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
+                                           4);
+               if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
+                       goto err_iu;
+       }
 
        req->scmnd    = scmnd;
        req->cmd      = iu;
@@ -2343,11 +2421,12 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
                goto err_iu;
        }
 
-       ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
+       ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
                                      DMA_TO_DEVICE);
 
        if (srp_post_send(ch, iu, len)) {
                shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
+               scmnd->result = DID_ERROR << 16;
                goto err_unmap;
        }
 
@@ -2410,7 +2489,7 @@ static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
 
        for (i = 0; i < target->queue_size; ++i) {
                ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
-                                             target->max_iu_len,
+                                             ch->max_it_iu_len,
                                              GFP_KERNEL, DMA_TO_DEVICE);
                if (!ch->tx_ring[i])
                        goto err;
@@ -2476,6 +2555,15 @@ static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
        if (lrsp->opcode == SRP_LOGIN_RSP) {
                ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
                ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
+               ch->use_imm_data  = lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP;
+               ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
+                                                     ch->use_imm_data);
+               WARN_ON_ONCE(ch->max_it_iu_len >
+                            be32_to_cpu(lrsp->max_it_iu_len));
+
+               if (ch->use_imm_data)
+                       shost_printk(KERN_DEBUG, target->scsi_host,
+                                    PFX "using immediate data\n");
 
                /*
                 * Reserve credits for task management so we don't
@@ -2864,6 +2952,8 @@ static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
                return -1;
        }
 
+       iu->num_sge = 1;
+
        ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
                                   DMA_TO_DEVICE);
        tsk_mgmt = iu->buf;
@@ -3402,6 +3492,9 @@ static const match_table_t srp_opt_tokens = {
 
 /**
  * srp_parse_in - parse an IP address and port number combination
+ * @net: [in] Network namespace.
+ * @sa: [out] Address family, IP address and port number.
+ * @addr_port_str: [in] IP address and port number.
  *
  * Parse the following address formats:
  * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
@@ -3719,6 +3812,7 @@ static ssize_t srp_create_target(struct device *dev,
        int ret, node_idx, node, cpu, i;
        unsigned int max_sectors_per_mr, mr_per_cmd = 0;
        bool multich = false;
+       uint32_t max_iu_len;
 
        target_host = scsi_host_alloc(&srp_template,
                                      sizeof (struct srp_target_port));
@@ -3824,9 +3918,7 @@ static ssize_t srp_create_target(struct device *dev,
        target->mr_per_cmd = mr_per_cmd;
        target->indirect_size = target->sg_tablesize *
                                sizeof (struct srp_direct_buf);
-       target->max_iu_len = sizeof (struct srp_cmd) +
-                            sizeof (struct srp_indirect_buf) +
-                            target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
+       max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt, srp_use_imm_data);
 
        INIT_WORK(&target->tl_err_work, srp_tl_err_work);
        INIT_WORK(&target->remove_work, srp_remove_work);
@@ -3881,7 +3973,7 @@ static ssize_t srp_create_target(struct device *dev,
                if (ret)
                        goto err_disconnect;
 
-               ret = srp_connect_ch(ch, multich);
+               ret = srp_connect_ch(ch, max_iu_len, multich);
                if (ret) {
                        char dst[64];
 
@@ -4062,8 +4154,10 @@ static void srp_add_one(struct ib_device *device)
 
        srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
                                          max_pages_per_mr);
-       srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
-                           device->map_phys_fmr && device->unmap_fmr);
+       srp_dev->has_fmr = (device->ops.alloc_fmr &&
+                           device->ops.dealloc_fmr &&
+                           device->ops.map_phys_fmr &&
+                           device->ops.unmap_fmr);
        srp_dev->has_fr = (attr->device_cap_flags &
                           IB_DEVICE_MEM_MGT_EXTENSIONS);
        if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
@@ -4171,6 +4265,11 @@ static int __init srp_init_module(void)
 {
        int ret;
 
+       BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
+       BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
+       BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
+       BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);
+
        if (srp_sg_tablesize) {
                pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
                if (!cmd_sg_entries)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h
index a2706086b9c7..b2861cd2087a 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.h
+++ b/drivers/infiniband/ulp/srp/ib_srp.h
@@ -67,6 +67,17 @@ enum {
        SRP_TAG_TSK_MGMT        = 1U << 31,
 
        SRP_MAX_PAGES_PER_MR    = 512,
+
+       SRP_MAX_ADD_CDB_LEN     = 16,
+
+       SRP_MAX_IMM_SGE         = 2,
+       SRP_MAX_SGE             = SRP_MAX_IMM_SGE + 1,
+       /*
+        * Choose the immediate data offset such that a 32 byte CDB still fits.
+        */
+       SRP_IMM_DATA_OFFSET     = sizeof(struct srp_cmd) +
+                                 SRP_MAX_ADD_CDB_LEN +
+                                 sizeof(struct srp_imm_buf),
 };
 
 enum srp_target_state {
@@ -130,6 +141,8 @@ struct srp_request {
 /**
  * struct srp_rdma_ch
  * @comp_vector: Completion vector used by this RDMA channel.
+ * @max_it_iu_len: Maximum initiator-to-target information unit length.
+ * @max_ti_iu_len: Maximum target-to-initiator information unit length.
  */
 struct srp_rdma_ch {
        /* These are RW in the hot path, and commonly used together */
@@ -146,6 +159,9 @@ struct srp_rdma_ch {
                struct ib_fmr_pool     *fmr_pool;
                struct srp_fr_pool     *fr_pool;
        };
+       uint32_t                max_it_iu_len;
+       uint32_t                max_ti_iu_len;
+       bool                    use_imm_data;
 
        /* Everything above this point is used in the hot path of
         * command processing. Try to keep them packed into cachelines.
@@ -169,7 +185,6 @@ struct srp_rdma_ch {
        struct srp_iu         **tx_ring;
        struct srp_iu         **rx_ring;
        struct srp_request     *req_ring;
-       int                     max_ti_iu_len;
        int                     comp_vector;
 
        u64                     tsk_mgmt_tag;
@@ -194,7 +209,6 @@ struct srp_target_port {
        u32                     ch_count;
        u32                     lkey;
        enum srp_target_state   state;
-       unsigned int            max_iu_len;
        unsigned int            cmd_sg_cnt;
        unsigned int            indirect_size;
        bool                    allow_ext_sg;
@@ -259,6 +273,8 @@ struct srp_iu {
        void                   *buf;
        size_t                  size;
        enum dma_data_direction direction;
+       u32                     num_sge;
+       struct ib_sge           sge[SRP_MAX_SGE];
        struct ib_cqe           cqe;
 };
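For reference, the arithmetic behind the new srp_max_it_iu_len() helper and SRP_IMM_DATA_OFFSET can be checked with a small userspace sketch. The 48-byte srp_cmd and 4-byte srp_imm_buf sizes come from the BUILD_BUG_ON()s added in srp_init_module(); the 16-byte srp_direct_buf and 20-byte srp_indirect_buf sizes are assumptions based on include/scsi/srp.h rather than on this diff, and the 12-entry scatter/gather count below is only an example value.

/*
 * Userspace sketch mirroring the srp_max_it_iu_len() calculation.
 * Descriptor sizes marked "assumed" are not confirmed by this patch.
 */
#include <stdio.h>

#define SRP_CMD_SIZE            48u     /* sizeof(struct srp_cmd) */
#define SRP_MAX_ADD_CDB_LEN     16u
#define SRP_IMM_BUF_SIZE         4u     /* sizeof(struct srp_imm_buf) */
#define SRP_DIRECT_BUF_SIZE     16u     /* sizeof(struct srp_direct_buf), assumed */
#define SRP_INDIRECT_BUF_SIZE   20u     /* sizeof(struct srp_indirect_buf), assumed */
#define SRP_IMM_DATA_OFFSET     (SRP_CMD_SIZE + SRP_MAX_ADD_CDB_LEN + SRP_IMM_BUF_SIZE)

static unsigned int max_it_iu_len(unsigned int cmd_sg_cnt, int use_imm_data,
                                  unsigned int max_imm_data)
{
        /* Room for the CMD IU, the additional CDB and an indirect descriptor table. */
        unsigned int len = SRP_CMD_SIZE + SRP_MAX_ADD_CDB_LEN +
                SRP_INDIRECT_BUF_SIZE + cmd_sg_cnt * SRP_DIRECT_BUF_SIZE;

        /* With immediate data the IU must also be able to carry max_imm_data bytes. */
        if (use_imm_data && SRP_IMM_DATA_OFFSET + max_imm_data > len)
                len = SRP_IMM_DATA_OFFSET + max_imm_data;
        return len;
}

int main(void)
{
        printf("SRP_IMM_DATA_OFFSET = %u\n", SRP_IMM_DATA_OFFSET);            /* 68 */
        printf("max IT IU length    = %u\n", max_it_iu_len(12, 1, 8 * 1024)); /* 8260 */
        return 0;
}

With the default max_imm_data of 8 KiB the immediate-data term dominates for small cmd_sg_cnt values, so the login request advertises an initiator-to-target IU of 68 + 8192 = 8260 bytes.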