diff options
author | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2012-04-18 23:52:50 +0400 |
---|---|---|
committer | Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> | 2012-04-18 23:52:50 +0400 |
commit | 681e4a5e13c1c8315694eb4f44e0cdd84c9082d2 (patch) | |
tree | 699f14527c118859026e8ce0214e689d0b9c88cb /drivers/infiniband | |
parent | b960d6c43a63ebd2d8518b328da3816b833ee8cc (diff) | |
parent | c104f1fa1ecf4ee0fc06e31b1f77630b2551be81 (diff) | |
download | linux-681e4a5e13c1c8315694eb4f44e0cdd84c9082d2.tar.xz |
Merge commit 'c104f1fa1ecf4ee0fc06e31b1f77630b2551be81' into stable/for-linus-3.4
* commit 'c104f1fa1ecf4ee0fc06e31b1f77630b2551be81': (14566 commits)
cpufreq: OMAP: fix build errors: depends on ARCH_OMAP2PLUS
sparc64: Eliminate obsolete __handle_softirq() function
sparc64: Fix bootup crash on sun4v.
kconfig: delete last traces of __enabled_ from autoconf.h
Revert "kconfig: fix __enabled_ macros definition for invisible and un-selected symbols"
kconfig: fix IS_ENABLED to not require all options to be defined
irq_domain: fix type mismatch in debugfs output format
staging: android: fix mem leaks in __persistent_ram_init()
staging: vt6656: Don't leak memory in drivers/staging/vt6656/ioctl.c::private_ioctl()
staging: iio: hmc5843: Fix crash in probe function.
panic: fix stack dump print on direct call to panic()
drivers/rtc/rtc-pl031.c: enable clock on all ST variants
Revert "mm: vmscan: fix misused nr_reclaimed in shrink_mem_cgroup_zone()"
hugetlb: fix race condition in hugetlb_fault()
drivers/rtc/rtc-twl.c: use static register while reading time
drivers/rtc/rtc-s3c.c: add placeholder for driver private data
drivers/rtc/rtc-s3c.c: fix compilation error
MAINTAINERS: add PCDP console maintainer
memcg: do not open code accesses to res_counter members
drivers/rtc/rtc-efi.c: fix section mismatch warning
...
Diffstat (limited to 'drivers/infiniband')
63 files changed, 5133 insertions, 387 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 0f9a84c1046a..eb0add311dc8 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -55,6 +55,7 @@ source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" source "drivers/infiniband/ulp/srp/Kconfig" +source "drivers/infiniband/ulp/srpt/Kconfig" source "drivers/infiniband/ulp/iser/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index 9cc7a47d3e67..a3b2d8eac86e 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -10,4 +10,5 @@ obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ +obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/ obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1612cfd50f39..6ef660c1332f 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -178,22 +178,26 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } -static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *addr) +static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *dev_addr, void *daddr) { struct neighbour *n; int ret; + n = dst_neigh_lookup(dst, daddr); + rcu_read_lock(); - n = dst_get_neighbour_noref(dst); if (!n || !(n->nud_state & NUD_VALID)) { if (n) neigh_event_send(n, NULL); ret = -ENODATA; } else { - ret = rdma_copy_addr(addr, dst->dev, n->ha); + ret = rdma_copy_addr(dev_addr, dst->dev, n->ha); } rcu_read_unlock(); + if (n) + neigh_release(n); + return ret; } @@ -232,7 +236,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, goto put; } - ret = dst_fetch_ha(&rt->dst, addr); + ret = dst_fetch_ha(&rt->dst, addr, &fl4.daddr); put: ip_rt_put(rt); out: @@ -280,7 +284,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; } - ret = dst_fetch_ha(dst, addr); + ret = dst_fetch_ha(dst, addr, &fl6.daddr); put: dst_release(dst); return ret; diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 1a696f76b616..0bb99bb38809 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -624,17 +624,6 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, */ BUG_ON(iw_event->status); - /* - * We could be destroying the listening id. If so, ignore this - * upcall. - */ - spin_lock_irqsave(&listen_id_priv->lock, flags); - if (listen_id_priv->state != IW_CM_STATE_LISTEN) { - spin_unlock_irqrestore(&listen_id_priv->lock, flags); - goto out; - } - spin_unlock_irqrestore(&listen_id_priv->lock, flags); - cm_id = iw_create_cm_id(listen_id_priv->id.device, listen_id_priv->id.cm_handler, listen_id_priv->id.context); @@ -649,6 +638,19 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv, cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); cm_id_priv->state = IW_CM_STATE_CONN_RECV; + /* + * We could be destroying the listening id. If so, ignore this + * upcall. + */ + spin_lock_irqsave(&listen_id_priv->lock, flags); + if (listen_id_priv->state != IW_CM_STATE_LISTEN) { + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + iw_cm_reject(cm_id, NULL, 0); + iw_destroy_cm_id(cm_id); + goto out; + } + spin_unlock_irqrestore(&listen_id_priv->lock, flags); + ret = alloc_work_entries(cm_id_priv, 3); if (ret) { iw_cm_reject(cm_id, NULL, 0); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2fe428bba54c..426bb7617ec6 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -1842,6 +1842,24 @@ static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv, } } +static bool generate_unmatched_resp(struct ib_mad_private *recv, + struct ib_mad_private *response) +{ + if (recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_GET || + recv->mad.mad.mad_hdr.method == IB_MGMT_METHOD_SET) { + memcpy(response, recv, sizeof *response); + response->header.recv_wc.wc = &response->header.wc; + response->header.recv_wc.recv_buf.mad = &response->mad.mad; + response->header.recv_wc.recv_buf.grh = &response->grh; + response->mad.mad.mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + response->mad.mad.mad_hdr.status = + cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB); + + return true; + } else { + return false; + } +} static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv, struct ib_wc *wc) { @@ -1963,6 +1981,9 @@ local: * or via recv_handler in ib_mad_complete_recv() */ recv = NULL; + } else if (generate_unmatched_resp(recv, response)) { + agent_send_response(&response->mad.mad, &recv->grh, wc, + port_priv->device, port_num, qp_info->qp->qp_num); } out: diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index d1c8196d15d7..396e29370304 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -147,9 +147,13 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (op < 0 || op >= client->nops || !client->cb_table[RDMA_NL_GET_OP(op)].dump) return -EINVAL; - return netlink_dump_start(nls, skb, nlh, - client->cb_table[op].dump, - NULL, 0); + + { + struct netlink_dump_control c = { + .dump = client->cb_table[op].dump, + }; + return netlink_dump_start(nls, skb, nlh, &c); + } } } diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index c61bca30fd2d..246fdc151652 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -179,32 +179,36 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, { struct ib_port_attr attr; char *speed = ""; - int rate; + int rate; /* in deci-Gb/sec */ ssize_t ret; ret = ib_query_port(p->ibdev, p->port_num, &attr); if (ret) return ret; - rate = (25 * attr.active_speed) / 10; - switch (attr.active_speed) { - case 2: + case IB_SPEED_DDR: speed = " DDR"; + rate = 50; break; - case 4: + case IB_SPEED_QDR: speed = " QDR"; + rate = 100; break; - case 8: + case IB_SPEED_FDR10: speed = " FDR10"; - rate = 10; + rate = 100; break; - case 16: + case IB_SPEED_FDR: speed = " FDR"; - rate = 14; + rate = 140; break; - case 32: + case IB_SPEED_EDR: speed = " EDR"; + rate = 250; + break; + case IB_SPEED_SDR: + default: /* default to SDR for invalid rates */ rate = 25; break; } @@ -214,7 +218,7 @@ static ssize_t rate_show(struct ib_port *p, struct port_attribute *unused, return -EINVAL; return sprintf(buf, "%d%s Gb/sec (%dX%s)\n", - rate, (attr.active_speed == 1) ? ".5" : "", + rate / 10, rate % 10 ? ".5" : "", ib_width_enum_to_int(attr.active_width), speed); } diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index b37b0c02a7b9..5861cdb22b7c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -449,24 +449,6 @@ static void ucma_cleanup_multicast(struct ucma_context *ctx) mutex_unlock(&mut); } -static void ucma_cleanup_events(struct ucma_context *ctx) -{ - struct ucma_event *uevent, *tmp; - - list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { - if (uevent->ctx != ctx) - continue; - - list_del(&uevent->list); - - /* clear incoming connections. */ - if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) - rdma_destroy_id(uevent->cm_id); - - kfree(uevent); - } -} - static void ucma_cleanup_mc_events(struct ucma_multicast *mc) { struct ucma_event *uevent, *tmp; @@ -480,9 +462,16 @@ static void ucma_cleanup_mc_events(struct ucma_multicast *mc) } } +/* + * We cannot hold file->mut when calling rdma_destroy_id() or we can + * deadlock. We also acquire file->mut in ucma_event_handler(), and + * rdma_destroy_id() will wait until all callbacks have completed. + */ static int ucma_free_ctx(struct ucma_context *ctx) { int events_reported; + struct ucma_event *uevent, *tmp; + LIST_HEAD(list); /* No new events will be generated after destroying the id. */ rdma_destroy_id(ctx->cm_id); @@ -491,10 +480,20 @@ static int ucma_free_ctx(struct ucma_context *ctx) /* Cleanup events not yet reported to the user. */ mutex_lock(&ctx->file->mut); - ucma_cleanup_events(ctx); + list_for_each_entry_safe(uevent, tmp, &ctx->file->event_list, list) { + if (uevent->ctx == ctx) + list_move_tail(&uevent->list, &list); + } list_del(&ctx->list); mutex_unlock(&ctx->file->mut); + list_for_each_entry_safe(uevent, tmp, &list, list) { + list_del(&uevent->list); + if (uevent->resp.event == RDMA_CM_EVENT_CONNECT_REQUEST) + rdma_destroy_id(uevent->cm_id); + kfree(uevent); + } + events_reported = ctx->events_reported; kfree(ctx); return events_reported; @@ -808,9 +807,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); if (cmd.conn_param.valid) { - ctx->uid = cmd.uid; ucma_copy_conn_param(&conn_param, &cmd.conn_param); + mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); + if (!ret) + ctx->uid = cmd.uid; + mutex_unlock(&file->mut); } else ret = rdma_accept(ctx->cm_id, NULL); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b930da4c0c63..4d27e4c3fe34 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1485,6 +1485,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 602b1bd723a9..575b78045aaf 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -421,6 +421,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; + atomic_set(&qp->usecnt, 0); if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; @@ -430,7 +431,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->xrcd = qp_init_attr->xrcd; atomic_inc(&qp_init_attr->xrcd->usecnt); INIT_LIST_HEAD(&qp->open_list); - atomic_set(&qp->usecnt, 0); real_qp = qp; qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c index 12f923d64e42..07eb3a8067d8 100644 --- a/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/drivers/infiniband/hw/amso1100/c2_provider.c @@ -94,7 +94,7 @@ static int c2_query_port(struct ib_device *ibdev, props->pkey_tbl_len = 1; props->qkey_viol_cntr = 0; props->active_width = 1; - props->active_speed = 1; + props->active_speed = IB_SPEED_SDR; return 0; } diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 37c224fc3ad9..0bdf09aa6f42 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1227,7 +1227,7 @@ static int iwch_query_port(struct ib_device *ibdev, props->gid_tbl_len = 1; props->pkey_tbl_len = 1; props->active_width = 2; - props->active_speed = 2; + props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; return 0; diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c index bea5839d89ee..6de8463f453b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_qp.c +++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c @@ -803,7 +803,7 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg) * Assumes qhp lock is held. */ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, - struct iwch_cq *schp, unsigned long *flag) + struct iwch_cq *schp) { int count; int flushed; @@ -812,44 +812,44 @@ static void __flush_qp(struct iwch_qp *qhp, struct iwch_cq *rchp, PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); /* take a ref on the qhp since we must release the lock */ atomic_inc(&qhp->refcnt); - spin_unlock_irqrestore(&qhp->lock, *flag); + spin_unlock(&qhp->lock); /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&rchp->lock, *flag); + spin_lock(&rchp->lock); spin_lock(&qhp->lock); cxio_flush_hw_cq(&rchp->cq); cxio_count_rcqes(&rchp->cq, &qhp->wq, &count); flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&rchp->lock, *flag); + spin_unlock(&rchp->lock); if (flushed) { - spin_lock_irqsave(&rchp->comp_handler_lock, *flag); + spin_lock(&rchp->comp_handler_lock); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag); + spin_unlock(&rchp->comp_handler_lock); } /* locking hierarchy: cq lock first, then qp lock. */ - spin_lock_irqsave(&schp->lock, *flag); + spin_lock(&schp->lock); spin_lock(&qhp->lock); cxio_flush_hw_cq(&schp->cq); cxio_count_scqes(&schp->cq, &qhp->wq, &count); flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count); spin_unlock(&qhp->lock); - spin_unlock_irqrestore(&schp->lock, *flag); + spin_unlock(&schp->lock); if (flushed) { - spin_lock_irqsave(&schp->comp_handler_lock, *flag); + spin_lock(&schp->comp_handler_lock); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, *flag); + spin_unlock(&schp->comp_handler_lock); } /* deref */ if (atomic_dec_and_test(&qhp->refcnt)) wake_up(&qhp->wait); - spin_lock_irqsave(&qhp->lock, *flag); + spin_lock(&qhp->lock); } -static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) +static void flush_qp(struct iwch_qp *qhp) { struct iwch_cq *rchp, *schp; @@ -859,19 +859,19 @@ static void flush_qp(struct iwch_qp *qhp, unsigned long *flag) if (qhp->ibqp.uobject) { cxio_set_wq_in_error(&qhp->wq); cxio_set_cq_in_error(&rchp->cq); - spin_lock_irqsave(&rchp->comp_handler_lock, *flag); + spin_lock(&rchp->comp_handler_lock); (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, *flag); + spin_unlock(&rchp->comp_handler_lock); if (schp != rchp) { cxio_set_cq_in_error(&schp->cq); - spin_lock_irqsave(&schp->comp_handler_lock, *flag); + spin_lock(&schp->comp_handler_lock); (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, *flag); + spin_unlock(&schp->comp_handler_lock); } return; } - __flush_qp(qhp, rchp, schp, flag); + __flush_qp(qhp, rchp, schp); } @@ -1030,7 +1030,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, break; case IWCH_QP_STATE_ERROR: qhp->attr.state = IWCH_QP_STATE_ERROR; - flush_qp(qhp, &flag); + flush_qp(qhp); break; default: ret = -EINVAL; @@ -1078,7 +1078,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp, } switch (attrs->next_state) { case IWCH_QP_STATE_IDLE: - flush_qp(qhp, &flag); + flush_qp(qhp); qhp->attr.state = IWCH_QP_STATE_IDLE; qhp->attr.llp_stream_handle = NULL; put_ep(&qhp->ep->com); @@ -1132,7 +1132,7 @@ err: free=1; wake_up(&qhp->wait); BUG_ON(!ep); - flush_qp(qhp, &flag); + flush_qp(qhp); out: spin_unlock_irqrestore(&qhp->lock, flag); diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 0668bb3472d0..92b4c2b0308b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1114,7 +1114,7 @@ static void process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb) * generated when moving QP to RTS state. * A TERM message will be sent after QP has moved to RTS state */ - if ((ep->mpa_attr.version == 2) && + if ((ep->mpa_attr.version == 2) && peer2peer && (ep->mpa_attr.p2p_type != p2p_type)) { ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED; rtr_mismatch = 1; @@ -1562,11 +1562,11 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, struct neighbour *n; int err, step; - rcu_read_lock(); - n = dst_get_neighbour_noref(dst); - err = -ENODEV; + n = dst_neigh_lookup(dst, &peer_ip); if (!n) - goto out; + return -ENODEV; + + rcu_read_lock(); err = -ENOMEM; if (n->dev->flags & IFF_LOOPBACK) { struct net_device *pdev; @@ -1614,6 +1614,8 @@ static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, out: rcu_read_unlock(); + neigh_release(n); + return err; } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 247fe706e7fa..be1c18f44400 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -329,7 +329,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port, props->gid_tbl_len = 1; props->pkey_tbl_len = 1; props->active_width = 2; - props->active_speed = 2; + props->active_speed = IB_SPEED_DDR; props->max_msg_sz = -1; return 0; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index aaf6023a4835..f08f6eaf3fa8 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -379,8 +379,8 @@ extern spinlock_t shca_list_lock; extern int ehca_static_rate; extern int ehca_port_act_time; -extern int ehca_use_hp_mr; -extern int ehca_scaling_code; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; extern int ehca_max_cq; diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c index 73edc3668663..9ed4d2588304 100644 --- a/drivers/infiniband/hw/ehca/ehca_hca.c +++ b/drivers/infiniband/hw/ehca/ehca_hca.c @@ -233,7 +233,7 @@ int ehca_query_port(struct ib_device *ibdev, props->phys_state = 5; props->state = rblock->state; props->active_width = IB_WIDTH_12X; - props->active_speed = 0x1; + props->active_speed = IB_SPEED_SDR; } query_port1: diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c index e571e60ecb88..53589000fd07 100644 --- a/drivers/infiniband/hw/ehca/ehca_irq.c +++ b/drivers/infiniband/hw/ehca/ehca_irq.c @@ -786,7 +786,8 @@ static struct task_struct *create_comp_task(struct ehca_comp_pool *pool, spin_lock_init(&cct->task_lock); INIT_LIST_HEAD(&cct->cq_list); init_waitqueue_head(&cct->wait_queue); - cct->task = kthread_create(comp_task, cct, "ehca_comp/%d", cpu); + cct->task = kthread_create_on_node(comp_task, cct, cpu_to_node(cpu), + "ehca_comp/%d", cpu); return cct->task; } diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c240e9972cb0..832e7a7d0aee 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -59,16 +59,16 @@ MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); MODULE_VERSION(HCAD_VERSION); -static int ehca_open_aqp1 = 0; +static bool ehca_open_aqp1 = 0; static int ehca_hw_level = 0; -static int ehca_poll_all_eqs = 1; +static bool ehca_poll_all_eqs = 1; int ehca_debug_level = 0; int ehca_nr_ports = -1; -int ehca_use_hp_mr = 0; +bool ehca_use_hp_mr = 0; int ehca_port_act_time = 30; int ehca_static_rate = -1; -int ehca_scaling_code = 0; +bool ehca_scaling_code = 0; int ehca_lock_hcalls = -1; int ehca_max_cq = -1; int ehca_max_qp = -1; @@ -82,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c index 43cae84005f0..b781b2cb0624 100644 --- a/drivers/infiniband/hw/ehca/ehca_mrmw.c +++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c @@ -112,7 +112,7 @@ static u32 ehca_encode_hwpage_size(u32 pgsize) static u64 ehca_get_max_hwpage_size(struct ehca_shca *shca) { - return 1UL << ilog2(shca->hca_cap_mr_pgsize); + return rounddown_pow_of_two(shca->hca_cap_mr_pgsize); } static struct ehca_mr *ehca_mr_new(void) diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c index 9a3fbfca9b41..fd05f48f6b0b 100644 --- a/drivers/infiniband/hw/ehca/ehca_reqs.c +++ b/drivers/infiniband/hw/ehca/ehca_reqs.c @@ -42,7 +42,6 @@ */ -#include <asm/system.h> #include "ehca_classes.h" #include "ehca_tools.h" #include "ehca_qes.h" diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index b7d4216db3c3..a4de9d58e9b4 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -89,7 +89,7 @@ static int create_file(const char *name, umode_t mode, error = ipathfs_mknod(parent->d_inode, *dentry, mode, fops, data); else - error = PTR_ERR(dentry); + error = PTR_ERR(*dentry); mutex_unlock(&parent->d_inode->i_mutex); return error; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 5ecf38d97269..77c8cb4c5073 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -720,7 +720,8 @@ repoll: wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; - wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum); + wc->wc_flags |= mlx4_ib_ipoib_csum_ok(cqe->status, + cqe->checksum) ? IB_WC_IP_CSUM_OK : 0; if (rdma_port_get_link_layer(wc->qp->device, (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET) wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; @@ -747,8 +748,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) break; } - if (npolled) - mlx4_cq_set_ci(&cq->mcq); + mlx4_cq_set_ci(&cq->mcq); spin_unlock_irqrestore(&cq->lock, flags); diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 95c94d8f0254..259b0670b51c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -257,12 +257,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; /* - * Don't process SMInfo queries or vendor-specific - * MADs -- the SMA can't handle them. + * Don't process SMInfo queries -- the SMA can't handle them. */ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == - IB_SMP_ATTR_VENDOR_MASK)) + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 7b445df6a667..669673e81439 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -163,7 +163,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; - props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1; + props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; out: kfree(in_mad); @@ -182,12 +182,27 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) } static int ib_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, - struct ib_smp *in_mad, - struct ib_smp *out_mad) + struct ib_port_attr *props) { + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; int ext_active_speed; - int err; + int err = -ENOMEM; + + in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); + out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); + + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, + in_mad, out_mad); + if (err) + goto out; + props->lid = be16_to_cpup((__be16 *) (out_mad->data + 16)); props->lmc = out_mad->data[34] & 0x7; @@ -215,34 +230,38 @@ static int ib_link_query_port(struct ib_device *ibdev, u8 port, switch (ext_active_speed) { case 1: - props->active_speed = 16; /* FDR */ + props->active_speed = IB_SPEED_FDR; break; case 2: - props->active_speed = 32; /* EDR */ + props->active_speed = IB_SPEED_EDR; break; } } /* If reported active speed is QDR, check if is FDR-10 */ - if (props->active_speed == 4) { - if (to_mdev(ibdev)->dev->caps.ext_port_cap[port] & - MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO) { - init_query_mad(in_mad); - in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); - - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, - NULL, NULL, in_mad, out_mad); - if (err) - return err; + if (props->active_speed == IB_SPEED_QDR) { + init_query_mad(in_mad); + in_mad->attr_id = MLX4_ATTR_EXTENDED_PORT_INFO; + in_mad->attr_mod = cpu_to_be32(port); - /* Checking LinkSpeedActive for FDR-10 */ - if (out_mad->data[15] & 0x1) - props->active_speed = 8; - } + err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, + NULL, NULL, in_mad, out_mad); + if (err) + return err; + + /* Checking LinkSpeedActive for FDR-10 */ + if (out_mad->data[15] & 0x1) + props->active_speed = IB_SPEED_FDR10; } - return 0; + /* Avoid wrong speed value returned by FW if the IB link is down. */ + if (props->state == IB_PORT_DOWN) + props->active_speed = IB_SPEED_SDR; + +out: + kfree(in_mad); + kfree(out_mad); + return err; } static u8 state_to_phys_state(enum ib_port_state state) @@ -251,32 +270,42 @@ static u8 state_to_phys_state(enum ib_port_state state) } static int eth_link_query_port(struct ib_device *ibdev, u8 port, - struct ib_port_attr *props, - struct ib_smp *out_mad) + struct ib_port_attr *props) { - struct mlx4_ib_iboe *iboe = &to_mdev(ibdev)->iboe; + + struct mlx4_ib_dev *mdev = to_mdev(ibdev); + struct mlx4_ib_iboe *iboe = &mdev->iboe; struct net_device *ndev; enum ib_mtu tmp; + struct mlx4_cmd_mailbox *mailbox; + int err = 0; + + mailbox = mlx4_alloc_cmd_mailbox(mdev->dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0, + MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + if (err) + goto out; - props->active_width = IB_WIDTH_1X; - props->active_speed = 4; + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP; - props->gid_tbl_len = to_mdev(ibdev)->dev->caps.gid_table_len[port]; - props->max_msg_sz = to_mdev(ibdev)->dev->caps.max_msg_sz; + props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; + props->max_msg_sz = mdev->dev->caps.max_msg_sz; props->pkey_tbl_len = 1; - props->bad_pkey_cntr = be16_to_cpup((__be16 *) (out_mad->data + 46)); - props->qkey_viol_cntr = be16_to_cpup((__be16 *) (out_mad->data + 48)); props->max_mtu = IB_MTU_4096; - props->subnet_timeout = 0; - props->max_vl_num = out_mad->data[37] >> 4; - props->init_type_reply = 0; + props->max_vl_num = 2; props->state = IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); props->active_mtu = IB_MTU_256; spin_lock(&iboe->lock); ndev = iboe->netdevs[port - 1]; if (!ndev) - goto out; + goto out_unlock; tmp = iboe_get_mtu(ndev->mtu); props->active_mtu = tmp ? min(props->max_mtu, tmp) : IB_MTU_256; @@ -284,41 +313,23 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, props->state = (netif_running(ndev) && netif_carrier_ok(ndev)) ? IB_PORT_ACTIVE : IB_PORT_DOWN; props->phys_state = state_to_phys_state(props->state); - -out: +out_unlock: spin_unlock(&iboe->lock); - return 0; +out: + mlx4_free_cmd_mailbox(mdev->dev, mailbox); + return err; } static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; - - in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); - out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL); - if (!in_mad || !out_mad) - goto out; + int err; memset(props, 0, sizeof *props); - init_query_mad(in_mad); - in_mad->attr_id = IB_SMP_ATTR_PORT_INFO; - in_mad->attr_mod = cpu_to_be32(port); - - err = mlx4_MAD_IFC(to_mdev(ibdev), 1, 1, port, NULL, NULL, in_mad, out_mad); - if (err) - goto out; - err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ? - ib_link_query_port(ibdev, port, props, in_mad, out_mad) : - eth_link_query_port(ibdev, port, props, out_mad); - -out: - kfree(in_mad); - kfree(out_mad); + ib_link_query_port(ibdev, port, props) : + eth_link_query_port(ibdev, port, props); return err; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index aa2aefa4236c..3a7848966627 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1884,6 +1884,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wmb(); if (wr->opcode < 0 || wr->opcode >= ARRAY_SIZE(mlx4_ib_opcode)) { + *bad_wr = wr; err = -EINVAL; goto out; } diff --git a/drivers/infiniband/hw/mthca/mthca_cq.c b/drivers/infiniband/hw/mthca/mthca_cq.c index 53157b86a1ba..40ba83338155 100644 --- a/drivers/infiniband/hw/mthca/mthca_cq.c +++ b/drivers/infiniband/hw/mthca/mthca_cq.c @@ -643,7 +643,8 @@ static inline int mthca_poll_one(struct mthca_dev *dev, entry->wc_flags |= cqe->g_mlpath & 0x80 ? IB_WC_GRH : 0; checksum = (be32_to_cpu(cqe->rqpn) >> 24) | ((be32_to_cpu(cqe->my_ee) >> 16) & 0xff00); - entry->csum_ok = (cqe->sl_ipok & 1 && checksum == 0xffff); + entry->wc_flags |= (cqe->sl_ipok & 1 && checksum == 0xffff) ? + IB_WC_IP_CSUM_OK : 0; } entry->status = IB_WC_SUCCESS; diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 5965b3df8f2f..7140199f562e 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -96,7 +96,7 @@ unsigned int wqm_quanta = 0x10000; module_param(wqm_quanta, int, 0644); MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); -static unsigned int limit_maxrdreqsz; +static bool limit_maxrdreqsz; module_param(limit_maxrdreqsz, bool, 0644); MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes"); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 568b4f11380a..c438e4691b3c 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 425065b36b8c..71edfbbcce1c 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -233,6 +233,7 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) u8 *start_ptr = &start_addr; u8 **start_buff = &start_ptr; u16 buff_len = 0; + struct ietf_mpa_v1 *mpa_frame; skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { @@ -242,6 +243,8 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) /* send an MPA reject frame */ cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); + mpa_frame = (struct ietf_mpa_v1 *)*start_buff; + mpa_frame->flags |= IETF_MPA_FLAGS_REJECT; form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN); cm_node->state = NES_CM_STATE_FIN_WAIT1; @@ -335,18 +338,21 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, case IETF_MPA_V2: { u16 ird_size; u16 ord_size; + u16 rtr_ctrl_ird; + u16 rtr_ctrl_ord; + mpa_v2_frame = (struct ietf_mpa_v2 *)buffer; mpa_hdr_len += IETF_RTR_MSG_SIZE; cm_node->mpa_frame_size -= IETF_RTR_MSG_SIZE; rtr_msg = &mpa_v2_frame->rtr_msg; /* parse rtr message */ - rtr_msg->ctrl_ird = ntohs(rtr_msg->ctrl_ird); - rtr_msg->ctrl_ord = ntohs(rtr_msg->ctrl_ord); - ird_size = rtr_msg->ctrl_ird & IETF_NO_IRD_ORD; - ord_size = rtr_msg->ctrl_ord & IETF_NO_IRD_ORD; + rtr_ctrl_ird = ntohs(rtr_msg->ctrl_ird); + rtr_ctrl_ord = ntohs(rtr_msg->ctrl_ord); + ird_size = rtr_ctrl_ird & IETF_NO_IRD_ORD; + ord_size = rtr_ctrl_ord & IETF_NO_IRD_ORD; - if (!(rtr_msg->ctrl_ird & IETF_PEER_TO_PEER)) { + if (!(rtr_ctrl_ird & IETF_PEER_TO_PEER)) { /* send reset */ return -EINVAL; } @@ -367,9 +373,9 @@ static int parse_mpa(struct nes_cm_node *cm_node, u8 *buffer, u32 *type, } } - if (rtr_msg->ctrl_ord & IETF_RDMA0_READ) { + if (rtr_ctrl_ord & IETF_RDMA0_READ) { cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO; - } else if (rtr_msg->ctrl_ord & IETF_RDMA0_WRITE) { + } else if (rtr_ctrl_ord & IETF_RDMA0_WRITE) { cm_node->send_rdma0_op = SEND_RDMA_WRITE_ZERO; } else { /* Not supported RDMA0 operation */ return -EINVAL; @@ -540,6 +546,8 @@ static void build_mpa_v2(struct nes_cm_node *cm_node, { struct ietf_mpa_v2 *mpa_frame = (struct ietf_mpa_v2 *)start_addr; struct ietf_rtr_msg *rtr_msg = &mpa_frame->rtr_msg; + u16 ctrl_ird; + u16 ctrl_ord; /* initialize the upper 5 bytes of the frame */ build_mpa_v1(cm_node, start_addr, mpa_key); @@ -547,31 +555,31 @@ static void build_mpa_v2(struct nes_cm_node *cm_node, mpa_frame->priv_data_len += htons(IETF_RTR_MSG_SIZE); /* initialize RTR msg */ - rtr_msg->ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? + ctrl_ird = (cm_node->ird_size > IETF_NO_IRD_ORD) ? IETF_NO_IRD_ORD : cm_node->ird_size; - rtr_msg->ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? + ctrl_ord = (cm_node->ord_size > IETF_NO_IRD_ORD) ? IETF_NO_IRD_ORD : cm_node->ord_size; - rtr_msg->ctrl_ird |= IETF_PEER_TO_PEER; - rtr_msg->ctrl_ird |= IETF_FLPDU_ZERO_LEN; + ctrl_ird |= IETF_PEER_TO_PEER; + ctrl_ird |= IETF_FLPDU_ZERO_LEN; switch (mpa_key) { case MPA_KEY_REQUEST: - rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; - rtr_msg->ctrl_ord |= IETF_RDMA0_READ; + ctrl_ord |= IETF_RDMA0_WRITE; + ctrl_ord |= IETF_RDMA0_READ; break; case MPA_KEY_REPLY: switch (cm_node->send_rdma0_op) { case SEND_RDMA_WRITE_ZERO: - rtr_msg->ctrl_ord |= IETF_RDMA0_WRITE; + ctrl_ord |= IETF_RDMA0_WRITE; break; case SEND_RDMA_READ_ZERO: - rtr_msg->ctrl_ord |= IETF_RDMA0_READ; + ctrl_ord |= IETF_RDMA0_READ; break; } } - rtr_msg->ctrl_ird = htons(rtr_msg->ctrl_ird); - rtr_msg->ctrl_ord = htons(rtr_msg->ctrl_ord); + rtr_msg->ctrl_ird = htons(ctrl_ird); + rtr_msg->ctrl_ord = htons(ctrl_ord); } /** @@ -1348,8 +1356,9 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi else netdev = nesvnic->netdev; + neigh = dst_neigh_lookup(&rt->dst, &dst_ip); + rcu_read_lock(); - neigh = dst_get_neighbour_noref(&rt->dst); if (neigh) { if (neigh->nud_state & NUD_VALID) { nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" @@ -1360,8 +1369,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, neigh->ha, ETH_ALEN)) { /* Mac address same as in nes_arp_table */ - ip_rt_put(rt); - return rc; + goto out; } nes_manage_arp_cache(nesvnic->netdev, @@ -1377,7 +1385,12 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi neigh_event_send(neigh, NULL); } } +out: rcu_read_unlock(); + + if (neigh) + neigh_release(neigh); + ip_rt_put(rt); return rc; } diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index bdfa1fbb35fc..4646e6666087 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h index b4393a16099d..a69eef16d72d 100644 --- a/drivers/infiniband/hw/nes/nes_context.h +++ b/drivers/infiniband/hw/nes/nes_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 055f4b545df0..d42c9f435b1b 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 0b590e152c6a..d748e4b31b8d 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. +* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index b3b2a240c6e9..3ba7be369452 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h index 8c8af254555a..4f7f701c4a81 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.h +++ b/drivers/infiniband/hw/nes/nes_mgt.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2010 Intel-NE, Inc. All rights reserved. +* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 4b3fa711a247..f3a3ecf8d09e 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index 71e133ab209b..4926de744488 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index 8b4c2ff54888..e98f4fc0b768 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 5095bc41c6cc..8b8812de4b5c 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -597,7 +597,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr props->pkey_tbl_len = 1; props->qkey_viol_cntr = 0; props->active_width = IB_WIDTH_4X; - props->active_speed = 1; + props->active_speed = IB_SPEED_SDR; props->max_msg_sz = 0x80000000; return 0; @@ -3428,6 +3428,8 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, ib_wr->wr.fast_reg.length); set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, ib_wr->wr.fast_reg.rkey); /* Set page size: */ @@ -3724,7 +3726,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) entry->opcode = IB_WC_SEND; break; case NES_IWARP_SQ_OP_LOCINV: - entry->opcode = IB_WR_LOCAL_INV; + entry->opcode = IB_WC_LOCAL_INV; break; case NES_IWARP_SQ_OP_FAST_REG: entry->opcode = IB_WC_FAST_REG_MR; diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index fe6b6e92fa90..0eff7c44d76b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index b881bdc401f5..6b811e3e8bd1 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -427,6 +427,14 @@ struct qib_verbs_txreq { /* how often we check for packet activity for "power on hours (in seconds) */ #define ACTIVITY_TIMER 5 +#define MAX_NAME_SIZE 64 +struct qib_msix_entry { + struct msix_entry msix; + void *arg; + char name[MAX_NAME_SIZE]; + cpumask_var_t mask; +}; + /* Below is an opaque struct. Each chip (device) can maintain * private data needed for its operation, but not germane to the * rest of the driver. For convenience, we define another that @@ -1355,7 +1363,7 @@ int qib_pcie_init(struct pci_dev *, const struct pci_device_id *); int qib_pcie_ddinit(struct qib_devdata *, struct pci_dev *, const struct pci_device_id *); void qib_pcie_ddcleanup(struct qib_devdata *); -int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct msix_entry *); +int qib_pcie_params(struct qib_devdata *, u32, u32 *, struct qib_msix_entry *); int qib_reinit_intr(struct qib_devdata *); void qib_enable_intx(struct pci_dev *); void qib_nomsi(struct qib_devdata *); diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 4f18e2d332df..d0c64d514813 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -2105,7 +2105,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd) dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev, dd->rcd[0]->rcvhdrq_size, &dd->cspec->dummy_hdrq_phys, - GFP_KERNEL | __GFP_COMP); + GFP_ATOMIC | __GFP_COMP); if (!dd->cspec->dummy_hdrq) { qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n"); /* fallback to just 0'ing */ diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 41e92089e41b..060b96064469 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -541,8 +541,7 @@ struct qib_chip_specific { u32 lastbuf_for_pio; u32 stay_in_freeze; u32 recovery_ports_initted; - struct msix_entry *msix_entries; - void **msix_arg; + struct qib_msix_entry *msix_entries; unsigned long *sendchkenable; unsigned long *sendgrhchk; unsigned long *sendibchk; @@ -639,24 +638,24 @@ static struct { int lsb; int port; /* 0 if not port-specific, else port # */ } irq_table[] = { - { QIB_DRV_NAME, qib_7322intr, -1, 0 }, - { QIB_DRV_NAME " (buf avail)", qib_7322bufavail, + { "", qib_7322intr, -1, 0 }, + { " (buf avail)", qib_7322bufavail, SYM_LSB(IntStatus, SendBufAvail), 0 }, - { QIB_DRV_NAME " (sdma 0)", sdma_intr, + { " (sdma 0)", sdma_intr, SYM_LSB(IntStatus, SDmaInt_0), 1 }, - { QIB_DRV_NAME " (sdma 1)", sdma_intr, + { " (sdma 1)", sdma_intr, SYM_LSB(IntStatus, SDmaInt_1), 2 }, - { QIB_DRV_NAME " (sdmaI 0)", sdma_idle_intr, + { " (sdmaI 0)", sdma_idle_intr, SYM_LSB(IntStatus, SDmaIdleInt_0), 1 }, - { QIB_DRV_NAME " (sdmaI 1)", sdma_idle_intr, + { " (sdmaI 1)", sdma_idle_intr, SYM_LSB(IntStatus, SDmaIdleInt_1), 2 }, - { QIB_DRV_NAME " (sdmaP 0)", sdma_progress_intr, + { " (sdmaP 0)", sdma_progress_intr, SYM_LSB(IntStatus, SDmaProgressInt_0), 1 }, - { QIB_DRV_NAME " (sdmaP 1)", sdma_progress_intr, + { " (sdmaP 1)", sdma_progress_intr, SYM_LSB(IntStatus, SDmaProgressInt_1), 2 }, - { QIB_DRV_NAME " (sdmaC 0)", sdma_cleanup_intr, + { " (sdmaC 0)", sdma_cleanup_intr, SYM_LSB(IntStatus, SDmaCleanupDone_0), 1 }, - { QIB_DRV_NAME " (sdmaC 1)", sdma_cleanup_intr, + { " (sdmaC 1)", sdma_cleanup_intr, SYM_LSB(IntStatus, SDmaCleanupDone_1), 2 }, }; @@ -2567,9 +2566,13 @@ static void qib_7322_nomsix(struct qib_devdata *dd) int i; dd->cspec->num_msix_entries = 0; - for (i = 0; i < n; i++) - free_irq(dd->cspec->msix_entries[i].vector, - dd->cspec->msix_arg[i]); + for (i = 0; i < n; i++) { + irq_set_affinity_hint( + dd->cspec->msix_entries[i].msix.vector, NULL); + free_cpumask_var(dd->cspec->msix_entries[i].mask); + free_irq(dd->cspec->msix_entries[i].msix.vector, + dd->cspec->msix_entries[i].arg); + } qib_nomsix(dd); } /* make sure no MSIx interrupts are left pending */ @@ -2597,7 +2600,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd) kfree(dd->cspec->sendgrhchk); kfree(dd->cspec->sendibchk); kfree(dd->cspec->msix_entries); - kfree(dd->cspec->msix_arg); for (i = 0; i < dd->num_pports; i++) { unsigned long flags; u32 mask = QSFP_GPIO_MOD_PRS_N | @@ -3070,6 +3072,8 @@ static void qib_setup_7322_interrupt(struct qib_devdata *dd, int clearpend) int ret, i, msixnum; u64 redirect[6]; u64 mask; + const struct cpumask *local_mask; + int firstcpu, secondcpu = 0, currrcvcpu = 0; if (!dd->num_pports) return; @@ -3118,13 +3122,28 @@ try_intx: memset(redirect, 0, sizeof redirect); mask = ~0ULL; msixnum = 0; + local_mask = cpumask_of_pcibus(dd->pcidev->bus); + firstcpu = cpumask_first(local_mask); + if (firstcpu >= nr_cpu_ids || + cpumask_weight(local_mask) == num_online_cpus()) { + local_mask = topology_core_cpumask(0); + firstcpu = cpumask_first(local_mask); + } + if (firstcpu < nr_cpu_ids) { + secondcpu = cpumask_next(firstcpu, local_mask); + if (secondcpu >= nr_cpu_ids) + secondcpu = firstcpu; + currrcvcpu = secondcpu; + } for (i = 0; msixnum < dd->cspec->num_msix_entries; i++) { irq_handler_t handler; - const char *name; void *arg; u64 val; int lsb, reg, sh; + dd->cspec->msix_entries[msixnum]. + name[sizeof(dd->cspec->msix_entries[msixnum].name) - 1] + = '\0'; if (i < ARRAY_SIZE(irq_table)) { if (irq_table[i].port) { /* skip if for a non-configured port */ @@ -3135,7 +3154,11 @@ try_intx: arg = dd; lsb = irq_table[i].lsb; handler = irq_table[i].handler; - name = irq_table[i].name; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d%s", dd->unit, + irq_table[i].name); } else { unsigned ctxt; @@ -3148,23 +3171,28 @@ try_intx: continue; lsb = QIB_I_RCVAVAIL_LSB + ctxt; handler = qib_7322pintr; - name = QIB_DRV_NAME " (kctx)"; + snprintf(dd->cspec->msix_entries[msixnum].name, + sizeof(dd->cspec->msix_entries[msixnum].name) + - 1, + QIB_DRV_NAME "%d (kctx)", dd->unit); } - ret = request_irq(dd->cspec->msix_entries[msixnum].vector, - handler, 0, name, arg); + ret = request_irq( + dd->cspec->msix_entries[msixnum].msix.vector, + handler, 0, dd->cspec->msix_entries[msixnum].name, + arg); if (ret) { /* * Shouldn't happen since the enable said we could * have as many as we are trying to setup here. */ qib_dev_err(dd, "Couldn't setup MSIx " - "interrupt (vec=%d, irq=%d): %d\n", msixnum, - dd->cspec->msix_entries[msixnum].vector, - ret); + "interrupt (vec=%d, irq=%d): %d\n", msixnum, + dd->cspec->msix_entries[msixnum].msix.vector, + ret); qib_7322_nomsix(dd); goto try_intx; } - dd->cspec->msix_arg[msixnum] = arg; + dd->cspec->msix_entries[msixnum].arg = arg; if (lsb >= 0) { reg = lsb / IBA7322_REDIRECT_VEC_PER_REG; sh = (lsb % IBA7322_REDIRECT_VEC_PER_REG) * @@ -3174,6 +3202,25 @@ try_intx: } val = qib_read_kreg64(dd, 2 * msixnum + 1 + (QIB_7322_MsixTable_OFFS / sizeof(u64))); + if (firstcpu < nr_cpu_ids && + zalloc_cpumask_var( + &dd->cspec->msix_entries[msixnum].mask, + GFP_KERNEL)) { + if (handler == qib_7322pintr) { + cpumask_set_cpu(currrcvcpu, + dd->cspec->msix_entries[msixnum].mask); + currrcvcpu = cpumask_next(currrcvcpu, + local_mask); + if (currrcvcpu >= nr_cpu_ids) + currrcvcpu = secondcpu; + } else { + cpumask_set_cpu(firstcpu, + dd->cspec->msix_entries[msixnum].mask); + } + irq_set_affinity_hint( + dd->cspec->msix_entries[msixnum].msix.vector, + dd->cspec->msix_entries[msixnum].mask); + } msixnum++; } /* Initialize the vector mapping */ @@ -3365,7 +3412,7 @@ static int qib_do_7322_reset(struct qib_devdata *dd) if (msix_entries) { /* restore the MSIx vector address and data if saved above */ for (i = 0; i < msix_entries; i++) { - dd->cspec->msix_entries[i].entry = i; + dd->cspec->msix_entries[i].msix.entry = i; if (!msix_vecsave || !msix_vecsave[2 * i]) continue; qib_write_kreg(dd, 2 * i + @@ -6865,15 +6912,13 @@ struct qib_devdata *qib_init_iba7322_funcs(struct pci_dev *pdev, tabsize = actual_cnt; dd->cspec->msix_entries = kmalloc(tabsize * - sizeof(struct msix_entry), GFP_KERNEL); - dd->cspec->msix_arg = kmalloc(tabsize * - sizeof(void *), GFP_KERNEL); - if (!dd->cspec->msix_entries || !dd->cspec->msix_arg) { + sizeof(struct qib_msix_entry), GFP_KERNEL); + if (!dd->cspec->msix_entries) { qib_dev_err(dd, "No memory for MSIx table\n"); tabsize = 0; } for (i = 0; i < tabsize; i++) - dd->cspec->msix_entries[i].entry = i; + dd->cspec->msix_entries[i].msix.entry = i; if (qib_pcie_params(dd, 8, &tabsize, dd->cspec->msix_entries)) qib_dev_err(dd, "Failed to setup PCIe or interrupts; " diff --git a/drivers/infiniband/hw/qib/qib_mad.c b/drivers/infiniband/hw/qib/qib_mad.c index 3b3745f261f0..c4ff788823b5 100644 --- a/drivers/infiniband/hw/qib/qib_mad.c +++ b/drivers/infiniband/hw/qib/qib_mad.c @@ -433,7 +433,6 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, struct qib_pportdata *ppd; struct qib_ibport *ibp; struct ib_port_info *pip = (struct ib_port_info *)smp->data; - u16 lid; u8 mtu; int ret; u32 state; @@ -469,8 +468,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev, ibp->mkeyprot == 1)) pip->mkey = ibp->mkey; pip->gid_prefix = ibp->gid_prefix; - lid = ppd->lid; - pip->lid = lid ? cpu_to_be16(lid) : IB_LID_PERMISSIVE; + pip->lid = cpu_to_be16(ppd->lid); pip->sm_lid = cpu_to_be16(ibp->sm_lid); pip->cap_mask = cpu_to_be32(ibp->port_cap_flags); /* pip->diag_code; */ diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index f695061d688e..790646ef5106 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -194,11 +194,24 @@ void qib_pcie_ddcleanup(struct qib_devdata *dd) } static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, - struct msix_entry *msix_entry) + struct qib_msix_entry *qib_msix_entry) { int ret; u32 tabsize = 0; u16 msix_flags; + struct msix_entry *msix_entry; + int i; + + /* We can't pass qib_msix_entry array to qib_msix_setup + * so use a dummy msix_entry array and copy the allocated + * irq back to the qib_msix_entry array. */ + msix_entry = kmalloc(*msixcnt * sizeof(*msix_entry), GFP_KERNEL); + if (!msix_entry) { + ret = -ENOMEM; + goto do_intx; + } + for (i = 0; i < *msixcnt; i++) + msix_entry[i] = qib_msix_entry[i].msix; pci_read_config_word(dd->pcidev, pos + PCI_MSIX_FLAGS, &msix_flags); tabsize = 1 + (msix_flags & PCI_MSIX_FLAGS_QSIZE); @@ -209,11 +222,15 @@ static void qib_msix_setup(struct qib_devdata *dd, int pos, u32 *msixcnt, tabsize = ret; ret = pci_enable_msix(dd->pcidev, msix_entry, tabsize); } +do_intx: if (ret) { qib_dev_err(dd, "pci_enable_msix %d vectors failed: %d, " "falling back to INTx\n", tabsize, ret); tabsize = 0; } + for (i = 0; i < tabsize; i++) + qib_msix_entry[i].msix = msix_entry[i]; + kfree(msix_entry); *msixcnt = tabsize; if (ret) @@ -251,7 +268,7 @@ static int qib_msi_setup(struct qib_devdata *dd, int pos) } int qib_pcie_params(struct qib_devdata *dd, u32 minw, u32 *nent, - struct msix_entry *entry) + struct qib_msix_entry *entry) { u16 linkstat, speed; int pos = 0, pose, ret = 1; @@ -560,7 +577,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) * BIOS may not set PCIe bus-utilization parameters for best performance. * Check and optionally adjust them to maximize our throughput. */ -static int qib_pcie_caps = 0x51; +static int qib_pcie_caps; module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 894afac26f3b..765b4cbaa020 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -2048,7 +2048,6 @@ send_last: wc.pkey_index = 0; wc.dlid_path_bits = 0; wc.port_num = 0; - wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 847e7afdfd94..7ce2ac2ed219 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -422,7 +422,6 @@ last_imm: wc.pkey_index = 0; wc.dlid_path_bits = 0; wc.port_num = 0; - wc.csum_ok = 0; /* Signal completion event if the solicited bit is set. */ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, (ohdr->bth[0] & diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b3cc1e062b17..86df632ea612 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -44,6 +44,7 @@ #include <linux/mutex.h> #include <net/neighbour.h> +#include <net/sch_generic.h> #include <linux/atomic.h> @@ -117,8 +118,9 @@ struct ipoib_header { u16 reserved; }; -struct ipoib_pseudoheader { - u8 hwaddr[INFINIBAND_ALEN]; +struct ipoib_cb { + struct qdisc_skb_cb qdisc_cb; + u8 hwaddr[INFINIBAND_ALEN]; }; /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 4115be54ba3b..5c1bc995e560 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -296,7 +296,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) dev->stats.rx_bytes += skb->len; skb->dev = dev; - if ((dev->features & NETIF_F_RXCSUM) && likely(wc->csum_ok)) + if ((dev->features & NETIF_F_RXCSUM) && + likely(wc->wc_flags & IB_WC_IP_CSUM_OK)) skb->ip_summed = CHECKSUM_UNNECESSARY; napi_gro_receive(&priv->napi, skb); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 3514ca05deea..3974c290b667 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -653,7 +653,7 @@ static void ipoib_path_lookup(struct sk_buff *skb, struct neighbour *n, struct n } static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, - struct ipoib_pseudoheader *phdr) + struct ipoib_cb *cb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; @@ -661,17 +661,15 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, phdr->hwaddr + 4); + path = __path_find(dev, cb->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { - path = path_rec_create(dev, phdr->hwaddr + 4); + path = path_rec_create(dev, cb->hwaddr + 4); new_path = 1; } if (path) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof *phdr); __skb_queue_tail(&path->queue, skb); if (!path->query && path_rec_start(dev, path)) { @@ -695,12 +693,10 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, be16_to_cpu(path->pathrec.dlid)); spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr)); return; } else if ((path->query || !path_rec_start(dev, path)) && skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof *phdr); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -774,16 +770,14 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); } } else { - struct ipoib_pseudoheader *phdr = - (struct ipoib_pseudoheader *) skb->data; - skb_pull(skb, sizeof *phdr); + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; - if (phdr->hwaddr[4] == 0xff) { + if (cb->hwaddr[4] == 0xff) { /* Add in the P_Key for multicast*/ - phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; - phdr->hwaddr[9] = priv->pkey & 0xff; + cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; + cb->hwaddr[9] = priv->pkey & 0xff; - ipoib_mcast_send(dev, phdr->hwaddr + 4, skb); + ipoib_mcast_send(dev, cb->hwaddr + 4, skb); } else { /* unicast GID -- should be ARP or RARP reply */ @@ -792,14 +786,14 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", skb_dst(skb) ? "neigh" : "dst", be16_to_cpup((__be16 *) skb->data), - IPOIB_QPN(phdr->hwaddr), - phdr->hwaddr + 4); + IPOIB_QPN(cb->hwaddr), + cb->hwaddr + 4); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; goto unlock; } - unicast_arp_send(skb, dev, phdr); + unicast_arp_send(skb, dev, cb); } } unlock: @@ -825,8 +819,6 @@ static int ipoib_hard_header(struct sk_buff *skb, const void *daddr, const void *saddr, unsigned len) { struct ipoib_header *header; - struct dst_entry *dst; - struct neighbour *n; header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -834,18 +826,13 @@ static int ipoib_hard_header(struct sk_buff *skb, header->reserved = 0; /* - * If we don't have a neighbour structure, stuff the - * destination address onto the front of the skb so we can - * figure out where to send the packet later. + * If we don't have a dst_entry structure, stuff the + * destination address into skb->cb so we can figure out where + * to send the packet later. */ - dst = skb_dst(skb); - n = NULL; - if (dst) - n = dst_get_neighbour_noref_raw(dst); - if ((!dst || !n) && daddr) { - struct ipoib_pseudoheader *phdr = - (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); - memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); + if (!skb_dst(skb)) { + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; + memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); } return 0; @@ -1021,11 +1008,7 @@ static void ipoib_setup(struct net_device *dev) dev->flags |= IFF_BROADCAST | IFF_MULTICAST; - /* - * We add in INFINIBAND_ALEN to allow for the destination - * address "pseudoheader" for skbs without neighbour struct. - */ - dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; + dev->hard_header_len = IPOIB_ENCAP_LEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = ipoib_sendq_size * 2; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index f7ff9dd66cda..20ebc6fd1bb9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -262,21 +262,13 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = NULL; netif_tx_unlock_bh(dev); skb->dev = dev; - if (dst) - n = dst_get_neighbour_noref_raw(dst); - if (!dst || !n) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof (struct ipoib_pseudoheader)); - } - if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); + netif_tx_lock_bh(dev); } netif_tx_unlock_bh(dev); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9a43cb07f294..db43b3117168 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -364,6 +364,9 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session, } ib_conn = ep->dd_data; + if (iser_alloc_rx_descriptors(ib_conn)) + return -ENOMEM; + /* binds the iSER connection retrieved from the previously * connected ep_handle to the iSCSI layer connection. exchanges * connection pointers */ @@ -398,19 +401,6 @@ iscsi_iser_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) iser_conn->ib_conn = NULL; } -static int -iscsi_iser_conn_start(struct iscsi_cls_conn *cls_conn) -{ - struct iscsi_conn *conn = cls_conn->dd_data; - int err; - - err = iser_conn_set_full_featured_mode(conn); - if (err) - return err; - - return iscsi_conn_start(cls_conn); -} - static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) { struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); @@ -724,7 +714,7 @@ static struct iscsi_transport iscsi_iser_transport = { .get_conn_param = iscsi_conn_get_param, .get_ep_param = iscsi_iser_get_ep_param, .get_session_param = iscsi_session_get_param, - .start_conn = iscsi_iser_conn_start, + .start_conn = iscsi_conn_start, .stop_conn = iscsi_iser_conn_stop, /* iscsi host params */ .get_host_param = iscsi_host_get_param, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index db7ea3704da7..296be431a0e9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -366,4 +366,5 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index a607542fc796..a00ccd1ca333 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -170,7 +170,7 @@ static void iser_create_send_desc(struct iser_conn *ib_conn, } -static int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) +int iser_alloc_rx_descriptors(struct iser_conn *ib_conn) { int i, j; u64 dma_addr; @@ -220,18 +220,6 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) struct iser_rx_desc *rx_desc; struct iser_device *device = ib_conn->device; - if (ib_conn->login_buf) { - if (ib_conn->login_req_dma) - ib_dma_unmap_single(device->ib_device, - ib_conn->login_req_dma, - ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); - if (ib_conn->login_resp_dma) - ib_dma_unmap_single(device->ib_device, - ib_conn->login_resp_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); - kfree(ib_conn->login_buf); - } - if (!ib_conn->rx_descs) return; @@ -242,23 +230,24 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) kfree(ib_conn->rx_descs); } -/** - * iser_conn_set_full_featured_mode - (iSER API) - */ -int iser_conn_set_full_featured_mode(struct iscsi_conn *conn) +static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req) { struct iscsi_iser_conn *iser_conn = conn->dd_data; - iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX); - - /* Check that there is no posted recv or send buffers left - */ - /* they must be consumed during the login phase */ - BUG_ON(iser_conn->ib_conn->post_recv_buf_count != 0); - BUG_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); + iser_dbg("req op %x flags %x\n", req->opcode, req->flags); + /* check if this is the last login - going to full feature phase */ + if ((req->flags & ISCSI_FULL_FEATURE_PHASE) != ISCSI_FULL_FEATURE_PHASE) + return 0; - if (iser_alloc_rx_descriptors(iser_conn->ib_conn)) - return -ENOMEM; + /* + * Check that there is one posted recv buffer (for the last login + * response) and no posted send buffers left - they must have been + * consumed during previous login phases. + */ + WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1); + WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0); + iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX); /* Initial post receive buffers */ if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX)) return -ENOMEM; @@ -438,6 +427,9 @@ int iser_send_control(struct iscsi_conn *conn, err = iser_post_recvl(iser_conn->ib_conn); if (err) goto send_control_error; + err = iser_post_rx_bufs(conn, task->hdr); + if (err) + goto send_control_error; } err = iser_post_send(iser_conn->ib_conn, mdesc); diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index fb88d6896b67..2033a928d34d 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -73,11 +73,11 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, p = mem; for_each_sg(sgl, sg, data->size, i) { - from = kmap_atomic(sg_page(sg), KM_USER0); + from = kmap_atomic(sg_page(sg)); memcpy(p, from + sg->offset, sg->length); - kunmap_atomic(from, KM_USER0); + kunmap_atomic(from); p += sg->length; } } @@ -133,11 +133,11 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, p = mem; for_each_sg(sgl, sg, sg_size, i) { - to = kmap_atomic(sg_page(sg), KM_SOFTIRQ0); + to = kmap_atomic(sg_page(sg)); memcpy(to + sg->offset, p, sg->length); - kunmap_atomic(to, KM_SOFTIRQ0); + kunmap_atomic(to); p += sg->length; } } diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index e28877c4ce15..14224ba44fd8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -274,6 +274,18 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id) ib_conn->cma_id = NULL; kfree(ib_conn->page_vec); + if (ib_conn->login_buf) { + if (ib_conn->login_req_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + if (ib_conn->login_resp_dma) + ib_dma_unmap_single(ib_conn->device->ib_device, + ib_conn->login_resp_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + kfree(ib_conn->login_buf); + } + return 0; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 0bfa545675b8..bcbf22ee0aa7 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -30,6 +30,8 @@ * SOFTWARE. */ +#define pr_fmt(fmt) PFX fmt + #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> @@ -165,7 +167,7 @@ static void srp_free_iu(struct srp_host *host, struct srp_iu *iu) static void srp_qp_event(struct ib_event *event, void *context) { - printk(KERN_ERR PFX "QP event %d\n", event->event); + pr_debug("QP event %d\n", event->event); } static int srp_init_qp(struct srp_target_port *target, @@ -472,6 +474,21 @@ static void srp_free_req_data(struct srp_target_port *target) } } +/** + * srp_del_scsi_host_attr() - Remove attributes defined in the host template. + * @shost: SCSI host whose attributes to remove from sysfs. + * + * Note: Any attributes defined in the host template and that did not exist + * before invocation of this function will be ignored. + */ +static void srp_del_scsi_host_attr(struct Scsi_Host *shost) +{ + struct device_attribute **attr; + + for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr) + device_remove_file(&shost->shost_dev, *attr); +} + static void srp_remove_work(struct work_struct *work) { struct srp_target_port *target = @@ -484,6 +501,7 @@ static void srp_remove_work(struct work_struct *work) list_del(&target->list); spin_unlock(&target->srp_host->target_lock); + srp_del_scsi_host_attr(target->scsi_host); srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); ib_destroy_cm_id(target->cm_id); @@ -1676,10 +1694,6 @@ static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "0x%016llx\n", (unsigned long long) be64_to_cpu(target->id_ext)); } @@ -1689,10 +1703,6 @@ static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "0x%016llx\n", (unsigned long long) be64_to_cpu(target->ioc_guid)); } @@ -1702,10 +1712,6 @@ static ssize_t show_service_id(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "0x%016llx\n", (unsigned long long) be64_to_cpu(target->service_id)); } @@ -1715,10 +1721,6 @@ static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "0x%04x\n", be16_to_cpu(target->path.pkey)); } @@ -1727,10 +1729,6 @@ static ssize_t show_dgid(struct device *dev, struct device_attribute *attr, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "%pI6\n", target->path.dgid.raw); } @@ -1739,10 +1737,6 @@ static ssize_t show_orig_dgid(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "%pI6\n", target->orig_dgid); } @@ -1751,10 +1745,6 @@ static ssize_t show_req_lim(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "%d\n", target->req_lim); } @@ -1763,10 +1753,6 @@ static ssize_t show_zero_req_lim(struct device *dev, { struct srp_target_port *target = host_to_target(class_to_shost(dev)); - if (target->state == SRP_TARGET_DEAD || - target->state == SRP_TARGET_REMOVED) - return -ENODEV; - return sprintf(buf, "%d\n", target->zero_req_lim); } @@ -1989,7 +1975,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) goto out; } if (strlen(p) != 32) { - printk(KERN_WARNING PFX "bad dest GID parameter '%s'\n", p); + pr_warn("bad dest GID parameter '%s'\n", p); kfree(p); goto out; } @@ -2004,7 +1990,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_PKEY: if (match_hex(args, &token)) { - printk(KERN_WARNING PFX "bad P_Key parameter '%s'\n", p); + pr_warn("bad P_Key parameter '%s'\n", p); goto out; } target->path.pkey = cpu_to_be16(token); @@ -2023,7 +2009,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_MAX_SECT: if (match_int(args, &token)) { - printk(KERN_WARNING PFX "bad max sect parameter '%s'\n", p); + pr_warn("bad max sect parameter '%s'\n", p); goto out; } target->scsi_host->max_sectors = token; @@ -2031,7 +2017,8 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_MAX_CMD_PER_LUN: if (match_int(args, &token)) { - printk(KERN_WARNING PFX "bad max cmd_per_lun parameter '%s'\n", p); + pr_warn("bad max cmd_per_lun parameter '%s'\n", + p); goto out; } target->scsi_host->cmd_per_lun = min(token, SRP_CMD_SQ_SIZE); @@ -2039,14 +2026,14 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_IO_CLASS: if (match_hex(args, &token)) { - printk(KERN_WARNING PFX "bad IO class parameter '%s' \n", p); + pr_warn("bad IO class parameter '%s'\n", p); goto out; } if (token != SRP_REV10_IB_IO_CLASS && token != SRP_REV16A_IB_IO_CLASS) { - printk(KERN_WARNING PFX "unknown IO class parameter value" - " %x specified (use %x or %x).\n", - token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS); + pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", + token, SRP_REV10_IB_IO_CLASS, + SRP_REV16A_IB_IO_CLASS); goto out; } target->io_class = token; @@ -2064,7 +2051,8 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_CMD_SG_ENTRIES: if (match_int(args, &token) || token < 1 || token > 255) { - printk(KERN_WARNING PFX "bad max cmd_sg_entries parameter '%s'\n", p); + pr_warn("bad max cmd_sg_entries parameter '%s'\n", + p); goto out; } target->cmd_sg_cnt = token; @@ -2072,7 +2060,7 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_ALLOW_EXT_SG: if (match_int(args, &token)) { - printk(KERN_WARNING PFX "bad allow_ext_sg parameter '%s'\n", p); + pr_warn("bad allow_ext_sg parameter '%s'\n", p); goto out; } target->allow_ext_sg = !!token; @@ -2081,15 +2069,16 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) case SRP_OPT_SG_TABLESIZE: if (match_int(args, &token) || token < 1 || token > SCSI_MAX_SG_CHAIN_SEGMENTS) { - printk(KERN_WARNING PFX "bad max sg_tablesize parameter '%s'\n", p); + pr_warn("bad max sg_tablesize parameter '%s'\n", + p); goto out; } target->sg_tablesize = token; break; default: - printk(KERN_WARNING PFX "unknown parameter or missing value " - "'%s' in target creation request\n", p); + pr_warn("unknown parameter or missing value '%s' in target creation request\n", + p); goto out; } } @@ -2100,9 +2089,8 @@ static int srp_parse_options(const char *buf, struct srp_target_port *target) for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i) if ((srp_opt_tokens[i].token & SRP_OPT_ALL) && !(srp_opt_tokens[i].token & opt_mask)) - printk(KERN_WARNING PFX "target creation request is " - "missing parameter '%s'\n", - srp_opt_tokens[i].pattern); + pr_warn("target creation request is missing parameter '%s'\n", + srp_opt_tokens[i].pattern); out: kfree(options); @@ -2149,7 +2137,7 @@ static ssize_t srp_create_target(struct device *dev, if (!host->srp_dev->fmr_pool && !target->allow_ext_sg && target->cmd_sg_cnt < target->sg_tablesize) { - printk(KERN_WARNING PFX "No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); + pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); target->sg_tablesize = target->cmd_sg_cnt; } @@ -2309,8 +2297,7 @@ static void srp_add_one(struct ib_device *device) return; if (ib_query_device(device, dev_attr)) { - printk(KERN_WARNING PFX "Query device failed for %s\n", - device->name); + pr_warn("Query device failed for %s\n", device->name); goto free_attr; } @@ -2429,6 +2416,7 @@ static void srp_remove_one(struct ib_device *device) list_for_each_entry_safe(target, tmp_target, &host->target_list, list) { + srp_del_scsi_host_attr(target->scsi_host); srp_remove_host(target->scsi_host); scsi_remove_host(target->scsi_host); srp_disconnect_target(target); @@ -2459,7 +2447,7 @@ static int __init srp_init_module(void) BUILD_BUG_ON(FIELD_SIZEOF(struct ib_wc, wr_id) < sizeof(void *)); if (srp_sg_tablesize) { - printk(KERN_WARNING PFX "srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); + pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n"); if (!cmd_sg_entries) cmd_sg_entries = srp_sg_tablesize; } @@ -2468,14 +2456,15 @@ static int __init srp_init_module(void) cmd_sg_entries = SRP_DEF_SG_TABLESIZE; if (cmd_sg_entries > 255) { - printk(KERN_WARNING PFX "Clamping cmd_sg_entries to 255\n"); + pr_warn("Clamping cmd_sg_entries to 255\n"); cmd_sg_entries = 255; } if (!indirect_sg_entries) indirect_sg_entries = cmd_sg_entries; else if (indirect_sg_entries < cmd_sg_entries) { - printk(KERN_WARNING PFX "Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", cmd_sg_entries); + pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n", + cmd_sg_entries); indirect_sg_entries = cmd_sg_entries; } @@ -2486,7 +2475,7 @@ static int __init srp_init_module(void) ret = class_register(&srp_class); if (ret) { - printk(KERN_ERR PFX "couldn't register class infiniband_srp\n"); + pr_err("couldn't register class infiniband_srp\n"); srp_release_transport(ib_srp_transport_template); return ret; } @@ -2495,7 +2484,7 @@ static int __init srp_init_module(void) ret = ib_register_client(&srp_client); if (ret) { - printk(KERN_ERR PFX "couldn't register IB client\n"); + pr_err("couldn't register IB client\n"); srp_release_transport(ib_srp_transport_template); ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig new file mode 100644 index 000000000000..31ee83d528d9 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/Kconfig @@ -0,0 +1,12 @@ +config INFINIBAND_SRPT + tristate "InfiniBand SCSI RDMA Protocol target support" + depends on INFINIBAND && TARGET_CORE + ---help--- + + Support for the SCSI RDMA Protocol (SRP) Target driver. The + SRP protocol is a protocol that allows an initiator to access + a block storage device on another host (target) over a network + that supports the RDMA protocol. Currently the RDMA protocol is + supported by InfiniBand and by iWarp network hardware. More + information about the SRP protocol can be found on the website + of the INCITS T10 technical committee (http://www.t10.org/). diff --git a/drivers/infiniband/ulp/srpt/Makefile b/drivers/infiniband/ulp/srpt/Makefile new file mode 100644 index 000000000000..e3ee4bdfffa5 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/Makefile @@ -0,0 +1,2 @@ +ccflags-y := -Idrivers/target +obj-$(CONFIG_INFINIBAND_SRPT) += ib_srpt.o diff --git a/drivers/infiniband/ulp/srpt/ib_dm_mad.h b/drivers/infiniband/ulp/srpt/ib_dm_mad.h new file mode 100644 index 000000000000..fb1de1f6f297 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_dm_mad.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef IB_DM_MAD_H +#define IB_DM_MAD_H + +#include <linux/types.h> + +#include <rdma/ib_mad.h> + +enum { + /* + * See also section 13.4.7 Status Field, table 115 MAD Common Status + * Field Bit Values and also section 16.3.1.1 Status Field in the + * InfiniBand Architecture Specification. + */ + DM_MAD_STATUS_UNSUP_METHOD = 0x0008, + DM_MAD_STATUS_UNSUP_METHOD_ATTR = 0x000c, + DM_MAD_STATUS_INVALID_FIELD = 0x001c, + DM_MAD_STATUS_NO_IOC = 0x0100, + + /* + * See also the Device Management chapter, section 16.3.3 Attributes, + * table 279 Device Management Attributes in the InfiniBand + * Architecture Specification. + */ + DM_ATTR_CLASS_PORT_INFO = 0x01, + DM_ATTR_IOU_INFO = 0x10, + DM_ATTR_IOC_PROFILE = 0x11, + DM_ATTR_SVC_ENTRIES = 0x12 +}; + +struct ib_dm_hdr { + u8 reserved[28]; +}; + +/* + * Structure of management datagram sent by the SRP target implementation. + * Contains a management datagram header, reliable multi-packet transaction + * protocol (RMPP) header and ib_dm_hdr. Notes: + * - The SRP target implementation does not use RMPP or ib_dm_hdr when sending + * management datagrams. + * - The header size must be exactly 64 bytes (IB_MGMT_DEVICE_HDR), since this + * is the header size that is passed to ib_create_send_mad() in ib_srpt.c. + * - The maximum supported size for a management datagram when not using RMPP + * is 256 bytes -- 64 bytes header and 192 (IB_MGMT_DEVICE_DATA) bytes data. + */ +struct ib_dm_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_dm_hdr dm_hdr; + u8 data[IB_MGMT_DEVICE_DATA]; +}; + +/* + * IOUnitInfo as defined in section 16.3.3.3 IOUnitInfo of the InfiniBand + * Architecture Specification. + */ +struct ib_dm_iou_info { + __be16 change_id; + u8 max_controllers; + u8 op_rom; + u8 controller_list[128]; +}; + +/* + * IOControllerprofile as defined in section 16.3.3.4 IOControllerProfile of + * the InfiniBand Architecture Specification. + */ +struct ib_dm_ioc_profile { + __be64 guid; + __be32 vendor_id; + __be32 device_id; + __be16 device_version; + __be16 reserved1; + __be32 subsys_vendor_id; + __be32 subsys_device_id; + __be16 io_class; + __be16 io_subclass; + __be16 protocol; + __be16 protocol_version; + __be16 service_conn; + __be16 initiators_supported; + __be16 send_queue_depth; + u8 reserved2; + u8 rdma_read_depth; + __be32 send_size; + __be32 rdma_size; + u8 op_cap_mask; + u8 svc_cap_mask; + u8 num_svc_entries; + u8 reserved3[9]; + u8 id_string[64]; +}; + +struct ib_dm_svc_entry { + u8 name[40]; + __be64 id; +}; + +/* + * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture + * Specification. See also section B.7, table B.8 in the T10 SRP r16a document. + */ +struct ib_dm_svc_entries { + struct ib_dm_svc_entry service_entries[4]; +}; + +#endif diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c new file mode 100644 index 000000000000..daf21b899999 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -0,0 +1,4055 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * Copyright (C) 2008 - 2011 Bart Van Assche <bvanassche@acm.org>. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/ctype.h> +#include <linux/kthread.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <scsi/scsi_tcq.h> +#include <target/configfs_macros.h> +#include <target/target_core_base.h> +#include <target/target_core_fabric_configfs.h> +#include <target/target_core_fabric.h> +#include <target/target_core_configfs.h> +#include "ib_srpt.h" + +/* Name of this kernel module. */ +#define DRV_NAME "ib_srpt" +#define DRV_VERSION "2.0.0" +#define DRV_RELDATE "2011-02-14" + +#define SRPT_ID_STRING "Linux SRP target" + +#undef pr_fmt +#define pr_fmt(fmt) DRV_NAME " " fmt + +MODULE_AUTHOR("Vu Pham and Bart Van Assche"); +MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target " + "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_LICENSE("Dual BSD/GPL"); + +/* + * Global Variables + */ + +static u64 srpt_service_guid; +static DEFINE_SPINLOCK(srpt_dev_lock); /* Protects srpt_dev_list. */ +static LIST_HEAD(srpt_dev_list); /* List of srpt_device structures. */ + +static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE; +module_param(srp_max_req_size, int, 0444); +MODULE_PARM_DESC(srp_max_req_size, + "Maximum size of SRP request messages in bytes."); + +static int srpt_srq_size = DEFAULT_SRPT_SRQ_SIZE; +module_param(srpt_srq_size, int, 0444); +MODULE_PARM_DESC(srpt_srq_size, + "Shared receive queue (SRQ) size."); + +static int srpt_get_u64_x(char *buffer, struct kernel_param *kp) +{ + return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg); +} +module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, + 0444); +MODULE_PARM_DESC(srpt_service_guid, + "Using this value for ioc_guid, id_ext, and cm_listen_id" + " instead of using the node_guid of the first HCA."); + +static struct ib_client srpt_client; +static struct target_fabric_configfs *srpt_target; +static void srpt_release_channel(struct srpt_rdma_ch *ch); +static int srpt_queue_status(struct se_cmd *cmd); + +/** + * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. + */ +static inline +enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: return DMA_FROM_DEVICE; + case DMA_FROM_DEVICE: return DMA_TO_DEVICE; + default: return dir; + } +} + +/** + * srpt_sdev_name() - Return the name associated with the HCA. + * + * Examples are ib0, ib1, ... + */ +static inline const char *srpt_sdev_name(struct srpt_device *sdev) +{ + return sdev->device->name; +} + +static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch) +{ + unsigned long flags; + enum rdma_ch_state state; + + spin_lock_irqsave(&ch->spinlock, flags); + state = ch->state; + spin_unlock_irqrestore(&ch->spinlock, flags); + return state; +} + +static enum rdma_ch_state +srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) +{ + unsigned long flags; + enum rdma_ch_state prev; + + spin_lock_irqsave(&ch->spinlock, flags); + prev = ch->state; + ch->state = new_state; + spin_unlock_irqrestore(&ch->spinlock, flags); + return prev; +} + +/** + * srpt_test_and_set_ch_state() - Test and set the channel state. + * + * Returns true if and only if the channel state has been set to the new state. + */ +static bool +srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old, + enum rdma_ch_state new) +{ + unsigned long flags; + enum rdma_ch_state prev; + + spin_lock_irqsave(&ch->spinlock, flags); + prev = ch->state; + if (prev == old) + ch->state = new; + spin_unlock_irqrestore(&ch->spinlock, flags); + return prev == old; +} + +/** + * srpt_event_handler() - Asynchronous IB event callback function. + * + * Callback function called by the InfiniBand core when an asynchronous IB + * event occurs. This callback may occur in interrupt context. See also + * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand + * Architecture Specification. + */ +static void srpt_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct srpt_device *sdev; + struct srpt_port *sport; + + sdev = ib_get_client_data(event->device, &srpt_client); + if (!sdev || sdev->device != event->device) + return; + + pr_debug("ASYNC event= %d on device= %s\n", event->event, + srpt_sdev_name(sdev)); + + switch (event->event) { + case IB_EVENT_PORT_ERR: + if (event->element.port_num <= sdev->device->phys_port_cnt) { + sport = &sdev->port[event->element.port_num - 1]; + sport->lid = 0; + sport->sm_lid = 0; + } + break; + case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_PKEY_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + /* Refresh port data asynchronously. */ + if (event->element.port_num <= sdev->device->phys_port_cnt) { + sport = &sdev->port[event->element.port_num - 1]; + if (!sport->lid && !sport->sm_lid) + schedule_work(&sport->work); + } + break; + default: + printk(KERN_ERR "received unrecognized IB event %d\n", + event->event); + break; + } +} + +/** + * srpt_srq_event() - SRQ event callback function. + */ +static void srpt_srq_event(struct ib_event *event, void *ctx) +{ + printk(KERN_INFO "SRQ event %d\n", event->event); +} + +/** + * srpt_qp_event() - QP event callback function. + */ +static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +{ + pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", + event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch)); + + switch (event->event) { + case IB_EVENT_COMM_EST: + ib_cm_notify(ch->cm_id, event->event); + break; + case IB_EVENT_QP_LAST_WQE_REACHED: + if (srpt_test_and_set_ch_state(ch, CH_DRAINING, + CH_RELEASING)) + srpt_release_channel(ch); + else + pr_debug("%s: state %d - ignored LAST_WQE.\n", + ch->sess_name, srpt_get_ch_state(ch)); + break; + default: + printk(KERN_ERR "received unrecognized IB QP event %d\n", + event->event); + break; + } +} + +/** + * srpt_set_ioc() - Helper function for initializing an IOUnitInfo structure. + * + * @slot: one-based slot number. + * @value: four-bit value. + * + * Copies the lowest four bits of value in element slot of the array of four + * bit elements called c_list (controller list). The index slot is one-based. + */ +static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value) +{ + u16 id; + u8 tmp; + + id = (slot - 1) / 2; + if (slot & 0x1) { + tmp = c_list[id] & 0xf; + c_list[id] = (value << 4) | tmp; + } else { + tmp = c_list[id] & 0xf0; + c_list[id] = (value & 0xf) | tmp; + } +} + +/** + * srpt_get_class_port_info() - Copy ClassPortInfo to a management datagram. + * + * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture + * Specification. + */ +static void srpt_get_class_port_info(struct ib_dm_mad *mad) +{ + struct ib_class_port_info *cif; + + cif = (struct ib_class_port_info *)mad->data; + memset(cif, 0, sizeof *cif); + cif->base_version = 1; + cif->class_version = 1; + cif->resp_time_value = 20; + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_iou() - Write IOUnitInfo to a management datagram. + * + * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture + * Specification. See also section B.7, table B.6 in the SRP r16a document. + */ +static void srpt_get_iou(struct ib_dm_mad *mad) +{ + struct ib_dm_iou_info *ioui; + u8 slot; + int i; + + ioui = (struct ib_dm_iou_info *)mad->data; + ioui->change_id = __constant_cpu_to_be16(1); + ioui->max_controllers = 16; + + /* set present for slot 1 and empty for the rest */ + srpt_set_ioc(ioui->controller_list, 1, 1); + for (i = 1, slot = 2; i < 16; i++, slot++) + srpt_set_ioc(ioui->controller_list, slot, 0); + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_ioc() - Write IOControllerprofile to a management datagram. + * + * See also section 16.3.3.4 IOControllerProfile in the InfiniBand + * Architecture Specification. See also section B.7, table B.7 in the SRP + * r16a document. + */ +static void srpt_get_ioc(struct srpt_port *sport, u32 slot, + struct ib_dm_mad *mad) +{ + struct srpt_device *sdev = sport->sdev; + struct ib_dm_ioc_profile *iocp; + + iocp = (struct ib_dm_ioc_profile *)mad->data; + + if (!slot || slot > 16) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + return; + } + + if (slot > 2) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + return; + } + + memset(iocp, 0, sizeof *iocp); + strcpy(iocp->id_string, SRPT_ID_STRING); + iocp->guid = cpu_to_be64(srpt_service_guid); + iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); + iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id); + iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); + iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); + iocp->subsys_device_id = 0x0; + iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); + iocp->rdma_read_depth = 4; + iocp->send_size = cpu_to_be32(srp_max_req_size); + iocp->rdma_size = cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size, + 1U << 24)); + iocp->num_svc_entries = 1; + iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC | + SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC; + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_svc_entries() - Write ServiceEntries to a management datagram. + * + * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture + * Specification. See also section B.7, table B.8 in the SRP r16a document. + */ +static void srpt_get_svc_entries(u64 ioc_guid, + u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad) +{ + struct ib_dm_svc_entries *svc_entries; + + WARN_ON(!ioc_guid); + + if (!slot || slot > 16) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + return; + } + + if (slot > 2 || lo > hi || hi > 1) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + return; + } + + svc_entries = (struct ib_dm_svc_entries *)mad->data; + memset(svc_entries, 0, sizeof *svc_entries); + svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); + snprintf(svc_entries->service_entries[0].name, + sizeof(svc_entries->service_entries[0].name), + "%s%016llx", + SRP_SERVICE_NAME_PREFIX, + ioc_guid); + + mad->mad_hdr.status = 0; +} + +/** + * srpt_mgmt_method_get() - Process a received management datagram. + * @sp: source port through which the MAD has been received. + * @rq_mad: received MAD. + * @rsp_mad: response MAD. + */ +static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, + struct ib_dm_mad *rsp_mad) +{ + u16 attr_id; + u32 slot; + u8 hi, lo; + + attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id); + switch (attr_id) { + case DM_ATTR_CLASS_PORT_INFO: + srpt_get_class_port_info(rsp_mad); + break; + case DM_ATTR_IOU_INFO: + srpt_get_iou(rsp_mad); + break; + case DM_ATTR_IOC_PROFILE: + slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod); + srpt_get_ioc(sp, slot, rsp_mad); + break; + case DM_ATTR_SVC_ENTRIES: + slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod); + hi = (u8) ((slot >> 8) & 0xff); + lo = (u8) (slot & 0xff); + slot = (u16) ((slot >> 16) & 0xffff); + srpt_get_svc_entries(srpt_service_guid, + slot, hi, lo, rsp_mad); + break; + default: + rsp_mad->mad_hdr.status = + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + } +} + +/** + * srpt_mad_send_handler() - Post MAD-send callback function. + */ +static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_wc) +{ + ib_destroy_ah(mad_wc->send_buf->ah); + ib_free_send_mad(mad_wc->send_buf); +} + +/** + * srpt_mad_recv_handler() - MAD reception callback function. + */ +static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_wc) +{ + struct srpt_port *sport = (struct srpt_port *)mad_agent->context; + struct ib_ah *ah; + struct ib_mad_send_buf *rsp; + struct ib_dm_mad *dm_mad; + + if (!mad_wc || !mad_wc->recv_buf.mad) + return; + + ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc, + mad_wc->recv_buf.grh, mad_agent->port_num); + if (IS_ERR(ah)) + goto err; + + BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR); + + rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, + mad_wc->wc->pkey_index, 0, + IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA, + GFP_KERNEL); + if (IS_ERR(rsp)) + goto err_rsp; + + rsp->ah = ah; + + dm_mad = rsp->mad; + memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad); + dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + dm_mad->mad_hdr.status = 0; + + switch (mad_wc->recv_buf.mad->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad); + break; + case IB_MGMT_METHOD_SET: + dm_mad->mad_hdr.status = + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + default: + dm_mad->mad_hdr.status = + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + break; + } + + if (!ib_post_send_mad(rsp, NULL)) { + ib_free_recv_mad(mad_wc); + /* will destroy_ah & free_send_mad in send completion */ + return; + } + + ib_free_send_mad(rsp); + +err_rsp: + ib_destroy_ah(ah); +err: + ib_free_recv_mad(mad_wc); +} + +/** + * srpt_refresh_port() - Configure a HCA port. + * + * Enable InfiniBand management datagram processing, update the cached sm_lid, + * lid and gid values, and register a callback function for processing MADs + * on the specified port. + * + * Note: It is safe to call this function more than once for the same port. + */ +static int srpt_refresh_port(struct srpt_port *sport) +{ + struct ib_mad_reg_req reg_req; + struct ib_port_modify port_modify; + struct ib_port_attr port_attr; + int ret; + + memset(&port_modify, 0, sizeof port_modify); + port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + port_modify.clr_port_cap_mask = 0; + + ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + if (ret) + goto err_mod_port; + + ret = ib_query_port(sport->sdev->device, sport->port, &port_attr); + if (ret) + goto err_query_port; + + sport->sm_lid = port_attr.sm_lid; + sport->lid = port_attr.lid; + + ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); + if (ret) + goto err_query_port; + + if (!sport->mad_agent) { + memset(®_req, 0, sizeof reg_req); + reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; + reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; + set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); + set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask); + + sport->mad_agent = ib_register_mad_agent(sport->sdev->device, + sport->port, + IB_QPT_GSI, + ®_req, 0, + srpt_mad_send_handler, + srpt_mad_recv_handler, + sport); + if (IS_ERR(sport->mad_agent)) { + ret = PTR_ERR(sport->mad_agent); + sport->mad_agent = NULL; + goto err_query_port; + } + } + + return 0; + +err_query_port: + + port_modify.set_port_cap_mask = 0; + port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + +err_mod_port: + + return ret; +} + +/** + * srpt_unregister_mad_agent() - Unregister MAD callback functions. + * + * Note: It is safe to call this function more than once for the same device. + */ +static void srpt_unregister_mad_agent(struct srpt_device *sdev) +{ + struct ib_port_modify port_modify = { + .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP, + }; + struct srpt_port *sport; + int i; + + for (i = 1; i <= sdev->device->phys_port_cnt; i++) { + sport = &sdev->port[i - 1]; + WARN_ON(sport->port != i); + if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0) + printk(KERN_ERR "disabling MAD processing failed.\n"); + if (sport->mad_agent) { + ib_unregister_mad_agent(sport->mad_agent); + sport->mad_agent = NULL; + } + } +} + +/** + * srpt_alloc_ioctx() - Allocate an SRPT I/O context structure. + */ +static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev, + int ioctx_size, int dma_size, + enum dma_data_direction dir) +{ + struct srpt_ioctx *ioctx; + + ioctx = kmalloc(ioctx_size, GFP_KERNEL); + if (!ioctx) + goto err; + + ioctx->buf = kmalloc(dma_size, GFP_KERNEL); + if (!ioctx->buf) + goto err_free_ioctx; + + ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir); + if (ib_dma_mapping_error(sdev->device, ioctx->dma)) + goto err_free_buf; + + return ioctx; + +err_free_buf: + kfree(ioctx->buf); +err_free_ioctx: + kfree(ioctx); +err: + return NULL; +} + +/** + * srpt_free_ioctx() - Free an SRPT I/O context structure. + */ +static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, + int dma_size, enum dma_data_direction dir) +{ + if (!ioctx) + return; + + ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir); + kfree(ioctx->buf); + kfree(ioctx); +} + +/** + * srpt_alloc_ioctx_ring() - Allocate a ring of SRPT I/O context structures. + * @sdev: Device to allocate the I/O context ring for. + * @ring_size: Number of elements in the I/O context ring. + * @ioctx_size: I/O context size. + * @dma_size: DMA buffer size. + * @dir: DMA data direction. + */ +static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, + int ring_size, int ioctx_size, + int dma_size, enum dma_data_direction dir) +{ + struct srpt_ioctx **ring; + int i; + + WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) + && ioctx_size != sizeof(struct srpt_send_ioctx)); + + ring = kmalloc(ring_size * sizeof(ring[0]), GFP_KERNEL); + if (!ring) + goto out; + for (i = 0; i < ring_size; ++i) { + ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir); + if (!ring[i]) + goto err; + ring[i]->index = i; + } + goto out; + +err: + while (--i >= 0) + srpt_free_ioctx(sdev, ring[i], dma_size, dir); + kfree(ring); + ring = NULL; +out: + return ring; +} + +/** + * srpt_free_ioctx_ring() - Free the ring of SRPT I/O context structures. + */ +static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, + struct srpt_device *sdev, int ring_size, + int dma_size, enum dma_data_direction dir) +{ + int i; + + for (i = 0; i < ring_size; ++i) + srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); + kfree(ioctx_ring); +} + +/** + * srpt_get_cmd_state() - Get the state of a SCSI command. + */ +static enum srpt_command_state srpt_get_cmd_state(struct srpt_send_ioctx *ioctx) +{ + enum srpt_command_state state; + unsigned long flags; + + BUG_ON(!ioctx); + + spin_lock_irqsave(&ioctx->spinlock, flags); + state = ioctx->state; + spin_unlock_irqrestore(&ioctx->spinlock, flags); + return state; +} + +/** + * srpt_set_cmd_state() - Set the state of a SCSI command. + * + * Does not modify the state of aborted commands. Returns the previous command + * state. + */ +static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx, + enum srpt_command_state new) +{ + enum srpt_command_state previous; + unsigned long flags; + + BUG_ON(!ioctx); + + spin_lock_irqsave(&ioctx->spinlock, flags); + previous = ioctx->state; + if (previous != SRPT_STATE_DONE) + ioctx->state = new; + spin_unlock_irqrestore(&ioctx->spinlock, flags); + + return previous; +} + +/** + * srpt_test_and_set_cmd_state() - Test and set the state of a command. + * + * Returns true if and only if the previous command state was equal to 'old'. + */ +static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, + enum srpt_command_state old, + enum srpt_command_state new) +{ + enum srpt_command_state previous; + unsigned long flags; + + WARN_ON(!ioctx); + WARN_ON(old == SRPT_STATE_DONE); + WARN_ON(new == SRPT_STATE_NEW); + + spin_lock_irqsave(&ioctx->spinlock, flags); + previous = ioctx->state; + if (previous == old) + ioctx->state = new; + spin_unlock_irqrestore(&ioctx->spinlock, flags); + return previous == old; +} + +/** + * srpt_post_recv() - Post an IB receive request. + */ +static int srpt_post_recv(struct srpt_device *sdev, + struct srpt_recv_ioctx *ioctx) +{ + struct ib_sge list; + struct ib_recv_wr wr, *bad_wr; + + BUG_ON(!sdev); + wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index); + + list.addr = ioctx->ioctx.dma; + list.length = srp_max_req_size; + list.lkey = sdev->mr->lkey; + + wr.next = NULL; + wr.sg_list = &list; + wr.num_sge = 1; + + return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); +} + +/** + * srpt_post_send() - Post an IB send request. + * + * Returns zero upon success and a non-zero value upon failure. + */ +static int srpt_post_send(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, int len) +{ + struct ib_sge list; + struct ib_send_wr wr, *bad_wr; + struct srpt_device *sdev = ch->sport->sdev; + int ret; + + atomic_inc(&ch->req_lim); + + ret = -ENOMEM; + if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) { + printk(KERN_WARNING "IB send queue full (needed 1)\n"); + goto out; + } + + ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len, + DMA_TO_DEVICE); + + list.addr = ioctx->ioctx.dma; + list.length = len; + list.lkey = sdev->mr->lkey; + + wr.next = NULL; + wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index); + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(ch->qp, &wr, &bad_wr); + +out: + if (ret < 0) { + atomic_inc(&ch->sq_wr_avail); + atomic_dec(&ch->req_lim); + } + return ret; +} + +/** + * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. + * @ioctx: Pointer to the I/O context associated with the request. + * @srp_cmd: Pointer to the SRP_CMD request data. + * @dir: Pointer to the variable to which the transfer direction will be + * written. + * @data_len: Pointer to the variable to which the total data length of all + * descriptors in the SRP_CMD request will be written. + * + * This function initializes ioctx->nrbuf and ioctx->r_bufs. + * + * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors; + * -ENOMEM when memory allocation fails and zero upon success. + */ +static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, + struct srp_cmd *srp_cmd, + enum dma_data_direction *dir, u64 *data_len) +{ + struct srp_indirect_buf *idb; + struct srp_direct_buf *db; + unsigned add_cdb_offset; + int ret; + + /* + * The pointer computations below will only be compiled correctly + * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check + * whether srp_cmd::add_data has been declared as a byte pointer. + */ + BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) + && !__same_type(srp_cmd->add_data[0], (u8)0)); + + BUG_ON(!dir); + BUG_ON(!data_len); + + ret = 0; + *data_len = 0; + + /* + * The lower four bits of the buffer format field contain the DATA-IN + * buffer descriptor format, and the highest four bits contain the + * DATA-OUT buffer descriptor format. + */ + *dir = DMA_NONE; + if (srp_cmd->buf_fmt & 0xf) + /* DATA-IN: transfer data from target to initiator (read). */ + *dir = DMA_FROM_DEVICE; + else if (srp_cmd->buf_fmt >> 4) + /* DATA-OUT: transfer data from initiator to target (write). */ + *dir = DMA_TO_DEVICE; + + /* + * According to the SRP spec, the lower two bits of the 'ADDITIONAL + * CDB LENGTH' field are reserved and the size in bytes of this field + * is four times the value specified in bits 3..7. Hence the "& ~3". + */ + add_cdb_offset = srp_cmd->add_cdb_len & ~3; + if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { + ioctx->n_rbuf = 1; + ioctx->rbufs = &ioctx->single_rbuf; + + db = (struct srp_direct_buf *)(srp_cmd->add_data + + add_cdb_offset); + memcpy(ioctx->rbufs, db, sizeof *db); + *data_len = be32_to_cpu(db->len); + } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { + idb = (struct srp_indirect_buf *)(srp_cmd->add_data + + add_cdb_offset); + + ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db; + + if (ioctx->n_rbuf > + (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { + printk(KERN_ERR "received unsupported SRP_CMD request" + " type (%u out + %u in != %u / %zu)\n", + srp_cmd->data_out_desc_cnt, + srp_cmd->data_in_desc_cnt, + be32_to_cpu(idb->table_desc.len), + sizeof(*db)); + ioctx->n_rbuf = 0; + ret = -EINVAL; + goto out; + } + + if (ioctx->n_rbuf == 1) + ioctx->rbufs = &ioctx->single_rbuf; + else { + ioctx->rbufs = + kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC); + if (!ioctx->rbufs) { + ioctx->n_rbuf = 0; + ret = -ENOMEM; + goto out; + } + } + + db = idb->desc_list; + memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db); + *data_len = be32_to_cpu(idb->len); + } +out: + return ret; +} + +/** + * srpt_init_ch_qp() - Initialize queue pair attributes. + * + * Initialized the attributes of queue pair 'qp' by allowing local write, + * remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT. + */ +static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr *attr; + int ret; + + attr = kzalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->qp_state = IB_QPS_INIT; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + attr->port_num = ch->sport->port; + attr->pkey_index = 0; + + ret = ib_modify_qp(qp, attr, + IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT | + IB_QP_PKEY_INDEX); + + kfree(attr); + return ret; +} + +/** + * srpt_ch_qp_rtr() - Change the state of a channel to 'ready to receive' (RTR). + * @ch: channel of the queue pair. + * @qp: queue pair to change the state of. + * + * Returns zero upon success and a negative value upon failure. + * + * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. + * If this structure ever becomes larger, it might be necessary to allocate + * it dynamically instead of on the stack. + */ +static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int attr_mask; + int ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask); + if (ret) + goto out; + + qp_attr.max_dest_rd_atomic = 4; + + ret = ib_modify_qp(qp, &qp_attr, attr_mask); + +out: + return ret; +} + +/** + * srpt_ch_qp_rts() - Change the state of a channel to 'ready to send' (RTS). + * @ch: channel of the queue pair. + * @qp: queue pair to change the state of. + * + * Returns zero upon success and a negative value upon failure. + * + * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. + * If this structure ever becomes larger, it might be necessary to allocate + * it dynamically instead of on the stack. + */ +static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int attr_mask; + int ret; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask); + if (ret) + goto out; + + qp_attr.max_rd_atomic = 4; + + ret = ib_modify_qp(qp, &qp_attr, attr_mask); + +out: + return ret; +} + +/** + * srpt_ch_qp_err() - Set the channel queue pair state to 'error'. + */ +static int srpt_ch_qp_err(struct srpt_rdma_ch *ch) +{ + struct ib_qp_attr qp_attr; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE); +} + +/** + * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list. + */ +static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + struct scatterlist *sg; + enum dma_data_direction dir; + + BUG_ON(!ch); + BUG_ON(!ioctx); + BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius); + + while (ioctx->n_rdma) + kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge); + + kfree(ioctx->rdma_ius); + ioctx->rdma_ius = NULL; + + if (ioctx->mapped_sg_count) { + sg = ioctx->sg; + WARN_ON(!sg); + dir = ioctx->cmd.data_direction; + BUG_ON(dir == DMA_NONE); + ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, + opposite_dma_dir(dir)); + ioctx->mapped_sg_count = 0; + } +} + +/** + * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list. + */ +static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + struct se_cmd *cmd; + struct scatterlist *sg, *sg_orig; + int sg_cnt; + enum dma_data_direction dir; + struct rdma_iu *riu; + struct srp_direct_buf *db; + dma_addr_t dma_addr; + struct ib_sge *sge; + u64 raddr; + u32 rsize; + u32 tsize; + u32 dma_len; + int count, nrdma; + int i, j, k; + + BUG_ON(!ch); + BUG_ON(!ioctx); + cmd = &ioctx->cmd; + dir = cmd->data_direction; + BUG_ON(dir == DMA_NONE); + + transport_do_task_sg_chain(cmd); + ioctx->sg = sg = sg_orig = cmd->t_tasks_sg_chained; + ioctx->sg_cnt = sg_cnt = cmd->t_tasks_sg_chained_no; + + count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, + opposite_dma_dir(dir)); + if (unlikely(!count)) + return -EAGAIN; + + ioctx->mapped_sg_count = count; + + if (ioctx->rdma_ius && ioctx->n_rdma_ius) + nrdma = ioctx->n_rdma_ius; + else { + nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE + + ioctx->n_rbuf; + + ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL); + if (!ioctx->rdma_ius) + goto free_mem; + + ioctx->n_rdma_ius = nrdma; + } + + db = ioctx->rbufs; + tsize = cmd->data_length; + dma_len = sg_dma_len(&sg[0]); + riu = ioctx->rdma_ius; + + /* + * For each remote desc - calculate the #ib_sge. + * If #ib_sge < SRPT_DEF_SG_PER_WQE per rdma operation then + * each remote desc rdma_iu is required a rdma wr; + * else + * we need to allocate extra rdma_iu to carry extra #ib_sge in + * another rdma wr + */ + for (i = 0, j = 0; + j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { + rsize = be32_to_cpu(db->len); + raddr = be64_to_cpu(db->va); + riu->raddr = raddr; + riu->rkey = be32_to_cpu(db->key); + riu->sge_cnt = 0; + + /* calculate how many sge required for this remote_buf */ + while (rsize > 0 && tsize > 0) { + + if (rsize >= dma_len) { + tsize -= dma_len; + rsize -= dma_len; + raddr += dma_len; + + if (tsize > 0) { + ++j; + if (j < count) { + sg = sg_next(sg); + dma_len = sg_dma_len(sg); + } + } + } else { + tsize -= rsize; + dma_len -= rsize; + rsize = 0; + } + + ++riu->sge_cnt; + + if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) { + ++ioctx->n_rdma; + riu->sge = + kmalloc(riu->sge_cnt * sizeof *riu->sge, + GFP_KERNEL); + if (!riu->sge) + goto free_mem; + + ++riu; + riu->sge_cnt = 0; + riu->raddr = raddr; + riu->rkey = be32_to_cpu(db->key); + } + } + + ++ioctx->n_rdma; + riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge, + GFP_KERNEL); + if (!riu->sge) + goto free_mem; + } + + db = ioctx->rbufs; + tsize = cmd->data_length; + riu = ioctx->rdma_ius; + sg = sg_orig; + dma_len = sg_dma_len(&sg[0]); + dma_addr = sg_dma_address(&sg[0]); + + /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */ + for (i = 0, j = 0; + j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { + rsize = be32_to_cpu(db->len); + sge = riu->sge; + k = 0; + + while (rsize > 0 && tsize > 0) { + sge->addr = dma_addr; + sge->lkey = ch->sport->sdev->mr->lkey; + + if (rsize >= dma_len) { + sge->length = + (tsize < dma_len) ? tsize : dma_len; + tsize -= dma_len; + rsize -= dma_len; + + if (tsize > 0) { + ++j; + if (j < count) { + sg = sg_next(sg); + dma_len = sg_dma_len(sg); + dma_addr = sg_dma_address(sg); + } + } + } else { + sge->length = (tsize < rsize) ? tsize : rsize; + tsize -= rsize; + dma_len -= rsize; + dma_addr += rsize; + rsize = 0; + } + + ++k; + if (k == riu->sge_cnt && rsize > 0 && tsize > 0) { + ++riu; + sge = riu->sge; + k = 0; + } else if (rsize > 0 && tsize > 0) + ++sge; + } + } + + return 0; + +free_mem: + srpt_unmap_sg_to_ib_sge(ch, ioctx); + + return -ENOMEM; +} + +/** + * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator. + */ +static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) +{ + struct srpt_send_ioctx *ioctx; + unsigned long flags; + + BUG_ON(!ch); + + ioctx = NULL; + spin_lock_irqsave(&ch->spinlock, flags); + if (!list_empty(&ch->free_list)) { + ioctx = list_first_entry(&ch->free_list, + struct srpt_send_ioctx, free_list); + list_del(&ioctx->free_list); + } + spin_unlock_irqrestore(&ch->spinlock, flags); + + if (!ioctx) + return ioctx; + + BUG_ON(ioctx->ch != ch); + kref_init(&ioctx->kref); + spin_lock_init(&ioctx->spinlock); + ioctx->state = SRPT_STATE_NEW; + ioctx->n_rbuf = 0; + ioctx->rbufs = NULL; + ioctx->n_rdma = 0; + ioctx->n_rdma_ius = 0; + ioctx->rdma_ius = NULL; + ioctx->mapped_sg_count = 0; + init_completion(&ioctx->tx_done); + ioctx->queue_status_only = false; + /* + * transport_init_se_cmd() does not initialize all fields, so do it + * here. + */ + memset(&ioctx->cmd, 0, sizeof(ioctx->cmd)); + memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data)); + + return ioctx; +} + +/** + * srpt_put_send_ioctx() - Free up resources. + */ +static void srpt_put_send_ioctx(struct srpt_send_ioctx *ioctx) +{ + struct srpt_rdma_ch *ch; + unsigned long flags; + + BUG_ON(!ioctx); + ch = ioctx->ch; + BUG_ON(!ch); + + WARN_ON(srpt_get_cmd_state(ioctx) != SRPT_STATE_DONE); + + srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); + transport_generic_free_cmd(&ioctx->cmd, 0); + + if (ioctx->n_rbuf > 1) { + kfree(ioctx->rbufs); + ioctx->rbufs = NULL; + ioctx->n_rbuf = 0; + } + + spin_lock_irqsave(&ch->spinlock, flags); + list_add(&ioctx->free_list, &ch->free_list); + spin_unlock_irqrestore(&ch->spinlock, flags); +} + +static void srpt_put_send_ioctx_kref(struct kref *kref) +{ + srpt_put_send_ioctx(container_of(kref, struct srpt_send_ioctx, kref)); +} + +/** + * srpt_abort_cmd() - Abort a SCSI command. + * @ioctx: I/O context associated with the SCSI command. + * @context: Preferred execution context. + */ +static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) +{ + enum srpt_command_state state; + unsigned long flags; + + BUG_ON(!ioctx); + + /* + * If the command is in a state where the target core is waiting for + * the ib_srpt driver, change the state to the next state. Changing + * the state of the command from SRPT_STATE_NEED_DATA to + * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this + * function a second time. + */ + + spin_lock_irqsave(&ioctx->spinlock, flags); + state = ioctx->state; + switch (state) { + case SRPT_STATE_NEED_DATA: + ioctx->state = SRPT_STATE_DATA_IN; + break; + case SRPT_STATE_DATA_IN: + case SRPT_STATE_CMD_RSP_SENT: + case SRPT_STATE_MGMT_RSP_SENT: + ioctx->state = SRPT_STATE_DONE; + break; + default: + break; + } + spin_unlock_irqrestore(&ioctx->spinlock, flags); + + if (state == SRPT_STATE_DONE) + goto out; + + pr_debug("Aborting cmd with state %d and tag %lld\n", state, + ioctx->tag); + + switch (state) { + case SRPT_STATE_NEW: + case SRPT_STATE_DATA_IN: + case SRPT_STATE_MGMT: + /* + * Do nothing - defer abort processing until + * srpt_queue_response() is invoked. + */ + WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false)); + break; + case SRPT_STATE_NEED_DATA: + /* DMA_TO_DEVICE (write) - RDMA read error. */ + spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); + ioctx->cmd.transport_state |= CMD_T_LUN_STOP; + spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); + transport_generic_handle_data(&ioctx->cmd); + break; + case SRPT_STATE_CMD_RSP_SENT: + /* + * SRP_RSP sending failed or the SRP_RSP send completion has + * not been received in time. + */ + srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); + spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); + ioctx->cmd.transport_state |= CMD_T_LUN_STOP; + spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + break; + case SRPT_STATE_MGMT_RSP_SENT: + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + break; + default: + WARN_ON("ERROR: unexpected command state"); + break; + } + +out: + return state; +} + +/** + * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion. + */ +static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) +{ + struct srpt_send_ioctx *ioctx; + enum srpt_command_state state; + struct se_cmd *cmd; + u32 index; + + atomic_inc(&ch->sq_wr_avail); + + index = idx_from_wr_id(wr_id); + ioctx = ch->ioctx_ring[index]; + state = srpt_get_cmd_state(ioctx); + cmd = &ioctx->cmd; + + WARN_ON(state != SRPT_STATE_CMD_RSP_SENT + && state != SRPT_STATE_MGMT_RSP_SENT + && state != SRPT_STATE_NEED_DATA + && state != SRPT_STATE_DONE); + + /* If SRP_RSP sending failed, undo the ch->req_lim change. */ + if (state == SRPT_STATE_CMD_RSP_SENT + || state == SRPT_STATE_MGMT_RSP_SENT) + atomic_dec(&ch->req_lim); + + srpt_abort_cmd(ioctx); +} + +/** + * srpt_handle_send_comp() - Process an IB send completion notification. + */ +static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + enum srpt_command_state state; + + atomic_inc(&ch->sq_wr_avail); + + state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + + if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT + && state != SRPT_STATE_MGMT_RSP_SENT + && state != SRPT_STATE_DONE)) + pr_debug("state = %d\n", state); + + if (state != SRPT_STATE_DONE) + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + else + printk(KERN_ERR "IB completion has been received too late for" + " wr_id = %u.\n", ioctx->ioctx.index); +} + +/** + * srpt_handle_rdma_comp() - Process an IB RDMA completion notification. + * + * Note: transport_generic_handle_data() is asynchronous so unmapping the + * data that has been transferred via IB RDMA must be postponed until the + * check_stop_free() callback. + */ +static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + enum srpt_opcode opcode) +{ + WARN_ON(ioctx->n_rdma <= 0); + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); + + if (opcode == SRPT_RDMA_READ_LAST) { + if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, + SRPT_STATE_DATA_IN)) + transport_generic_handle_data(&ioctx->cmd); + else + printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__, + __LINE__, srpt_get_cmd_state(ioctx)); + } else if (opcode == SRPT_RDMA_ABORT) { + ioctx->rdma_aborted = true; + } else { + WARN(true, "unexpected opcode %d\n", opcode); + } +} + +/** + * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion. + */ +static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + enum srpt_opcode opcode) +{ + struct se_cmd *cmd; + enum srpt_command_state state; + unsigned long flags; + + cmd = &ioctx->cmd; + state = srpt_get_cmd_state(ioctx); + switch (opcode) { + case SRPT_RDMA_READ_LAST: + if (ioctx->n_rdma <= 0) { + printk(KERN_ERR "Received invalid RDMA read" + " error completion with idx %d\n", + ioctx->ioctx.index); + break; + } + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); + if (state == SRPT_STATE_NEED_DATA) + srpt_abort_cmd(ioctx); + else + printk(KERN_ERR "%s[%d]: wrong state = %d\n", + __func__, __LINE__, state); + break; + case SRPT_RDMA_WRITE_LAST: + spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags); + ioctx->cmd.transport_state |= CMD_T_LUN_STOP; + spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags); + break; + default: + printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__, + __LINE__, opcode); + break; + } +} + +/** + * srpt_build_cmd_rsp() - Build an SRP_RSP response. + * @ch: RDMA channel through which the request has been received. + * @ioctx: I/O context associated with the SRP_CMD request. The response will + * be built in the buffer ioctx->buf points at and hence this function will + * overwrite the request data. + * @tag: tag of the request for which this response is being generated. + * @status: value for the STATUS field of the SRP_RSP information unit. + * + * Returns the size in bytes of the SRP_RSP response. + * + * An SRP_RSP response contains a SCSI status or service response. See also + * section 6.9 in the SRP r16a document for the format of an SRP_RSP + * response. See also SPC-2 for more information about sense data. + */ +static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, u64 tag, + int status) +{ + struct srp_rsp *srp_rsp; + const u8 *sense_data; + int sense_data_len, max_sense_len; + + /* + * The lowest bit of all SAM-3 status codes is zero (see also + * paragraph 5.3 in SAM-3). + */ + WARN_ON(status & 1); + + srp_rsp = ioctx->ioctx.buf; + BUG_ON(!srp_rsp); + + sense_data = ioctx->sense_data; + sense_data_len = ioctx->cmd.scsi_sense_length; + WARN_ON(sense_data_len > sizeof(ioctx->sense_data)); + + memset(srp_rsp, 0, sizeof *srp_rsp); + srp_rsp->opcode = SRP_RSP; + srp_rsp->req_lim_delta = + __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->tag = tag; + srp_rsp->status = status; + + if (sense_data_len) { + BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); + max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); + if (sense_data_len > max_sense_len) { + printk(KERN_WARNING "truncated sense data from %d to %d" + " bytes\n", sense_data_len, max_sense_len); + sense_data_len = max_sense_len; + } + + srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID; + srp_rsp->sense_data_len = cpu_to_be32(sense_data_len); + memcpy(srp_rsp + 1, sense_data, sense_data_len); + } + + return sizeof(*srp_rsp) + sense_data_len; +} + +/** + * srpt_build_tskmgmt_rsp() - Build a task management response. + * @ch: RDMA channel through which the request has been received. + * @ioctx: I/O context in which the SRP_RSP response will be built. + * @rsp_code: RSP_CODE that will be stored in the response. + * @tag: Tag of the request for which this response is being generated. + * + * Returns the size in bytes of the SRP_RSP response. + * + * An SRP_RSP response contains a SCSI status or service response. See also + * section 6.9 in the SRP r16a document for the format of an SRP_RSP + * response. + */ +static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + u8 rsp_code, u64 tag) +{ + struct srp_rsp *srp_rsp; + int resp_data_len; + int resp_len; + + resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4; + resp_len = sizeof(*srp_rsp) + resp_data_len; + + srp_rsp = ioctx->ioctx.buf; + BUG_ON(!srp_rsp); + memset(srp_rsp, 0, sizeof *srp_rsp); + + srp_rsp->opcode = SRP_RSP; + srp_rsp->req_lim_delta = __constant_cpu_to_be32(1 + + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->tag = tag; + + if (rsp_code != SRP_TSK_MGMT_SUCCESS) { + srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; + srp_rsp->resp_data_len = cpu_to_be32(resp_data_len); + srp_rsp->data[3] = rsp_code; + } + + return resp_len; +} + +#define NO_SUCH_LUN ((uint64_t)-1LL) + +/* + * SCSI LUN addressing method. See also SAM-2 and the section about + * eight byte LUNs. + */ +enum scsi_lun_addr_method { + SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0, + SCSI_LUN_ADDR_METHOD_FLAT = 1, + SCSI_LUN_ADDR_METHOD_LUN = 2, + SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3, +}; + +/* + * srpt_unpack_lun() - Convert from network LUN to linear LUN. + * + * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte + * order (big endian) to a linear LUN. Supports three LUN addressing methods: + * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40). + */ +static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) +{ + uint64_t res = NO_SUCH_LUN; + int addressing_method; + + if (unlikely(len < 2)) { + printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or " + "more", len); + goto out; + } + + switch (len) { + case 8: + if ((*((__be64 *)lun) & + __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) + goto out_err; + break; + case 4: + if (*((__be16 *)&lun[2]) != 0) + goto out_err; + break; + case 6: + if (*((__be32 *)&lun[2]) != 0) + goto out_err; + break; + case 2: + break; + default: + goto out_err; + } + + addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */ + switch (addressing_method) { + case SCSI_LUN_ADDR_METHOD_PERIPHERAL: + case SCSI_LUN_ADDR_METHOD_FLAT: + case SCSI_LUN_ADDR_METHOD_LUN: + res = *(lun + 1) | (((*lun) & 0x3f) << 8); + break; + + case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN: + default: + printk(KERN_ERR "Unimplemented LUN addressing method %u", + addressing_method); + break; + } + +out: + return res; + +out_err: + printk(KERN_ERR "Support for multi-level LUNs has not yet been" + " implemented"); + goto out; +} + +static int srpt_check_stop_free(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); + return kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); +} + +/** + * srpt_handle_cmd() - Process SRP_CMD. + */ +static int srpt_handle_cmd(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct se_cmd *cmd; + struct srp_cmd *srp_cmd; + uint64_t unpacked_lun; + u64 data_len; + enum dma_data_direction dir; + int ret; + + BUG_ON(!send_ioctx); + + srp_cmd = recv_ioctx->ioctx.buf; + kref_get(&send_ioctx->kref); + cmd = &send_ioctx->cmd; + send_ioctx->tag = srp_cmd->tag; + + switch (srp_cmd->task_attr) { + case SRP_CMD_SIMPLE_Q: + cmd->sam_task_attr = MSG_SIMPLE_TAG; + break; + case SRP_CMD_ORDERED_Q: + default: + cmd->sam_task_attr = MSG_ORDERED_TAG; + break; + case SRP_CMD_HEAD_OF_Q: + cmd->sam_task_attr = MSG_HEAD_TAG; + break; + case SRP_CMD_ACA: + cmd->sam_task_attr = MSG_ACA_TAG; + break; + } + + ret = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len); + if (ret) { + printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n", + srp_cmd->tag); + cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + cmd->scsi_sense_reason = TCM_INVALID_CDB_FIELD; + kref_put(&send_ioctx->kref, srpt_put_send_ioctx_kref); + goto send_sense; + } + + cmd->data_length = data_len; + cmd->data_direction = dir; + unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun, + sizeof(srp_cmd->lun)); + if (transport_lookup_cmd_lun(cmd, unpacked_lun) < 0) { + kref_put(&send_ioctx->kref, srpt_put_send_ioctx_kref); + goto send_sense; + } + ret = transport_generic_allocate_tasks(cmd, srp_cmd->cdb); + if (ret < 0) { + kref_put(&send_ioctx->kref, srpt_put_send_ioctx_kref); + if (cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT) { + srpt_queue_status(cmd); + return 0; + } else + goto send_sense; + } + + transport_handle_cdb_direct(cmd); + return 0; + +send_sense: + transport_send_check_condition_and_sense(cmd, cmd->scsi_sense_reason, + 0); + return -1; +} + +/** + * srpt_rx_mgmt_fn_tag() - Process a task management function by tag. + * @ch: RDMA channel of the task management request. + * @fn: Task management function to perform. + * @req_tag: Tag of the SRP task management request. + * @mgmt_ioctx: I/O context of the task management request. + * + * Returns zero if the target core will process the task management + * request asynchronously. + * + * Note: It is assumed that the initiator serializes tag-based task management + * requests. + */ +static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) +{ + struct srpt_device *sdev; + struct srpt_rdma_ch *ch; + struct srpt_send_ioctx *target; + int ret, i; + + ret = -EINVAL; + ch = ioctx->ch; + BUG_ON(!ch); + BUG_ON(!ch->sport); + sdev = ch->sport->sdev; + BUG_ON(!sdev); + spin_lock_irq(&sdev->spinlock); + for (i = 0; i < ch->rq_size; ++i) { + target = ch->ioctx_ring[i]; + if (target->cmd.se_lun == ioctx->cmd.se_lun && + target->tag == tag && + srpt_get_cmd_state(target) != SRPT_STATE_DONE) { + ret = 0; + /* now let the target core abort &target->cmd; */ + break; + } + } + spin_unlock_irq(&sdev->spinlock); + return ret; +} + +static int srp_tmr_to_tcm(int fn) +{ + switch (fn) { + case SRP_TSK_ABORT_TASK: + return TMR_ABORT_TASK; + case SRP_TSK_ABORT_TASK_SET: + return TMR_ABORT_TASK_SET; + case SRP_TSK_CLEAR_TASK_SET: + return TMR_CLEAR_TASK_SET; + case SRP_TSK_LUN_RESET: + return TMR_LUN_RESET; + case SRP_TSK_CLEAR_ACA: + return TMR_CLEAR_ACA; + default: + return -1; + } +} + +/** + * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit. + * + * Returns 0 if and only if the request will be processed by the target core. + * + * For more information about SRP_TSK_MGMT information units, see also section + * 6.7 in the SRP r16a document. + */ +static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct srp_tsk_mgmt *srp_tsk; + struct se_cmd *cmd; + uint64_t unpacked_lun; + int tcm_tmr; + int res; + + BUG_ON(!send_ioctx); + + srp_tsk = recv_ioctx->ioctx.buf; + cmd = &send_ioctx->cmd; + + pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld" + " cm_id %p sess %p\n", srp_tsk->tsk_mgmt_func, + srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess); + + srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); + send_ioctx->tag = srp_tsk->tag; + tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); + if (tcm_tmr < 0) { + send_ioctx->cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + send_ioctx->cmd.se_tmr_req->response = + TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; + goto process_tmr; + } + res = core_tmr_alloc_req(cmd, NULL, tcm_tmr, GFP_KERNEL); + if (res < 0) { + send_ioctx->cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; + goto process_tmr; + } + + unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun, + sizeof(srp_tsk->lun)); + res = transport_lookup_tmr_lun(&send_ioctx->cmd, unpacked_lun); + if (res) { + pr_debug("rejecting TMR for LUN %lld\n", unpacked_lun); + send_ioctx->cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + send_ioctx->cmd.se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST; + goto process_tmr; + } + + if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) + srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag); + +process_tmr: + kref_get(&send_ioctx->kref); + if (!(send_ioctx->cmd.se_cmd_flags & SCF_SCSI_CDB_EXCEPTION)) + transport_generic_handle_tmr(&send_ioctx->cmd); + else + transport_send_check_condition_and_sense(cmd, + cmd->scsi_sense_reason, 0); + +} + +/** + * srpt_handle_new_iu() - Process a newly received information unit. + * @ch: RDMA channel through which the information unit has been received. + * @ioctx: SRPT I/O context associated with the information unit. + */ +static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct srp_cmd *srp_cmd; + enum rdma_ch_state ch_state; + + BUG_ON(!ch); + BUG_ON(!recv_ioctx); + + ib_dma_sync_single_for_cpu(ch->sport->sdev->device, + recv_ioctx->ioctx.dma, srp_max_req_size, + DMA_FROM_DEVICE); + + ch_state = srpt_get_ch_state(ch); + if (unlikely(ch_state == CH_CONNECTING)) { + list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); + goto out; + } + + if (unlikely(ch_state != CH_LIVE)) + goto out; + + srp_cmd = recv_ioctx->ioctx.buf; + if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) { + if (!send_ioctx) + send_ioctx = srpt_get_send_ioctx(ch); + if (unlikely(!send_ioctx)) { + list_add_tail(&recv_ioctx->wait_list, + &ch->cmd_wait_list); + goto out; + } + } + + transport_init_se_cmd(&send_ioctx->cmd, &srpt_target->tf_ops, ch->sess, + 0, DMA_NONE, MSG_SIMPLE_TAG, + send_ioctx->sense_data); + + switch (srp_cmd->opcode) { + case SRP_CMD: + srpt_handle_cmd(ch, recv_ioctx, send_ioctx); + break; + case SRP_TSK_MGMT: + srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx); + break; + case SRP_I_LOGOUT: + printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n"); + break; + case SRP_CRED_RSP: + pr_debug("received SRP_CRED_RSP\n"); + break; + case SRP_AER_RSP: + pr_debug("received SRP_AER_RSP\n"); + break; + case SRP_RSP: + printk(KERN_ERR "Received SRP_RSP\n"); + break; + default: + printk(KERN_ERR "received IU with unknown opcode 0x%x\n", + srp_cmd->opcode); + break; + } + + srpt_post_recv(ch->sport->sdev, recv_ioctx); +out: + return; +} + +static void srpt_process_rcv_completion(struct ib_cq *cq, + struct srpt_rdma_ch *ch, + struct ib_wc *wc) +{ + struct srpt_device *sdev = ch->sport->sdev; + struct srpt_recv_ioctx *ioctx; + u32 index; + + index = idx_from_wr_id(wc->wr_id); + if (wc->status == IB_WC_SUCCESS) { + int req_lim; + + req_lim = atomic_dec_return(&ch->req_lim); + if (unlikely(req_lim < 0)) + printk(KERN_ERR "req_lim = %d < 0\n", req_lim); + ioctx = sdev->ioctx_ring[index]; + srpt_handle_new_iu(ch, ioctx, NULL); + } else { + printk(KERN_INFO "receiving failed for idx %u with status %d\n", + index, wc->status); + } +} + +/** + * srpt_process_send_completion() - Process an IB send completion. + * + * Note: Although this has not yet been observed during tests, at least in + * theory it is possible that the srpt_get_send_ioctx() call invoked by + * srpt_handle_new_iu() fails. This is possible because the req_lim_delta + * value in each response is set to one, and it is possible that this response + * makes the initiator send a new request before the send completion for that + * response has been processed. This could e.g. happen if the call to + * srpt_put_send_iotcx() is delayed because of a higher priority interrupt or + * if IB retransmission causes generation of the send completion to be + * delayed. Incoming information units for which srpt_get_send_ioctx() fails + * are queued on cmd_wait_list. The code below processes these delayed + * requests one at a time. + */ +static void srpt_process_send_completion(struct ib_cq *cq, + struct srpt_rdma_ch *ch, + struct ib_wc *wc) +{ + struct srpt_send_ioctx *send_ioctx; + uint32_t index; + enum srpt_opcode opcode; + + index = idx_from_wr_id(wc->wr_id); + opcode = opcode_from_wr_id(wc->wr_id); + send_ioctx = ch->ioctx_ring[index]; + if (wc->status == IB_WC_SUCCESS) { + if (opcode == SRPT_SEND) + srpt_handle_send_comp(ch, send_ioctx); + else { + WARN_ON(opcode != SRPT_RDMA_ABORT && + wc->opcode != IB_WC_RDMA_READ); + srpt_handle_rdma_comp(ch, send_ioctx, opcode); + } + } else { + if (opcode == SRPT_SEND) { + printk(KERN_INFO "sending response for idx %u failed" + " with status %d\n", index, wc->status); + srpt_handle_send_err_comp(ch, wc->wr_id); + } else if (opcode != SRPT_RDMA_MID) { + printk(KERN_INFO "RDMA t %d for idx %u failed with" + " status %d", opcode, index, wc->status); + srpt_handle_rdma_err_comp(ch, send_ioctx, opcode); + } + } + + while (unlikely(opcode == SRPT_SEND + && !list_empty(&ch->cmd_wait_list) + && srpt_get_ch_state(ch) == CH_LIVE + && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) { + struct srpt_recv_ioctx *recv_ioctx; + + recv_ioctx = list_first_entry(&ch->cmd_wait_list, + struct srpt_recv_ioctx, + wait_list); + list_del(&recv_ioctx->wait_list); + srpt_handle_new_iu(ch, recv_ioctx, send_ioctx); + } +} + +static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch) +{ + struct ib_wc *const wc = ch->wc; + int i, n; + + WARN_ON(cq != ch->cq); + + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) { + for (i = 0; i < n; i++) { + if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV) + srpt_process_rcv_completion(cq, ch, &wc[i]); + else + srpt_process_send_completion(cq, ch, &wc[i]); + } + } +} + +/** + * srpt_completion() - IB completion queue callback function. + * + * Notes: + * - It is guaranteed that a completion handler will never be invoked + * concurrently on two different CPUs for the same completion queue. See also + * Documentation/infiniband/core_locking.txt and the implementation of + * handle_edge_irq() in kernel/irq/chip.c. + * - When threaded IRQs are enabled, completion handlers are invoked in thread + * context instead of interrupt context. + */ +static void srpt_completion(struct ib_cq *cq, void *ctx) +{ + struct srpt_rdma_ch *ch = ctx; + + wake_up_interruptible(&ch->wait_queue); +} + +static int srpt_compl_thread(void *arg) +{ + struct srpt_rdma_ch *ch; + + /* Hibernation / freezing of the SRPT kernel thread is not supported. */ + current->flags |= PF_NOFREEZE; + + ch = arg; + BUG_ON(!ch); + printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n", + ch->sess_name, ch->thread->comm, current->pid); + while (!kthread_should_stop()) { + wait_event_interruptible(ch->wait_queue, + (srpt_process_completion(ch->cq, ch), + kthread_should_stop())); + } + printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n", + ch->sess_name, ch->thread->comm, current->pid); + return 0; +} + +/** + * srpt_create_ch_ib() - Create receive and send completion queues. + */ +static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) +{ + struct ib_qp_init_attr *qp_init; + struct srpt_port *sport = ch->sport; + struct srpt_device *sdev = sport->sdev; + u32 srp_sq_size = sport->port_attrib.srp_sq_size; + int ret; + + WARN_ON(ch->rq_size < 1); + + ret = -ENOMEM; + qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL); + if (!qp_init) + goto out; + + ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, + ch->rq_size + srp_sq_size, 0); + if (IS_ERR(ch->cq)) { + ret = PTR_ERR(ch->cq); + printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n", + ch->rq_size + srp_sq_size, ret); + goto out; + } + + qp_init->qp_context = (void *)ch; + qp_init->event_handler + = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->send_cq = ch->cq; + qp_init->recv_cq = ch->cq; + qp_init->srq = sdev->srq; + qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; + qp_init->qp_type = IB_QPT_RC; + qp_init->cap.max_send_wr = srp_sq_size; + qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; + + ch->qp = ib_create_qp(sdev->pd, qp_init); + if (IS_ERR(ch->qp)) { + ret = PTR_ERR(ch->qp); + printk(KERN_ERR "failed to create_qp ret= %d\n", ret); + goto err_destroy_cq; + } + + atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); + + pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", + __func__, ch->cq->cqe, qp_init->cap.max_send_sge, + qp_init->cap.max_send_wr, ch->cm_id); + + ret = srpt_init_ch_qp(ch, ch->qp); + if (ret) + goto err_destroy_qp; + + init_waitqueue_head(&ch->wait_queue); + + pr_debug("creating thread for session %s\n", ch->sess_name); + + ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl"); + if (IS_ERR(ch->thread)) { + printk(KERN_ERR "failed to create kernel thread %ld\n", + PTR_ERR(ch->thread)); + ch->thread = NULL; + goto err_destroy_qp; + } + +out: + kfree(qp_init); + return ret; + +err_destroy_qp: + ib_destroy_qp(ch->qp); +err_destroy_cq: + ib_destroy_cq(ch->cq); + goto out; +} + +static void srpt_destroy_ch_ib(struct srpt_rdma_ch *ch) +{ + if (ch->thread) + kthread_stop(ch->thread); + + ib_destroy_qp(ch->qp); + ib_destroy_cq(ch->cq); +} + +/** + * __srpt_close_ch() - Close an RDMA channel by setting the QP error state. + * + * Reset the QP and make sure all resources associated with the channel will + * be deallocated at an appropriate time. + * + * Note: The caller must hold ch->sport->sdev->spinlock. + */ +static void __srpt_close_ch(struct srpt_rdma_ch *ch) +{ + struct srpt_device *sdev; + enum rdma_ch_state prev_state; + unsigned long flags; + + sdev = ch->sport->sdev; + + spin_lock_irqsave(&ch->spinlock, flags); + prev_state = ch->state; + switch (prev_state) { + case CH_CONNECTING: + case CH_LIVE: + ch->state = CH_DISCONNECTING; + break; + default: + break; + } + spin_unlock_irqrestore(&ch->spinlock, flags); + + switch (prev_state) { + case CH_CONNECTING: + ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0, + NULL, 0); + /* fall through */ + case CH_LIVE: + if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0) + printk(KERN_ERR "sending CM DREQ failed.\n"); + break; + case CH_DISCONNECTING: + break; + case CH_DRAINING: + case CH_RELEASING: + break; + } +} + +/** + * srpt_close_ch() - Close an RDMA channel. + */ +static void srpt_close_ch(struct srpt_rdma_ch *ch) +{ + struct srpt_device *sdev; + + sdev = ch->sport->sdev; + spin_lock_irq(&sdev->spinlock); + __srpt_close_ch(ch); + spin_unlock_irq(&sdev->spinlock); +} + +/** + * srpt_drain_channel() - Drain a channel by resetting the IB queue pair. + * @cm_id: Pointer to the CM ID of the channel to be drained. + * + * Note: Must be called from inside srpt_cm_handler to avoid a race between + * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one() + * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one() + * waits until all target sessions for the associated IB device have been + * unregistered and target session registration involves a call to + * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until + * this function has finished). + */ +static void srpt_drain_channel(struct ib_cm_id *cm_id) +{ + struct srpt_device *sdev; + struct srpt_rdma_ch *ch; + int ret; + bool do_reset = false; + + WARN_ON_ONCE(irqs_disabled()); + + sdev = cm_id->context; + BUG_ON(!sdev); + spin_lock_irq(&sdev->spinlock); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (ch->cm_id == cm_id) { + do_reset = srpt_test_and_set_ch_state(ch, + CH_CONNECTING, CH_DRAINING) || + srpt_test_and_set_ch_state(ch, + CH_LIVE, CH_DRAINING) || + srpt_test_and_set_ch_state(ch, + CH_DISCONNECTING, CH_DRAINING); + break; + } + } + spin_unlock_irq(&sdev->spinlock); + + if (do_reset) { + ret = srpt_ch_qp_err(ch); + if (ret < 0) + printk(KERN_ERR "Setting queue pair in error state" + " failed: %d\n", ret); + } +} + +/** + * srpt_find_channel() - Look up an RDMA channel. + * @cm_id: Pointer to the CM ID of the channel to be looked up. + * + * Return NULL if no matching RDMA channel has been found. + */ +static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev, + struct ib_cm_id *cm_id) +{ + struct srpt_rdma_ch *ch; + bool found; + + WARN_ON_ONCE(irqs_disabled()); + BUG_ON(!sdev); + + found = false; + spin_lock_irq(&sdev->spinlock); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (ch->cm_id == cm_id) { + found = true; + break; + } + } + spin_unlock_irq(&sdev->spinlock); + + return found ? ch : NULL; +} + +/** + * srpt_release_channel() - Release channel resources. + * + * Schedules the actual release because: + * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would + * trigger a deadlock. + * - It is not safe to call TCM transport_* functions from interrupt context. + */ +static void srpt_release_channel(struct srpt_rdma_ch *ch) +{ + schedule_work(&ch->release_work); +} + +static void srpt_release_channel_work(struct work_struct *w) +{ + struct srpt_rdma_ch *ch; + struct srpt_device *sdev; + + ch = container_of(w, struct srpt_rdma_ch, release_work); + pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess, + ch->release_done); + + sdev = ch->sport->sdev; + BUG_ON(!sdev); + + transport_deregister_session_configfs(ch->sess); + transport_deregister_session(ch->sess); + ch->sess = NULL; + + srpt_destroy_ch_ib(ch); + + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + ch->rsp_size, DMA_TO_DEVICE); + + spin_lock_irq(&sdev->spinlock); + list_del(&ch->list); + spin_unlock_irq(&sdev->spinlock); + + ib_destroy_cm_id(ch->cm_id); + + if (ch->release_done) + complete(ch->release_done); + + wake_up(&sdev->ch_releaseQ); + + kfree(ch); +} + +static struct srpt_node_acl *__srpt_lookup_acl(struct srpt_port *sport, + u8 i_port_id[16]) +{ + struct srpt_node_acl *nacl; + + list_for_each_entry(nacl, &sport->port_acl_list, list) + if (memcmp(nacl->i_port_id, i_port_id, + sizeof(nacl->i_port_id)) == 0) + return nacl; + + return NULL; +} + +static struct srpt_node_acl *srpt_lookup_acl(struct srpt_port *sport, + u8 i_port_id[16]) +{ + struct srpt_node_acl *nacl; + + spin_lock_irq(&sport->port_acl_lock); + nacl = __srpt_lookup_acl(sport, i_port_id); + spin_unlock_irq(&sport->port_acl_lock); + + return nacl; +} + +/** + * srpt_cm_req_recv() - Process the event IB_CM_REQ_RECEIVED. + * + * Ownership of the cm_id is transferred to the target session if this + * functions returns zero. Otherwise the caller remains the owner of cm_id. + */ +static int srpt_cm_req_recv(struct ib_cm_id *cm_id, + struct ib_cm_req_event_param *param, + void *private_data) +{ + struct srpt_device *sdev = cm_id->context; + struct srpt_port *sport = &sdev->port[param->port - 1]; + struct srp_login_req *req; + struct srp_login_rsp *rsp; + struct srp_login_rej *rej; + struct ib_cm_rep_param *rep_param; + struct srpt_rdma_ch *ch, *tmp_ch; + struct srpt_node_acl *nacl; + u32 it_iu_len; + int i; + int ret = 0; + + WARN_ON_ONCE(irqs_disabled()); + + if (WARN_ON(!sdev || !private_data)) + return -EINVAL; + + req = (struct srp_login_req *)private_data; + + it_iu_len = be32_to_cpu(req->req_it_iu_len); + + printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx," + " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d" + " (guid=0x%llx:0x%llx)\n", + be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]), + be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]), + be64_to_cpu(*(__be64 *)&req->target_port_id[0]), + be64_to_cpu(*(__be64 *)&req->target_port_id[8]), + it_iu_len, + param->port, + be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), + be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); + + rsp = kzalloc(sizeof *rsp, GFP_KERNEL); + rej = kzalloc(sizeof *rej, GFP_KERNEL); + rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL); + + if (!rsp || !rej || !rep_param) { + ret = -ENOMEM; + goto out; + } + + if (it_iu_len > srp_max_req_size || it_iu_len < 64) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + ret = -EINVAL; + printk(KERN_ERR "rejected SRP_LOGIN_REQ because its" + " length (%d bytes) is out of range (%d .. %d)\n", + it_iu_len, 64, srp_max_req_size); + goto reject; + } + + if (!sport->enabled) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + ret = -EINVAL; + printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port" + " has not yet been enabled\n"); + goto reject; + } + + if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { + rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; + + spin_lock_irq(&sdev->spinlock); + + list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) { + if (!memcmp(ch->i_port_id, req->initiator_port_id, 16) + && !memcmp(ch->t_port_id, req->target_port_id, 16) + && param->port == ch->sport->port + && param->listen_id == ch->sport->sdev->cm_id + && ch->cm_id) { + enum rdma_ch_state ch_state; + + ch_state = srpt_get_ch_state(ch); + if (ch_state != CH_CONNECTING + && ch_state != CH_LIVE) + continue; + + /* found an existing channel */ + pr_debug("Found existing channel %s" + " cm_id= %p state= %d\n", + ch->sess_name, ch->cm_id, ch_state); + + __srpt_close_ch(ch); + + rsp->rsp_flags = + SRP_LOGIN_RSP_MULTICHAN_TERMINATED; + } + } + + spin_unlock_irq(&sdev->spinlock); + + } else + rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; + + if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) + || *(__be64 *)(req->target_port_id + 8) != + cpu_to_be64(srpt_service_guid)) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + ret = -ENOMEM; + printk(KERN_ERR "rejected SRP_LOGIN_REQ because it" + " has an invalid target port identifier.\n"); + goto reject; + } + + ch = kzalloc(sizeof *ch, GFP_KERNEL); + if (!ch) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n"); + ret = -ENOMEM; + goto reject; + } + + INIT_WORK(&ch->release_work, srpt_release_channel_work); + memcpy(ch->i_port_id, req->initiator_port_id, 16); + memcpy(ch->t_port_id, req->target_port_id, 16); + ch->sport = &sdev->port[param->port - 1]; + ch->cm_id = cm_id; + /* + * Avoid QUEUE_FULL conditions by limiting the number of buffers used + * for the SRP protocol to the command queue size. + */ + ch->rq_size = SRPT_RQ_SIZE; + spin_lock_init(&ch->spinlock); + ch->state = CH_CONNECTING; + INIT_LIST_HEAD(&ch->cmd_wait_list); + ch->rsp_size = ch->sport->port_attrib.srp_max_rsp_size; + + ch->ioctx_ring = (struct srpt_send_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_ring[0]), + ch->rsp_size, DMA_TO_DEVICE); + if (!ch->ioctx_ring) + goto free_ch; + + INIT_LIST_HEAD(&ch->free_list); + for (i = 0; i < ch->rq_size; i++) { + ch->ioctx_ring[i]->ch = ch; + list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); + } + + ret = srpt_create_ch_ib(ch); + if (ret) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating" + " a new RDMA channel failed.\n"); + goto free_ring; + } + + ret = srpt_ch_qp_rtr(ch, ch->qp); + if (ret) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling" + " RTR failed (error code = %d)\n", ret); + goto destroy_ib; + } + /* + * Use the initator port identifier as the session name. + */ + snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx", + be64_to_cpu(*(__be64 *)ch->i_port_id), + be64_to_cpu(*(__be64 *)(ch->i_port_id + 8))); + + pr_debug("registering session %s\n", ch->sess_name); + + nacl = srpt_lookup_acl(sport, ch->i_port_id); + if (!nacl) { + printk(KERN_INFO "Rejected login because no ACL has been" + " configured yet for initiator %s.\n", ch->sess_name); + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); + goto destroy_ib; + } + + ch->sess = transport_init_session(); + if (IS_ERR(ch->sess)) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_debug("Failed to create session\n"); + goto deregister_session; + } + ch->sess->se_node_acl = &nacl->nacl; + transport_register_session(&sport->port_tpg_1, &nacl->nacl, ch->sess, ch); + + pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess, + ch->sess_name, ch->cm_id); + + /* create srp_login_response */ + rsp->opcode = SRP_LOGIN_RSP; + rsp->tag = req->tag; + rsp->max_it_iu_len = req->req_it_iu_len; + rsp->max_ti_iu_len = req->req_it_iu_len; + ch->max_ti_iu_len = it_iu_len; + rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); + rsp->req_lim_delta = cpu_to_be32(ch->rq_size); + atomic_set(&ch->req_lim, ch->rq_size); + atomic_set(&ch->req_lim_delta, 0); + + /* create cm reply */ + rep_param->qp_num = ch->qp->qp_num; + rep_param->private_data = (void *)rsp; + rep_param->private_data_len = sizeof *rsp; + rep_param->rnr_retry_count = 7; + rep_param->flow_control = 1; + rep_param->failover_accepted = 0; + rep_param->srq = 1; + rep_param->responder_resources = 4; + rep_param->initiator_depth = 4; + + ret = ib_send_cm_rep(cm_id, rep_param); + if (ret) { + printk(KERN_ERR "sending SRP_LOGIN_REQ response failed" + " (error code = %d)\n", ret); + goto release_channel; + } + + spin_lock_irq(&sdev->spinlock); + list_add_tail(&ch->list, &sdev->rch_list); + spin_unlock_irq(&sdev->spinlock); + + goto out; + +release_channel: + srpt_set_ch_state(ch, CH_RELEASING); + transport_deregister_session_configfs(ch->sess); + +deregister_session: + transport_deregister_session(ch->sess); + ch->sess = NULL; + +destroy_ib: + srpt_destroy_ch_ib(ch); + +free_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + ch->rsp_size, DMA_TO_DEVICE); +free_ch: + kfree(ch); + +reject: + rej->opcode = SRP_LOGIN_REJ; + rej->tag = req->tag; + rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); + + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, + (void *)rej, sizeof *rej); + +out: + kfree(rep_param); + kfree(rsp); + kfree(rej); + + return ret; +} + +static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); +} + +/** + * srpt_cm_rtu_recv() - Process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event. + * + * An IB_CM_RTU_RECEIVED message indicates that the connection is established + * and that the recipient may begin transmitting (RTU = ready to use). + */ +static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) +{ + struct srpt_rdma_ch *ch; + int ret; + + ch = srpt_find_channel(cm_id->context, cm_id); + BUG_ON(!ch); + + if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) { + struct srpt_recv_ioctx *ioctx, *ioctx_tmp; + + ret = srpt_ch_qp_rts(ch, ch->qp); + + list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list, + wait_list) { + list_del(&ioctx->wait_list); + srpt_handle_new_iu(ch, ioctx, NULL); + } + if (ret) + srpt_close_ch(ch); + } +} + +static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); +} + +static void srpt_cm_rep_error(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); +} + +/** + * srpt_cm_dreq_recv() - Process reception of a DREQ message. + */ +static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) +{ + struct srpt_rdma_ch *ch; + unsigned long flags; + bool send_drep = false; + + ch = srpt_find_channel(cm_id->context, cm_id); + BUG_ON(!ch); + + pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch)); + + spin_lock_irqsave(&ch->spinlock, flags); + switch (ch->state) { + case CH_CONNECTING: + case CH_LIVE: + send_drep = true; + ch->state = CH_DISCONNECTING; + break; + case CH_DISCONNECTING: + case CH_DRAINING: + case CH_RELEASING: + WARN(true, "unexpected channel state %d\n", ch->state); + break; + } + spin_unlock_irqrestore(&ch->spinlock, flags); + + if (send_drep) { + if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0) + printk(KERN_ERR "Sending IB DREP failed.\n"); + printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n", + ch->sess_name); + } +} + +/** + * srpt_cm_drep_recv() - Process reception of a DREP message. + */ +static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n", + cm_id); + srpt_drain_channel(cm_id); +} + +/** + * srpt_cm_handler() - IB connection manager callback function. + * + * A non-zero return value will cause the caller destroy the CM ID. + * + * Note: srpt_cm_handler() must only return a non-zero value when transferring + * ownership of the cm_id to a channel by srpt_cm_req_recv() failed. Returning + * a non-zero value in any other case will trigger a race with the + * ib_destroy_cm_id() call in srpt_release_channel(). + */ +static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + int ret; + + ret = 0; + switch (event->event) { + case IB_CM_REQ_RECEIVED: + ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd, + event->private_data); + break; + case IB_CM_REJ_RECEIVED: + srpt_cm_rej_recv(cm_id); + break; + case IB_CM_RTU_RECEIVED: + case IB_CM_USER_ESTABLISHED: + srpt_cm_rtu_recv(cm_id); + break; + case IB_CM_DREQ_RECEIVED: + srpt_cm_dreq_recv(cm_id); + break; + case IB_CM_DREP_RECEIVED: + srpt_cm_drep_recv(cm_id); + break; + case IB_CM_TIMEWAIT_EXIT: + srpt_cm_timewait_exit(cm_id); + break; + case IB_CM_REP_ERROR: + srpt_cm_rep_error(cm_id); + break; + case IB_CM_DREQ_ERROR: + printk(KERN_INFO "Received IB DREQ ERROR event.\n"); + break; + case IB_CM_MRA_RECEIVED: + printk(KERN_INFO "Received IB MRA event\n"); + break; + default: + printk(KERN_ERR "received unrecognized IB CM event %d\n", + event->event); + break; + } + + return ret; +} + +/** + * srpt_perform_rdmas() - Perform IB RDMA. + * + * Returns zero upon success or a negative number upon failure. + */ +static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + struct ib_send_wr wr; + struct ib_send_wr *bad_wr; + struct rdma_iu *riu; + int i; + int ret; + int sq_wr_avail; + enum dma_data_direction dir; + const int n_rdma = ioctx->n_rdma; + + dir = ioctx->cmd.data_direction; + if (dir == DMA_TO_DEVICE) { + /* write */ + ret = -ENOMEM; + sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail); + if (sq_wr_avail < 0) { + printk(KERN_WARNING "IB send queue full (needed %d)\n", + n_rdma); + goto out; + } + } + + ioctx->rdma_aborted = false; + ret = 0; + riu = ioctx->rdma_ius; + memset(&wr, 0, sizeof wr); + + for (i = 0; i < n_rdma; ++i, ++riu) { + if (dir == DMA_FROM_DEVICE) { + wr.opcode = IB_WR_RDMA_WRITE; + wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + SRPT_RDMA_WRITE_LAST : + SRPT_RDMA_MID, + ioctx->ioctx.index); + } else { + wr.opcode = IB_WR_RDMA_READ; + wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + SRPT_RDMA_READ_LAST : + SRPT_RDMA_MID, + ioctx->ioctx.index); + } + wr.next = NULL; + wr.wr.rdma.remote_addr = riu->raddr; + wr.wr.rdma.rkey = riu->rkey; + wr.num_sge = riu->sge_cnt; + wr.sg_list = riu->sge; + + /* only get completion event for the last rdma write */ + if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) + wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(ch->qp, &wr, &bad_wr); + if (ret) + break; + } + + if (ret) + printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d", + __func__, __LINE__, ret, i, n_rdma); + if (ret && i > 0) { + wr.num_sge = 0; + wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); + wr.send_flags = IB_SEND_SIGNALED; + while (ch->state == CH_LIVE && + ib_post_send(ch->qp, &wr, &bad_wr) != 0) { + printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]", + ioctx->ioctx.index); + msleep(1000); + } + while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) { + printk(KERN_INFO "Waiting until RDMA abort finished [%d]", + ioctx->ioctx.index); + msleep(1000); + } + } +out: + if (unlikely(dir == DMA_TO_DEVICE && ret < 0)) + atomic_add(n_rdma, &ch->sq_wr_avail); + return ret; +} + +/** + * srpt_xfer_data() - Start data transfer from initiator to target. + */ +static int srpt_xfer_data(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + int ret; + + ret = srpt_map_sg_to_ib_sge(ch, ioctx); + if (ret) { + printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret); + goto out; + } + + ret = srpt_perform_rdmas(ch, ioctx); + if (ret) { + if (ret == -EAGAIN || ret == -ENOMEM) + printk(KERN_INFO "%s[%d] queue full -- ret=%d\n", + __func__, __LINE__, ret); + else + printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n", + __func__, __LINE__, ret); + goto out_unmap; + } + +out: + return ret; +out_unmap: + srpt_unmap_sg_to_ib_sge(ch, ioctx); + goto out; +} + +static int srpt_write_pending_status(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + return srpt_get_cmd_state(ioctx) == SRPT_STATE_NEED_DATA; +} + +/* + * srpt_write_pending() - Start data transfer from initiator to target (write). + */ +static int srpt_write_pending(struct se_cmd *se_cmd) +{ + struct srpt_rdma_ch *ch; + struct srpt_send_ioctx *ioctx; + enum srpt_command_state new_state; + enum rdma_ch_state ch_state; + int ret; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + + new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); + WARN_ON(new_state == SRPT_STATE_DONE); + + ch = ioctx->ch; + BUG_ON(!ch); + + ch_state = srpt_get_ch_state(ch); + switch (ch_state) { + case CH_CONNECTING: + WARN(true, "unexpected channel state %d\n", ch_state); + ret = -EINVAL; + goto out; + case CH_LIVE: + break; + case CH_DISCONNECTING: + case CH_DRAINING: + case CH_RELEASING: + pr_debug("cmd with tag %lld: channel disconnecting\n", + ioctx->tag); + srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); + ret = -EINVAL; + goto out; + } + ret = srpt_xfer_data(ch, ioctx); + +out: + return ret; +} + +static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) +{ + switch (tcm_mgmt_status) { + case TMR_FUNCTION_COMPLETE: + return SRP_TSK_MGMT_SUCCESS; + case TMR_FUNCTION_REJECTED: + return SRP_TSK_MGMT_FUNC_NOT_SUPP; + } + return SRP_TSK_MGMT_FAILED; +} + +/** + * srpt_queue_response() - Transmits the response to a SCSI command. + * + * Callback function called by the TCM core. Must not block since it can be + * invoked on the context of the IB completion handler. + */ +static int srpt_queue_response(struct se_cmd *cmd) +{ + struct srpt_rdma_ch *ch; + struct srpt_send_ioctx *ioctx; + enum srpt_command_state state; + unsigned long flags; + int ret; + enum dma_data_direction dir; + int resp_len; + u8 srp_tm_status; + + ret = 0; + + ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); + ch = ioctx->ch; + BUG_ON(!ch); + + spin_lock_irqsave(&ioctx->spinlock, flags); + state = ioctx->state; + switch (state) { + case SRPT_STATE_NEW: + case SRPT_STATE_DATA_IN: + ioctx->state = SRPT_STATE_CMD_RSP_SENT; + break; + case SRPT_STATE_MGMT: + ioctx->state = SRPT_STATE_MGMT_RSP_SENT; + break; + default: + WARN(true, "ch %p; cmd %d: unexpected command state %d\n", + ch, ioctx->ioctx.index, ioctx->state); + break; + } + spin_unlock_irqrestore(&ioctx->spinlock, flags); + + if (unlikely(transport_check_aborted_status(&ioctx->cmd, false) + || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) { + atomic_inc(&ch->req_lim_delta); + srpt_abort_cmd(ioctx); + goto out; + } + + dir = ioctx->cmd.data_direction; + + /* For read commands, transfer the data to the initiator. */ + if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length && + !ioctx->queue_status_only) { + ret = srpt_xfer_data(ch, ioctx); + if (ret) { + printk(KERN_ERR "xfer_data failed for tag %llu\n", + ioctx->tag); + goto out; + } + } + + if (state != SRPT_STATE_MGMT) + resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->tag, + cmd->scsi_status); + else { + srp_tm_status + = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response); + resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, + ioctx->tag); + } + ret = srpt_post_send(ch, ioctx, resp_len); + if (ret) { + printk(KERN_ERR "sending cmd response failed for tag %llu\n", + ioctx->tag); + srpt_unmap_sg_to_ib_sge(ch, ioctx); + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + } + +out: + return ret; +} + +static int srpt_queue_status(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); + BUG_ON(ioctx->sense_data != cmd->sense_buffer); + if (cmd->se_cmd_flags & + (SCF_TRANSPORT_TASK_SENSE | SCF_EMULATED_TASK_SENSE)) + WARN_ON(cmd->scsi_status != SAM_STAT_CHECK_CONDITION); + ioctx->queue_status_only = true; + return srpt_queue_response(cmd); +} + +static void srpt_refresh_port_work(struct work_struct *work) +{ + struct srpt_port *sport = container_of(work, struct srpt_port, work); + + srpt_refresh_port(sport); +} + +static int srpt_ch_list_empty(struct srpt_device *sdev) +{ + int res; + + spin_lock_irq(&sdev->spinlock); + res = list_empty(&sdev->rch_list); + spin_unlock_irq(&sdev->spinlock); + + return res; +} + +/** + * srpt_release_sdev() - Free the channel resources associated with a target. + */ +static int srpt_release_sdev(struct srpt_device *sdev) +{ + struct srpt_rdma_ch *ch, *tmp_ch; + int res; + + WARN_ON_ONCE(irqs_disabled()); + + BUG_ON(!sdev); + + spin_lock_irq(&sdev->spinlock); + list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) + __srpt_close_ch(ch); + spin_unlock_irq(&sdev->spinlock); + + res = wait_event_interruptible(sdev->ch_releaseQ, + srpt_ch_list_empty(sdev)); + if (res) + printk(KERN_ERR "%s: interrupted.\n", __func__); + + return 0; +} + +static struct srpt_port *__srpt_lookup_port(const char *name) +{ + struct ib_device *dev; + struct srpt_device *sdev; + struct srpt_port *sport; + int i; + + list_for_each_entry(sdev, &srpt_dev_list, list) { + dev = sdev->device; + if (!dev) + continue; + + for (i = 0; i < dev->phys_port_cnt; i++) { + sport = &sdev->port[i]; + + if (!strcmp(sport->port_guid, name)) + return sport; + } + } + + return NULL; +} + +static struct srpt_port *srpt_lookup_port(const char *name) +{ + struct srpt_port *sport; + + spin_lock(&srpt_dev_lock); + sport = __srpt_lookup_port(name); + spin_unlock(&srpt_dev_lock); + + return sport; +} + +/** + * srpt_add_one() - Infiniband device addition callback function. + */ +static void srpt_add_one(struct ib_device *device) +{ + struct srpt_device *sdev; + struct srpt_port *sport; + struct ib_srq_init_attr srq_attr; + int i; + + pr_debug("device = %p, device->dma_ops = %p\n", device, + device->dma_ops); + + sdev = kzalloc(sizeof *sdev, GFP_KERNEL); + if (!sdev) + goto err; + + sdev->device = device; + INIT_LIST_HEAD(&sdev->rch_list); + init_waitqueue_head(&sdev->ch_releaseQ); + spin_lock_init(&sdev->spinlock); + + if (ib_query_device(device, &sdev->dev_attr)) + goto free_dev; + + sdev->pd = ib_alloc_pd(device); + if (IS_ERR(sdev->pd)) + goto free_dev; + + sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(sdev->mr)) + goto err_pd; + + sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr); + + srq_attr.event_handler = srpt_srq_event; + srq_attr.srq_context = (void *)sdev; + srq_attr.attr.max_wr = sdev->srq_size; + srq_attr.attr.max_sge = 1; + srq_attr.attr.srq_limit = 0; + srq_attr.srq_type = IB_SRQT_BASIC; + + sdev->srq = ib_create_srq(sdev->pd, &srq_attr); + if (IS_ERR(sdev->srq)) + goto err_mr; + + pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", + __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr, + device->name); + + if (!srpt_service_guid) + srpt_service_guid = be64_to_cpu(device->node_guid); + + sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); + if (IS_ERR(sdev->cm_id)) + goto err_srq; + + /* print out target login information */ + pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," + "pkey=ffff,service_id=%016llx\n", srpt_service_guid, + srpt_service_guid, srpt_service_guid); + + /* + * We do not have a consistent service_id (ie. also id_ext of target_id) + * to identify this target. We currently use the guid of the first HCA + * in the system as service_id; therefore, the target_id will change + * if this HCA is gone bad and replaced by different HCA + */ + if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL)) + goto err_cm; + + INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, + srpt_event_handler); + if (ib_register_event_handler(&sdev->event_handler)) + goto err_cm; + + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + srp_max_req_size, DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) + goto err_event; + + for (i = 0; i < sdev->srq_size; ++i) + srpt_post_recv(sdev, sdev->ioctx_ring[i]); + + WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); + + for (i = 1; i <= sdev->device->phys_port_cnt; i++) { + sport = &sdev->port[i - 1]; + sport->sdev = sdev; + sport->port = i; + sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; + sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; + sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; + INIT_WORK(&sport->work, srpt_refresh_port_work); + INIT_LIST_HEAD(&sport->port_acl_list); + spin_lock_init(&sport->port_acl_lock); + + if (srpt_refresh_port(sport)) { + printk(KERN_ERR "MAD registration failed for %s-%d.\n", + srpt_sdev_name(sdev), i); + goto err_ring; + } + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + } + + spin_lock(&srpt_dev_lock); + list_add_tail(&sdev->list, &srpt_dev_list); + spin_unlock(&srpt_dev_lock); + +out: + ib_set_client_data(device, &srpt_client, sdev); + pr_debug("added %s.\n", device->name); + return; + +err_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, + DMA_FROM_DEVICE); +err_event: + ib_unregister_event_handler(&sdev->event_handler); +err_cm: + ib_destroy_cm_id(sdev->cm_id); +err_srq: + ib_destroy_srq(sdev->srq); +err_mr: + ib_dereg_mr(sdev->mr); +err_pd: + ib_dealloc_pd(sdev->pd); +free_dev: + kfree(sdev); +err: + sdev = NULL; + printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name); + goto out; +} + +/** + * srpt_remove_one() - InfiniBand device removal callback function. + */ +static void srpt_remove_one(struct ib_device *device) +{ + struct srpt_device *sdev; + int i; + + sdev = ib_get_client_data(device, &srpt_client); + if (!sdev) { + printk(KERN_INFO "%s(%s): nothing to do.\n", __func__, + device->name); + return; + } + + srpt_unregister_mad_agent(sdev); + + ib_unregister_event_handler(&sdev->event_handler); + + /* Cancel any work queued by the just unregistered IB event handler. */ + for (i = 0; i < sdev->device->phys_port_cnt; i++) + cancel_work_sync(&sdev->port[i].work); + + ib_destroy_cm_id(sdev->cm_id); + + /* + * Unregistering a target must happen after destroying sdev->cm_id + * such that no new SRP_LOGIN_REQ information units can arrive while + * destroying the target. + */ + spin_lock(&srpt_dev_lock); + list_del(&sdev->list); + spin_unlock(&srpt_dev_lock); + srpt_release_sdev(sdev); + + ib_destroy_srq(sdev->srq); + ib_dereg_mr(sdev->mr); + ib_dealloc_pd(sdev->pd); + + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); + sdev->ioctx_ring = NULL; + kfree(sdev); +} + +static struct ib_client srpt_client = { + .name = DRV_NAME, + .add = srpt_add_one, + .remove = srpt_remove_one +}; + +static int srpt_check_true(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int srpt_check_false(struct se_portal_group *se_tpg) +{ + return 0; +} + +static char *srpt_get_fabric_name(void) +{ + return "srpt"; +} + +static u8 srpt_get_fabric_proto_ident(struct se_portal_group *se_tpg) +{ + return SCSI_TRANSPORTID_PROTOCOLID_SRP; +} + +static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) +{ + struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); + + return sport->port_guid; +} + +static u16 srpt_get_tag(struct se_portal_group *tpg) +{ + return 1; +} + +static u32 srpt_get_default_depth(struct se_portal_group *se_tpg) +{ + return 1; +} + +static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl, + struct t10_pr_registration *pr_reg, + int *format_code, unsigned char *buf) +{ + struct srpt_node_acl *nacl; + struct spc_rdma_transport_id *tr_id; + + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + tr_id = (void *)buf; + tr_id->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP; + memcpy(tr_id->i_port_id, nacl->i_port_id, sizeof(tr_id->i_port_id)); + return sizeof(*tr_id); +} + +static u32 srpt_get_pr_transport_id_len(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl, + struct t10_pr_registration *pr_reg, + int *format_code) +{ + *format_code = 0; + return sizeof(struct spc_rdma_transport_id); +} + +static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg, + const char *buf, u32 *out_tid_len, + char **port_nexus_ptr) +{ + struct spc_rdma_transport_id *tr_id; + + *port_nexus_ptr = NULL; + *out_tid_len = sizeof(struct spc_rdma_transport_id); + tr_id = (void *)buf; + return (char *)tr_id->i_port_id; +} + +static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg) +{ + struct srpt_node_acl *nacl; + + nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL); + if (!nacl) { + printk(KERN_ERR "Unable to allocate struct srpt_node_acl\n"); + return NULL; + } + + return &nacl->nacl; +} + +static void srpt_release_fabric_acl(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl) +{ + struct srpt_node_acl *nacl; + + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + kfree(nacl); +} + +static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg) +{ + return 1; +} + +static void srpt_release_cmd(struct se_cmd *se_cmd) +{ +} + +/** + * srpt_shutdown_session() - Whether or not a session may be shut down. + */ +static int srpt_shutdown_session(struct se_session *se_sess) +{ + return true; +} + +/** + * srpt_close_session() - Forcibly close a session. + * + * Callback function invoked by the TCM core to clean up sessions associated + * with a node ACL when the user invokes + * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + */ +static void srpt_close_session(struct se_session *se_sess) +{ + DECLARE_COMPLETION_ONSTACK(release_done); + struct srpt_rdma_ch *ch; + struct srpt_device *sdev; + int res; + + ch = se_sess->fabric_sess_ptr; + WARN_ON(ch->sess != se_sess); + + pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch)); + + sdev = ch->sport->sdev; + spin_lock_irq(&sdev->spinlock); + BUG_ON(ch->release_done); + ch->release_done = &release_done; + __srpt_close_ch(ch); + spin_unlock_irq(&sdev->spinlock); + + res = wait_for_completion_timeout(&release_done, 60 * HZ); + WARN_ON(res <= 0); +} + +/** + * srpt_sess_get_index() - Return the value of scsiAttIntrPortIndex (SCSI-MIB). + * + * A quote from RFC 4455 (SCSI-MIB) about this MIB object: + * This object represents an arbitrary integer used to uniquely identify a + * particular attached remote initiator port to a particular SCSI target port + * within a particular SCSI target device within a particular SCSI instance. + */ +static u32 srpt_sess_get_index(struct se_session *se_sess) +{ + return 0; +} + +static void srpt_set_default_node_attrs(struct se_node_acl *nacl) +{ +} + +static u32 srpt_get_task_tag(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + return ioctx->tag; +} + +/* Note: only used from inside debug printk's by the TCM core. */ +static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + return srpt_get_cmd_state(ioctx); +} + +static u16 srpt_set_fabric_sense_len(struct se_cmd *cmd, u32 sense_length) +{ + return 0; +} + +static u16 srpt_get_fabric_sense_len(void) +{ + return 0; +} + +/** + * srpt_parse_i_port_id() - Parse an initiator port ID. + * @name: ASCII representation of a 128-bit initiator port ID. + * @i_port_id: Binary 128-bit port ID. + */ +static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) +{ + const char *p; + unsigned len, count, leading_zero_bytes; + int ret, rc; + + p = name; + if (strnicmp(p, "0x", 2) == 0) + p += 2; + ret = -EINVAL; + len = strlen(p); + if (len % 2) + goto out; + count = min(len / 2, 16U); + leading_zero_bytes = 16 - count; + memset(i_port_id, 0, leading_zero_bytes); + rc = hex2bin(i_port_id + leading_zero_bytes, p, count); + if (rc < 0) + pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc); + ret = 0; +out: + return ret; +} + +/* + * configfs callback function invoked for + * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + */ +static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, + struct config_group *group, + const char *name) +{ + struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); + struct se_node_acl *se_nacl, *se_nacl_new; + struct srpt_node_acl *nacl; + int ret = 0; + u32 nexus_depth = 1; + u8 i_port_id[16]; + + if (srpt_parse_i_port_id(i_port_id, name) < 0) { + printk(KERN_ERR "invalid initiator port ID %s\n", name); + ret = -EINVAL; + goto err; + } + + se_nacl_new = srpt_alloc_fabric_acl(tpg); + if (!se_nacl_new) { + ret = -ENOMEM; + goto err; + } + /* + * nacl_new may be released by core_tpg_add_initiator_node_acl() + * when converting a node ACL from demo mode to explict + */ + se_nacl = core_tpg_add_initiator_node_acl(tpg, se_nacl_new, name, + nexus_depth); + if (IS_ERR(se_nacl)) { + ret = PTR_ERR(se_nacl); + goto err; + } + /* Locate our struct srpt_node_acl and set sdev and i_port_id. */ + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + memcpy(&nacl->i_port_id[0], &i_port_id[0], 16); + nacl->sport = sport; + + spin_lock_irq(&sport->port_acl_lock); + list_add_tail(&nacl->list, &sport->port_acl_list); + spin_unlock_irq(&sport->port_acl_lock); + + return se_nacl; +err: + return ERR_PTR(ret); +} + +/* + * configfs callback function invoked for + * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + */ +static void srpt_drop_nodeacl(struct se_node_acl *se_nacl) +{ + struct srpt_node_acl *nacl; + struct srpt_device *sdev; + struct srpt_port *sport; + + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + sport = nacl->sport; + sdev = sport->sdev; + spin_lock_irq(&sport->port_acl_lock); + list_del(&nacl->list); + spin_unlock_irq(&sport->port_acl_lock); + core_tpg_del_initiator_node_acl(&sport->port_tpg_1, se_nacl, 1); + srpt_release_fabric_acl(NULL, se_nacl); +} + +static ssize_t srpt_tpg_attrib_show_srp_max_rdma_size( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); +} + +static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long val; + int ret; + + ret = strict_strtoul(page, 0, &val); + if (ret < 0) { + pr_err("strict_strtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_RDMA_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_RDMA_SIZE: %d\n", val, + MAX_SRPT_RDMA_SIZE); + return -EINVAL; + } + if (val < DEFAULT_MAX_RDMA_SIZE) { + pr_err("val: %lu smaller than DEFAULT_MAX_RDMA_SIZE: %d\n", + val, DEFAULT_MAX_RDMA_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_max_rdma_size = val; + + return count; +} + +TF_TPG_ATTRIB_ATTR(srpt, srp_max_rdma_size, S_IRUGO | S_IWUSR); + +static ssize_t srpt_tpg_attrib_show_srp_max_rsp_size( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); +} + +static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long val; + int ret; + + ret = strict_strtoul(page, 0, &val); + if (ret < 0) { + pr_err("strict_strtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_RSP_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_RSP_SIZE: %d\n", val, + MAX_SRPT_RSP_SIZE); + return -EINVAL; + } + if (val < MIN_MAX_RSP_SIZE) { + pr_err("val: %lu smaller than MIN_MAX_RSP_SIZE: %d\n", val, + MIN_MAX_RSP_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_max_rsp_size = val; + + return count; +} + +TF_TPG_ATTRIB_ATTR(srpt, srp_max_rsp_size, S_IRUGO | S_IWUSR); + +static ssize_t srpt_tpg_attrib_show_srp_sq_size( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); +} + +static ssize_t srpt_tpg_attrib_store_srp_sq_size( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long val; + int ret; + + ret = strict_strtoul(page, 0, &val); + if (ret < 0) { + pr_err("strict_strtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_SRQ_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_SRQ_SIZE: %d\n", val, + MAX_SRPT_SRQ_SIZE); + return -EINVAL; + } + if (val < MIN_SRPT_SRQ_SIZE) { + pr_err("val: %lu smaller than MIN_SRPT_SRQ_SIZE: %d\n", val, + MIN_SRPT_SRQ_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_sq_size = val; + + return count; +} + +TF_TPG_ATTRIB_ATTR(srpt, srp_sq_size, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { + &srpt_tpg_attrib_srp_max_rdma_size.attr, + &srpt_tpg_attrib_srp_max_rsp_size.attr, + &srpt_tpg_attrib_srp_sq_size.attr, + NULL, +}; + +static ssize_t srpt_tpg_show_enable( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0); +} + +static ssize_t srpt_tpg_store_enable( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long tmp; + int ret; + + ret = strict_strtoul(page, 0, &tmp); + if (ret < 0) { + printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n"); + return -EINVAL; + } + + if ((tmp != 0) && (tmp != 1)) { + printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp); + return -EINVAL; + } + if (tmp == 1) + sport->enabled = true; + else + sport->enabled = false; + + return count; +} + +TF_TPG_BASE_ATTR(srpt, enable, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *srpt_tpg_attrs[] = { + &srpt_tpg_enable.attr, + NULL, +}; + +/** + * configfs callback invoked for + * mkdir /sys/kernel/config/target/$driver/$port/$tpg + */ +static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, + struct config_group *group, + const char *name) +{ + struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn); + int res; + + /* Initialize sport->port_wwn and sport->port_tpg_1 */ + res = core_tpg_register(&srpt_target->tf_ops, &sport->port_wwn, + &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL); + if (res) + return ERR_PTR(res); + + return &sport->port_tpg_1; +} + +/** + * configfs callback invoked for + * rmdir /sys/kernel/config/target/$driver/$port/$tpg + */ +static void srpt_drop_tpg(struct se_portal_group *tpg) +{ + struct srpt_port *sport = container_of(tpg, + struct srpt_port, port_tpg_1); + + sport->enabled = false; + core_tpg_deregister(&sport->port_tpg_1); +} + +/** + * configfs callback invoked for + * mkdir /sys/kernel/config/target/$driver/$port + */ +static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, + struct config_group *group, + const char *name) +{ + struct srpt_port *sport; + int ret; + + sport = srpt_lookup_port(name); + pr_debug("make_tport(%s)\n", name); + ret = -EINVAL; + if (!sport) + goto err; + + return &sport->port_wwn; + +err: + return ERR_PTR(ret); +} + +/** + * configfs callback invoked for + * rmdir /sys/kernel/config/target/$driver/$port + */ +static void srpt_drop_tport(struct se_wwn *wwn) +{ + struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn); + + pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item)); +} + +static ssize_t srpt_wwn_show_attr_version(struct target_fabric_configfs *tf, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION); +} + +TF_WWN_ATTR_RO(srpt, version); + +static struct configfs_attribute *srpt_wwn_attrs[] = { + &srpt_wwn_version.attr, + NULL, +}; + +static struct target_core_fabric_ops srpt_template = { + .get_fabric_name = srpt_get_fabric_name, + .get_fabric_proto_ident = srpt_get_fabric_proto_ident, + .tpg_get_wwn = srpt_get_fabric_wwn, + .tpg_get_tag = srpt_get_tag, + .tpg_get_default_depth = srpt_get_default_depth, + .tpg_get_pr_transport_id = srpt_get_pr_transport_id, + .tpg_get_pr_transport_id_len = srpt_get_pr_transport_id_len, + .tpg_parse_pr_out_transport_id = srpt_parse_pr_out_transport_id, + .tpg_check_demo_mode = srpt_check_false, + .tpg_check_demo_mode_cache = srpt_check_true, + .tpg_check_demo_mode_write_protect = srpt_check_true, + .tpg_check_prod_mode_write_protect = srpt_check_false, + .tpg_alloc_fabric_acl = srpt_alloc_fabric_acl, + .tpg_release_fabric_acl = srpt_release_fabric_acl, + .tpg_get_inst_index = srpt_tpg_get_inst_index, + .release_cmd = srpt_release_cmd, + .check_stop_free = srpt_check_stop_free, + .shutdown_session = srpt_shutdown_session, + .close_session = srpt_close_session, + .sess_get_index = srpt_sess_get_index, + .sess_get_initiator_sid = NULL, + .write_pending = srpt_write_pending, + .write_pending_status = srpt_write_pending_status, + .set_default_node_attributes = srpt_set_default_node_attrs, + .get_task_tag = srpt_get_task_tag, + .get_cmd_state = srpt_get_tcm_cmd_state, + .queue_data_in = srpt_queue_response, + .queue_status = srpt_queue_status, + .queue_tm_rsp = srpt_queue_response, + .get_fabric_sense_len = srpt_get_fabric_sense_len, + .set_fabric_sense_len = srpt_set_fabric_sense_len, + /* + * Setup function pointers for generic logic in + * target_core_fabric_configfs.c + */ + .fabric_make_wwn = srpt_make_tport, + .fabric_drop_wwn = srpt_drop_tport, + .fabric_make_tpg = srpt_make_tpg, + .fabric_drop_tpg = srpt_drop_tpg, + .fabric_post_link = NULL, + .fabric_pre_unlink = NULL, + .fabric_make_np = NULL, + .fabric_drop_np = NULL, + .fabric_make_nodeacl = srpt_make_nodeacl, + .fabric_drop_nodeacl = srpt_drop_nodeacl, +}; + +/** + * srpt_init_module() - Kernel module initialization. + * + * Note: Since ib_register_client() registers callback functions, and since at + * least one of these callback functions (srpt_add_one()) calls target core + * functions, this driver must be registered with the target core before + * ib_register_client() is called. + */ +static int __init srpt_init_module(void) +{ + int ret; + + ret = -EINVAL; + if (srp_max_req_size < MIN_MAX_REQ_SIZE) { + printk(KERN_ERR "invalid value %d for kernel module parameter" + " srp_max_req_size -- must be at least %d.\n", + srp_max_req_size, MIN_MAX_REQ_SIZE); + goto out; + } + + if (srpt_srq_size < MIN_SRPT_SRQ_SIZE + || srpt_srq_size > MAX_SRPT_SRQ_SIZE) { + printk(KERN_ERR "invalid value %d for kernel module parameter" + " srpt_srq_size -- must be in the range [%d..%d].\n", + srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE); + goto out; + } + + srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt"); + if (IS_ERR(srpt_target)) { + printk(KERN_ERR "couldn't register\n"); + ret = PTR_ERR(srpt_target); + goto out; + } + + srpt_target->tf_ops = srpt_template; + + /* Enable SG chaining */ + srpt_target->tf_ops.task_sg_chaining = true; + + /* + * Set up default attribute lists. + */ + srpt_target->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = srpt_wwn_attrs; + srpt_target->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = srpt_tpg_attrs; + srpt_target->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = srpt_tpg_attrib_attrs; + srpt_target->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL; + srpt_target->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL; + srpt_target->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL; + srpt_target->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL; + srpt_target->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL; + srpt_target->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL; + + ret = target_fabric_configfs_register(srpt_target); + if (ret < 0) { + printk(KERN_ERR "couldn't register\n"); + goto out_free_target; + } + + ret = ib_register_client(&srpt_client); + if (ret) { + printk(KERN_ERR "couldn't register IB client\n"); + goto out_unregister_target; + } + + return 0; + +out_unregister_target: + target_fabric_configfs_deregister(srpt_target); + srpt_target = NULL; +out_free_target: + if (srpt_target) + target_fabric_configfs_free(srpt_target); +out: + return ret; +} + +static void __exit srpt_cleanup_module(void) +{ + ib_unregister_client(&srpt_client); + target_fabric_configfs_deregister(srpt_target); + srpt_target = NULL; +} + +module_init(srpt_init_module); +module_exit(srpt_cleanup_module); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h new file mode 100644 index 000000000000..61e52b830816 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -0,0 +1,443 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * Copyright (C) 2009 - 2010 Bart Van Assche <bvanassche@acm.org>. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef IB_SRPT_H +#define IB_SRPT_H + +#include <linux/types.h> +#include <linux/list.h> +#include <linux/wait.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_sa.h> +#include <rdma/ib_cm.h> + +#include <scsi/srp.h> + +#include "ib_dm_mad.h" + +/* + * The prefix the ServiceName field must start with in the device management + * ServiceEntries attribute pair. See also the SRP specification. + */ +#define SRP_SERVICE_NAME_PREFIX "SRP.T10:" + +enum { + /* + * SRP IOControllerProfile attributes for SRP target ports that have + * not been defined in <scsi/srp.h>. Source: section B.7, table B.7 + * in the SRP specification. + */ + SRP_PROTOCOL = 0x0108, + SRP_PROTOCOL_VERSION = 0x0001, + SRP_IO_SUBCLASS = 0x609e, + SRP_SEND_TO_IOC = 0x01, + SRP_SEND_FROM_IOC = 0x02, + SRP_RDMA_READ_FROM_IOC = 0x08, + SRP_RDMA_WRITE_FROM_IOC = 0x20, + + /* + * srp_login_cmd.req_flags bitmasks. See also table 9 in the SRP + * specification. + */ + SRP_MTCH_ACTION = 0x03, /* MULTI-CHANNEL ACTION */ + SRP_LOSOLNT = 0x10, /* logout solicited notification */ + SRP_CRSOLNT = 0x20, /* credit request solicited notification */ + SRP_AESOLNT = 0x40, /* asynchronous event solicited notification */ + + /* + * srp_cmd.sol_nt / srp_tsk_mgmt.sol_not bitmasks. See also tables + * 18 and 20 in the SRP specification. + */ + SRP_SCSOLNT = 0x02, /* SCSOLNT = successful solicited notification */ + SRP_UCSOLNT = 0x04, /* UCSOLNT = unsuccessful solicited notification */ + + /* + * srp_rsp.sol_not / srp_t_logout.sol_not bitmasks. See also tables + * 16 and 22 in the SRP specification. + */ + SRP_SOLNT = 0x01, /* SOLNT = solicited notification */ + + /* See also table 24 in the SRP specification. */ + SRP_TSK_MGMT_SUCCESS = 0x00, + SRP_TSK_MGMT_FUNC_NOT_SUPP = 0x04, + SRP_TSK_MGMT_FAILED = 0x05, + + /* See also table 21 in the SRP specification. */ + SRP_CMD_SIMPLE_Q = 0x0, + SRP_CMD_HEAD_OF_Q = 0x1, + SRP_CMD_ORDERED_Q = 0x2, + SRP_CMD_ACA = 0x4, + + SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0, + SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1, + SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2, + + SRPT_DEF_SG_TABLESIZE = 128, + SRPT_DEF_SG_PER_WQE = 16, + + MIN_SRPT_SQ_SIZE = 16, + DEF_SRPT_SQ_SIZE = 4096, + SRPT_RQ_SIZE = 128, + MIN_SRPT_SRQ_SIZE = 4, + DEFAULT_SRPT_SRQ_SIZE = 4095, + MAX_SRPT_SRQ_SIZE = 65535, + MAX_SRPT_RDMA_SIZE = 1U << 24, + MAX_SRPT_RSP_SIZE = 1024, + + MIN_MAX_REQ_SIZE = 996, + DEFAULT_MAX_REQ_SIZE + = sizeof(struct srp_cmd)/*48*/ + + sizeof(struct srp_indirect_buf)/*20*/ + + 128 * sizeof(struct srp_direct_buf)/*16*/, + + MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4, + DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */ + + DEFAULT_MAX_RDMA_SIZE = 65536, +}; + +enum srpt_opcode { + SRPT_RECV, + SRPT_SEND, + SRPT_RDMA_MID, + SRPT_RDMA_ABORT, + SRPT_RDMA_READ_LAST, + SRPT_RDMA_WRITE_LAST, +}; + +static inline u64 encode_wr_id(u8 opcode, u32 idx) +{ + return ((u64)opcode << 32) | idx; +} +static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id) +{ + return wr_id >> 32; +} +static inline u32 idx_from_wr_id(u64 wr_id) +{ + return (u32)wr_id; +} + +struct rdma_iu { + u64 raddr; + u32 rkey; + struct ib_sge *sge; + u32 sge_cnt; + int mem_id; +}; + +/** + * enum srpt_command_state - SCSI command state managed by SRPT. + * @SRPT_STATE_NEW: New command arrived and is being processed. + * @SRPT_STATE_NEED_DATA: Processing a write or bidir command and waiting + * for data arrival. + * @SRPT_STATE_DATA_IN: Data for the write or bidir command arrived and is + * being processed. + * @SRPT_STATE_CMD_RSP_SENT: SRP_RSP for SRP_CMD has been sent. + * @SRPT_STATE_MGMT: Processing a SCSI task management command. + * @SRPT_STATE_MGMT_RSP_SENT: SRP_RSP for SRP_TSK_MGMT has been sent. + * @SRPT_STATE_DONE: Command processing finished successfully, command + * processing has been aborted or command processing + * failed. + */ +enum srpt_command_state { + SRPT_STATE_NEW = 0, + SRPT_STATE_NEED_DATA = 1, + SRPT_STATE_DATA_IN = 2, + SRPT_STATE_CMD_RSP_SENT = 3, + SRPT_STATE_MGMT = 4, + SRPT_STATE_MGMT_RSP_SENT = 5, + SRPT_STATE_DONE = 6, +}; + +/** + * struct srpt_ioctx - Shared SRPT I/O context information. + * @buf: Pointer to the buffer. + * @dma: DMA address of the buffer. + * @index: Index of the I/O context in its ioctx_ring array. + */ +struct srpt_ioctx { + void *buf; + dma_addr_t dma; + uint32_t index; +}; + +/** + * struct srpt_recv_ioctx - SRPT receive I/O context. + * @ioctx: See above. + * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list. + */ +struct srpt_recv_ioctx { + struct srpt_ioctx ioctx; + struct list_head wait_list; +}; + +/** + * struct srpt_send_ioctx - SRPT send I/O context. + * @ioctx: See above. + * @ch: Channel pointer. + * @free_list: Node in srpt_rdma_ch.free_list. + * @n_rbuf: Number of data buffers in the received SRP command. + * @rbufs: Pointer to SRP data buffer array. + * @single_rbuf: SRP data buffer if the command has only a single buffer. + * @sg: Pointer to sg-list associated with this I/O context. + * @sg_cnt: SG-list size. + * @mapped_sg_count: ib_dma_map_sg() return value. + * @n_rdma_ius: Number of elements in the rdma_ius array. + * @rdma_ius: Array with information about the RDMA mapping. + * @tag: Tag of the received SRP information unit. + * @spinlock: Protects 'state'. + * @state: I/O context state. + * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether + * the already initiated transfers have finished. + * @cmd: Target core command data structure. + * @sense_data: SCSI sense data. + */ +struct srpt_send_ioctx { + struct srpt_ioctx ioctx; + struct srpt_rdma_ch *ch; + struct kref kref; + struct rdma_iu *rdma_ius; + struct srp_direct_buf *rbufs; + struct srp_direct_buf single_rbuf; + struct scatterlist *sg; + struct list_head free_list; + spinlock_t spinlock; + enum srpt_command_state state; + bool rdma_aborted; + struct se_cmd cmd; + struct completion tx_done; + u64 tag; + int sg_cnt; + int mapped_sg_count; + u16 n_rdma_ius; + u8 n_rdma; + u8 n_rbuf; + bool queue_status_only; + u8 sense_data[SCSI_SENSE_BUFFERSIZE]; +}; + +/** + * enum rdma_ch_state - SRP channel state. + * @CH_CONNECTING: QP is in RTR state; waiting for RTU. + * @CH_LIVE: QP is in RTS state. + * @CH_DISCONNECTING: DREQ has been received; waiting for DREP + * or DREQ has been send and waiting for DREP + * or . + * @CH_DRAINING: QP is in ERR state; waiting for last WQE event. + * @CH_RELEASING: Last WQE event has been received; releasing resources. + */ +enum rdma_ch_state { + CH_CONNECTING, + CH_LIVE, + CH_DISCONNECTING, + CH_DRAINING, + CH_RELEASING +}; + +/** + * struct srpt_rdma_ch - RDMA channel. + * @wait_queue: Allows the kernel thread to wait for more work. + * @thread: Kernel thread that processes the IB queues associated with + * the channel. + * @cm_id: IB CM ID associated with the channel. + * @qp: IB queue pair used for communicating over this channel. + * @cq: IB completion queue for this channel. + * @rq_size: IB receive queue size. + * @rsp_size IB response message size in bytes. + * @sq_wr_avail: number of work requests available in the send queue. + * @sport: pointer to the information of the HCA port used by this + * channel. + * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ. + * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ. + * @max_ti_iu_len: maximum target-to-initiator information unit length. + * @req_lim: request limit: maximum number of requests that may be sent + * by the initiator without having received a response. + * @req_lim_delta: Number of credits not yet sent back to the initiator. + * @spinlock: Protects free_list and state. + * @free_list: Head of list with free send I/O contexts. + * @state: channel state. See also enum rdma_ch_state. + * @ioctx_ring: Send ring. + * @wc: IB work completion array for srpt_process_completion(). + * @list: Node for insertion in the srpt_device.rch_list list. + * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This + * list contains struct srpt_ioctx elements and is protected + * against concurrent modification by the cm_id spinlock. + * @sess: Session information associated with this SRP channel. + * @sess_name: Session name. + * @release_work: Allows scheduling of srpt_release_channel(). + * @release_done: Enables waiting for srpt_release_channel() completion. + */ +struct srpt_rdma_ch { + wait_queue_head_t wait_queue; + struct task_struct *thread; + struct ib_cm_id *cm_id; + struct ib_qp *qp; + struct ib_cq *cq; + int rq_size; + u32 rsp_size; + atomic_t sq_wr_avail; + struct srpt_port *sport; + u8 i_port_id[16]; + u8 t_port_id[16]; + int max_ti_iu_len; + atomic_t req_lim; + atomic_t req_lim_delta; + spinlock_t spinlock; + struct list_head free_list; + enum rdma_ch_state state; + struct srpt_send_ioctx **ioctx_ring; + struct ib_wc wc[16]; + struct list_head list; + struct list_head cmd_wait_list; + struct se_session *sess; + u8 sess_name[36]; + struct work_struct release_work; + struct completion *release_done; +}; + +/** + * struct srpt_port_attib - Attributes for SRPT port + * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. + * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. + * @srp_sq_size: Shared receive queue (SRQ) size. + */ +struct srpt_port_attrib { + u32 srp_max_rdma_size; + u32 srp_max_rsp_size; + u32 srp_sq_size; +}; + +/** + * struct srpt_port - Information associated by SRPT with a single IB port. + * @sdev: backpointer to the HCA information. + * @mad_agent: per-port management datagram processing information. + * @enabled: Whether or not this target port is enabled. + * @port_guid: ASCII representation of Port GUID + * @port: one-based port number. + * @sm_lid: cached value of the port's sm_lid. + * @lid: cached value of the port's lid. + * @gid: cached value of the port's gid. + * @port_acl_lock spinlock for port_acl_list: + * @work: work structure for refreshing the aforementioned cached values. + * @port_tpg_1 Target portal group = 1 data. + * @port_wwn: Target core WWN data. + * @port_acl_list: Head of the list with all node ACLs for this port. + */ +struct srpt_port { + struct srpt_device *sdev; + struct ib_mad_agent *mad_agent; + bool enabled; + u8 port_guid[64]; + u8 port; + u16 sm_lid; + u16 lid; + union ib_gid gid; + spinlock_t port_acl_lock; + struct work_struct work; + struct se_portal_group port_tpg_1; + struct se_wwn port_wwn; + struct list_head port_acl_list; + struct srpt_port_attrib port_attrib; +}; + +/** + * struct srpt_device - Information associated by SRPT with a single HCA. + * @device: Backpointer to the struct ib_device managed by the IB core. + * @pd: IB protection domain. + * @mr: L_Key (local key) with write access to all local memory. + * @srq: Per-HCA SRQ (shared receive queue). + * @cm_id: Connection identifier. + * @dev_attr: Attributes of the InfiniBand device as obtained during the + * ib_client.add() callback. + * @srq_size: SRQ size. + * @ioctx_ring: Per-HCA SRQ. + * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. + * @ch_releaseQ: Enables waiting for removal from rch_list. + * @spinlock: Protects rch_list and tpg. + * @port: Information about the ports owned by this HCA. + * @event_handler: Per-HCA asynchronous IB event handler. + * @list: Node in srpt_dev_list. + */ +struct srpt_device { + struct ib_device *device; + struct ib_pd *pd; + struct ib_mr *mr; + struct ib_srq *srq; + struct ib_cm_id *cm_id; + struct ib_device_attr dev_attr; + int srq_size; + struct srpt_recv_ioctx **ioctx_ring; + struct list_head rch_list; + wait_queue_head_t ch_releaseQ; + spinlock_t spinlock; + struct srpt_port port[2]; + struct ib_event_handler event_handler; + struct list_head list; +}; + +/** + * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). + * @i_port_id: 128-bit SRP initiator port ID. + * @sport: port information. + * @nacl: Target core node ACL information. + * @list: Element of the per-HCA ACL list. + */ +struct srpt_node_acl { + u8 i_port_id[16]; + struct srpt_port *sport; + struct se_node_acl nacl; + struct list_head list; +}; + +/* + * SRP-releated SCSI persistent reservation definitions. + * + * See also SPC4r28, section 7.6.1 (Protocol specific parameters introduction). + * See also SPC4r28, section 7.6.4.5 (TransportID for initiator ports using + * SCSI over an RDMA interface). + */ + +enum { + SCSI_TRANSPORTID_PROTOCOLID_SRP = 4, +}; + +struct spc_rdma_transport_id { + uint8_t protocol_identifier; + uint8_t reserved[7]; + uint8_t i_port_id[16]; +}; + +#endif /* IB_SRPT_H */ |