diff options
Diffstat (limited to 'drivers/infiniband')
61 files changed, 5047 insertions, 402 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index 0f9a84c1046a..eb0add311dc8 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -55,6 +55,7 @@ source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" source "drivers/infiniband/ulp/srp/Kconfig" +source "drivers/infiniband/ulp/srpt/Kconfig" source "drivers/infiniband/ulp/iser/Kconfig" diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile index 9cc7a47d3e67..a3b2d8eac86e 100644 --- a/drivers/infiniband/Makefile +++ b/drivers/infiniband/Makefile @@ -10,4 +10,5 @@ obj-$(CONFIG_MLX4_INFINIBAND) += hw/mlx4/ obj-$(CONFIG_INFINIBAND_NES) += hw/nes/ obj-$(CONFIG_INFINIBAND_IPOIB) += ulp/ipoib/ obj-$(CONFIG_INFINIBAND_SRP) += ulp/srp/ +obj-$(CONFIG_INFINIBAND_SRPT) += ulp/srpt/ obj-$(CONFIG_INFINIBAND_ISER) += ulp/iser/ diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 691276bafd78..1612cfd50f39 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -178,6 +178,25 @@ static void queue_req(struct addr_req *req) mutex_unlock(&lock); } +static int dst_fetch_ha(struct dst_entry *dst, struct rdma_dev_addr *addr) +{ + struct neighbour *n; + int ret; + + rcu_read_lock(); + n = dst_get_neighbour_noref(dst); + if (!n || !(n->nud_state & NUD_VALID)) { + if (n) + neigh_event_send(n, NULL); + ret = -ENODATA; + } else { + ret = rdma_copy_addr(addr, dst->dev, n->ha); + } + rcu_read_unlock(); + + return ret; +} + static int addr4_resolve(struct sockaddr_in *src_in, struct sockaddr_in *dst_in, struct rdma_dev_addr *addr) @@ -185,7 +204,6 @@ static int addr4_resolve(struct sockaddr_in *src_in, __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; struct rtable *rt; - struct neighbour *neigh; struct flowi4 fl4; int ret; @@ -214,18 +232,7 @@ static int addr4_resolve(struct sockaddr_in *src_in, goto put; } - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, 
rt->dst.dev); - if (!neigh || !(neigh->nud_state & NUD_VALID)) { - neigh_event_send(dst_get_neighbour(&rt->dst), NULL); - ret = -ENODATA; - if (neigh) - goto release; - goto put; - } - - ret = rdma_copy_addr(addr, neigh->dev, neigh->ha); -release: - neigh_release(neigh); + ret = dst_fetch_ha(&rt->dst, addr); put: ip_rt_put(rt); out: @@ -238,13 +245,12 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, struct rdma_dev_addr *addr) { struct flowi6 fl6; - struct neighbour *neigh; struct dst_entry *dst; int ret; memset(&fl6, 0, sizeof fl6); - ipv6_addr_copy(&fl6.daddr, &dst_in->sin6_addr); - ipv6_addr_copy(&fl6.saddr, &src_in->sin6_addr); + fl6.daddr = dst_in->sin6_addr; + fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; dst = ip6_route_output(&init_net, NULL, &fl6); @@ -258,7 +264,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; src_in->sin6_family = AF_INET6; - ipv6_addr_copy(&src_in->sin6_addr, &fl6.saddr); + src_in->sin6_addr = fl6.saddr; } if (dst->dev->flags & IFF_LOOPBACK) { @@ -274,15 +280,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; } - neigh = dst_get_neighbour(dst); - if (!neigh || !(neigh->nud_state & NUD_VALID)) { - if (neigh) - neigh_event_send(neigh, NULL); - ret = -ENODATA; - goto put; - } - - ret = rdma_copy_addr(addr, dst->dev, neigh->ha); + ret = dst_fetch_ha(dst, addr); put: dst_release(dst); return ret; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 8b72f39202fb..c889aaef3416 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3659,7 +3659,7 @@ static struct kobj_type cm_port_obj_type = { .release = cm_release_port_obj }; -static char *cm_devnode(struct device *dev, mode_t *mode) +static char *cm_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h index 505db2a59e7f..7da9b2102341 100644 --- 
a/drivers/infiniband/core/cm_msgs.h +++ b/drivers/infiniband/core/cm_msgs.h @@ -799,6 +799,7 @@ struct cm_apr_msg { u8 info_length; u8 ap_status; + __be16 rsvd; u8 info[IB_CM_APR_INFO_LENGTH]; u8 private_data[IB_CM_APR_PRIVATE_DATA_SIZE]; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 75ff821c0af0..e3e470fecaa9 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -1110,7 +1110,7 @@ static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id, if (cma_any_addr((struct sockaddr *) &rt->addr.src_addr)) { rt->addr.dev_addr.dev_type = ARPHRD_INFINIBAND; rdma_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid); - ib_addr_set_pkey(&rt->addr.dev_addr, rt->path_rec[0].pkey); + ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey)); } else { ret = rdma_translate_ip((struct sockaddr *) &rt->addr.src_addr, &rt->addr.dev_addr); @@ -2005,11 +2005,11 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv) if (cma_zero_addr(src)) { dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr; if ((src->sa_family = dst->sa_family) == AF_INET) { - ((struct sockaddr_in *) src)->sin_addr.s_addr = - ((struct sockaddr_in *) dst)->sin_addr.s_addr; + ((struct sockaddr_in *)src)->sin_addr = + ((struct sockaddr_in *)dst)->sin_addr; } else { - ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr, - &((struct sockaddr_in6 *) dst)->sin6_addr); + ((struct sockaddr_in6 *)src)->sin6_addr = + ((struct sockaddr_in6 *)dst)->sin6_addr; } } @@ -2513,6 +2513,9 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv, req.private_data_len = sizeof(struct cma_hdr) + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + req.private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!req.private_data) return -ENOMEM; @@ -2562,6 +2565,9 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, memset(&req, 0, sizeof req); 
offset = cma_user_data_offset(id_priv->id.ps); req.private_data_len = offset + conn_param->private_data_len; + if (req.private_data_len < conn_param->private_data_len) + return -EINVAL; + private_data = kzalloc(req.private_data_len, GFP_ATOMIC); if (!private_data) return -ENOMEM; @@ -2920,7 +2926,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) mutex_lock(&id_priv->qp_mutex); if (!status && id_priv->id.qp) status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid, - multicast->rec.mlid); + be16_to_cpu(multicast->rec.mlid)); mutex_unlock(&id_priv->qp_mutex); memset(&event, 0, sizeof event); @@ -3181,7 +3187,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr) if (id->qp) ib_detach_mcast(id->qp, &mc->multicast.ib->rec.mgid, - mc->multicast.ib->rec.mlid); + be16_to_cpu(mc->multicast.ib->rec.mlid)); if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) { switch (rdma_port_get_link_layer(id->device, id->port_num)) { case IB_LINK_LAYER_INFINIBAND: diff --git a/drivers/infiniband/core/ucm.c b/drivers/infiniband/core/ucm.c index b8a0b4a7811b..06f08713f487 100644 --- a/drivers/infiniband/core/ucm.c +++ b/drivers/infiniband/core/ucm.c @@ -106,9 +106,6 @@ enum { IB_UCM_MAX_DEVICES = 32 }; -/* ib_cm and ib_user_cm modules share /sys/class/infiniband_cm */ -extern struct class cm_class; - #define IB_UCM_BASE_DEV MKDEV(IB_UCM_MAJOR, IB_UCM_BASE_MINOR) static void ib_ucm_add_one(struct ib_device *device); diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index b37b0c02a7b9..5034a87cc72d 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -808,9 +808,12 @@ static ssize_t ucma_accept(struct ucma_file *file, const char __user *inbuf, return PTR_ERR(ctx); if (cmd.conn_param.valid) { - ctx->uid = cmd.uid; ucma_copy_conn_param(&conn_param, &cmd.conn_param); + mutex_lock(&file->mut); ret = rdma_accept(ctx->cm_id, &conn_param); + if 
(!ret) + ctx->uid = cmd.uid; + mutex_unlock(&file->mut); } else ret = rdma_accept(ctx->cm_id, NULL); diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 07db22997e97..f0d588f8859e 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -1175,7 +1175,7 @@ static void ib_umad_remove_one(struct ib_device *device) kref_put(&umad_dev->ref, ib_umad_release_dev); } -static char *umad_devnode(struct device *dev, mode_t *mode) +static char *umad_devnode(struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); } diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 254f1649c734..4d27e4c3fe34 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -241,11 +241,24 @@ static struct ib_qp *idr_read_qp(int qp_handle, struct ib_ucontext *context) return idr_read_obj(&ib_uverbs_qp_idr, qp_handle, context, 0); } +static struct ib_qp *idr_write_qp(int qp_handle, struct ib_ucontext *context) +{ + struct ib_uobject *uobj; + + uobj = idr_write_uobj(&ib_uverbs_qp_idr, qp_handle, context); + return uobj ? 
uobj->object : NULL; +} + static void put_qp_read(struct ib_qp *qp) { put_uobj_read(qp->uobject); } +static void put_qp_write(struct ib_qp *qp) +{ + put_uobj_write(qp->uobject); +} + static struct ib_srq *idr_read_srq(int srq_handle, struct ib_ucontext *context) { return idr_read_obj(&ib_uverbs_srq_idr, srq_handle, context, 0); @@ -1472,6 +1485,7 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, qp->event_handler = attr.event_handler; qp->qp_context = attr.qp_context; qp->qp_type = attr.qp_type; + atomic_set(&qp->usecnt, 0); atomic_inc(&pd->usecnt); atomic_inc(&attr.send_cq->usecnt); if (attr.recv_cq) @@ -2375,7 +2389,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -2404,7 +2418,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, kfree(mcast); out_put: - put_qp_read(qp); + put_qp_write(qp); return ret ? ret : in_len; } @@ -2422,7 +2436,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_write_qp(cmd.qp_handle, file->ucontext); if (!qp) return -EINVAL; @@ -2441,14 +2455,14 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, } out_put: - put_qp_read(qp); + put_qp_write(qp); return ret ? 
ret : in_len; } -int __uverbs_create_xsrq(struct ib_uverbs_file *file, - struct ib_uverbs_create_xsrq *cmd, - struct ib_udata *udata) +static int __uverbs_create_xsrq(struct ib_uverbs_file *file, + struct ib_uverbs_create_xsrq *cmd, + struct ib_udata *udata) { struct ib_uverbs_create_srq_resp resp; struct ib_usrq_object *obj; diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 879636746373..604556d73d25 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -846,7 +846,7 @@ static void ib_uverbs_remove_one(struct ib_device *device) kfree(uverbs_dev); } -static char *uverbs_devnode(struct device *dev, mode_t *mode) +static char *uverbs_devnode(struct device *dev, umode_t *mode) { if (mode) *mode = 0666; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 602b1bd723a9..575b78045aaf 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -421,6 +421,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->uobject = NULL; qp->qp_type = qp_init_attr->qp_type; + atomic_set(&qp->usecnt, 0); if (qp_init_attr->qp_type == IB_QPT_XRC_TGT) { qp->event_handler = __ib_shared_qp_event_handler; qp->qp_context = qp; @@ -430,7 +431,6 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, qp->xrcd = qp_init_attr->xrcd; atomic_inc(&qp_init_attr->xrcd->usecnt); INIT_LIST_HEAD(&qp->open_list); - atomic_set(&qp->usecnt, 0); real_qp = qp; qp = __ib_open_qp(real_qp, qp_init_attr->event_handler, diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index de6d0774e609..740dcc065cf2 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1338,7 +1338,6 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) struct iwch_ep *child_ep, *parent_ep = ctx; struct cpl_pass_accept_req *req = cplhdr(skb); unsigned int hwtid = GET_TID(req); - struct neighbour 
*neigh; struct dst_entry *dst; struct l2t_entry *l2t; struct rtable *rt; @@ -1375,8 +1374,7 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) goto reject; } dst = &rt->dst; - neigh = dst_get_neighbour(dst); - l2t = t3_l2t_get(tdev, neigh, neigh->dev); + l2t = t3_l2t_get(tdev, dst, NULL); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1887,7 +1885,6 @@ static int is_loopback_dst(struct iw_cm_id *cm_id) int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { struct iwch_dev *h = to_iwch_dev(cm_id->device); - struct neighbour *neigh; struct iwch_ep *ep; struct rtable *rt; int err = 0; @@ -1945,11 +1942,7 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) goto fail3; } ep->dst = &rt->dst; - - neigh = dst_get_neighbour(ep->dst); - - /* get a l2t entry */ - ep->l2t = t3_l2t_get(ep->com.tdev, neigh, neigh->dev); + ep->l2t = t3_l2t_get(ep->com.tdev, ep->dst, NULL); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b36cdac9c558..0668bb3472d0 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -542,8 +542,10 @@ static void send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb, (mpa_rev_to_use == 2 ? 
MPA_ENHANCED_RDMA_CONN : 0); mpa->private_data_size = htons(ep->plen); mpa->revision = mpa_rev_to_use; - if (mpa_rev_to_use == 1) + if (mpa_rev_to_use == 1) { ep->tried_with_mpa_v1 = 1; + ep->retry_with_mpa_v1 = 0; + } if (mpa_rev_to_use == 2) { mpa->private_data_size += @@ -1554,6 +1556,67 @@ static void get_4tuple(struct cpl_pass_accept_req *req, return; } +static int import_ep(struct c4iw_ep *ep, __be32 peer_ip, struct dst_entry *dst, + struct c4iw_dev *cdev, bool clear_mpa_v1) +{ + struct neighbour *n; + int err, step; + + rcu_read_lock(); + n = dst_get_neighbour_noref(dst); + err = -ENODEV; + if (!n) + goto out; + err = -ENOMEM; + if (n->dev->flags & IFF_LOOPBACK) { + struct net_device *pdev; + + pdev = ip_dev_find(&init_net, peer_ip); + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, pdev, 0); + if (!ep->l2t) + goto out; + ep->mtu = pdev->mtu; + ep->tx_chan = cxgb4_port_chan(pdev); + ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(pdev) * step; + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->ctrlq_idx = cxgb4_port_idx(pdev); + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(pdev) * step]; + dev_put(pdev); + } else { + ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, + n, n->dev, 0); + if (!ep->l2t) + goto out; + ep->mtu = dst_mtu(ep->dst); + ep->tx_chan = cxgb4_port_chan(n->dev); + ep->smac_idx = (cxgb4_port_viid(n->dev) & 0x7F) << 1; + step = cdev->rdev.lldi.ntxq / + cdev->rdev.lldi.nchan; + ep->txq_idx = cxgb4_port_idx(n->dev) * step; + ep->ctrlq_idx = cxgb4_port_idx(n->dev); + step = cdev->rdev.lldi.nrxq / + cdev->rdev.lldi.nchan; + ep->rss_qid = cdev->rdev.lldi.rxq_ids[ + cxgb4_port_idx(n->dev) * step]; + + if (clear_mpa_v1) { + ep->retry_with_mpa_v1 = 0; + ep->tried_with_mpa_v1 = 0; + } + } + err = 0; +out: + rcu_read_unlock(); + + return err; +} + static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep 
*child_ep, *parent_ep; @@ -1561,18 +1624,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) unsigned int stid = GET_POPEN_TID(ntohl(req->tos_stid)); struct tid_info *t = dev->rdev.lldi.tids; unsigned int hwtid = GET_TID(req); - struct neighbour *neigh; struct dst_entry *dst; - struct l2t_entry *l2t; struct rtable *rt; __be32 local_ip, peer_ip; __be16 local_port, peer_port; - struct net_device *pdev; - u32 tx_chan, smac_idx; - u16 rss_qid; - u32 mtu; - int step; - int txq_idx, ctrlq_idx; + int err; parent_ep = lookup_stid(t, stid); PDBG("%s parent ep %p tid %u\n", __func__, parent_ep, hwtid); @@ -1594,47 +1650,24 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } dst = &rt->dst; - neigh = dst_get_neighbour(dst); - if (neigh->dev->flags & IFF_LOOPBACK) { - pdev = ip_dev_find(&init_net, peer_ip); - BUG_ON(!pdev); - l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, pdev, 0); - mtu = pdev->mtu; - tx_chan = cxgb4_port_chan(pdev); - smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; - step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; - txq_idx = cxgb4_port_idx(pdev) * step; - ctrlq_idx = cxgb4_port_idx(pdev); - step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; - rss_qid = dev->rdev.lldi.rxq_ids[cxgb4_port_idx(pdev) * step]; - dev_put(pdev); - } else { - l2t = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh, neigh->dev, 0); - mtu = dst_mtu(dst); - tx_chan = cxgb4_port_chan(neigh->dev); - smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; - step = dev->rdev.lldi.ntxq / dev->rdev.lldi.nchan; - txq_idx = cxgb4_port_idx(neigh->dev) * step; - ctrlq_idx = cxgb4_port_idx(neigh->dev); - step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan; - rss_qid = dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(neigh->dev) * step]; - } - if (!l2t) { - printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", + + child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); + if (!child_ep) { + printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", 
__func__); dst_release(dst); goto reject; } - child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL); - if (!child_ep) { - printk(KERN_ERR MOD "%s - failed to allocate ep entry!\n", + err = import_ep(child_ep, peer_ip, dst, dev, false); + if (err) { + printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); - cxgb4_l2t_release(l2t); dst_release(dst); + kfree(child_ep); goto reject; } + state_set(&child_ep->com, CONNECTING); child_ep->com.dev = dev; child_ep->com.cm_id = NULL; @@ -1647,18 +1680,11 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) c4iw_get_ep(&parent_ep->com); child_ep->parent_ep = parent_ep; child_ep->tos = GET_POPEN_TOS(ntohl(req->tos_stid)); - child_ep->l2t = l2t; child_ep->dst = dst; child_ep->hwtid = hwtid; - child_ep->tx_chan = tx_chan; - child_ep->smac_idx = smac_idx; - child_ep->rss_qid = rss_qid; - child_ep->mtu = mtu; - child_ep->txq_idx = txq_idx; - child_ep->ctrlq_idx = ctrlq_idx; PDBG("%s tx_chan %u smac_idx %u rss_qid %u\n", __func__, - tx_chan, smac_idx, rss_qid); + child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid); init_timer(&child_ep->timer); cxgb4_insert_tid(t, child_ep, hwtid); @@ -1788,11 +1814,8 @@ static int is_neg_adv_abort(unsigned int status) static int c4iw_reconnect(struct c4iw_ep *ep) { - int err = 0; struct rtable *rt; - struct net_device *pdev; - struct neighbour *neigh; - int step; + int err = 0; PDBG("%s qp %p cm_id %p\n", __func__, ep->com.qp, ep->com.cm_id); init_timer(&ep->timer); @@ -1820,45 +1843,10 @@ static int c4iw_reconnect(struct c4iw_ep *ep) } ep->dst = &rt->dst; - neigh = dst_get_neighbour(ep->dst); - - /* get a l2t entry */ - if (neigh->dev->flags & IFF_LOOPBACK) { - PDBG("%s LOOPBACK\n", __func__); - pdev = ip_dev_find(&init_net, - ep->com.cm_id->remote_addr.sin_addr.s_addr); - ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - neigh, pdev, 0); - ep->mtu = pdev->mtu; - ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) << 1; - 
step = ep->com.dev->rdev.lldi.ntxq / - ep->com.dev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(pdev) * step; - step = ep->com.dev->rdev.lldi.nrxq / - ep->com.dev->rdev.lldi.nchan; - ep->ctrlq_idx = cxgb4_port_idx(pdev); - ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(pdev) * step]; - dev_put(pdev); - } else { - ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - neigh, neigh->dev, 0); - ep->mtu = dst_mtu(ep->dst); - ep->tx_chan = cxgb4_port_chan(neigh->dev); - ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; - step = ep->com.dev->rdev.lldi.ntxq / - ep->com.dev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(neigh->dev) * step; - ep->ctrlq_idx = cxgb4_port_idx(neigh->dev); - step = ep->com.dev->rdev.lldi.nrxq / - ep->com.dev->rdev.lldi.nchan; - ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(neigh->dev) * step]; - } - if (!ep->l2t) { + err = import_ep(ep, ep->com.cm_id->remote_addr.sin_addr.s_addr, + ep->dst, ep->com.dev, false); + if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - err = -ENOMEM; goto fail4; } @@ -2234,13 +2222,10 @@ err: int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) { - int err = 0; struct c4iw_dev *dev = to_c4iw_dev(cm_id->device); struct c4iw_ep *ep; struct rtable *rt; - struct net_device *pdev; - struct neighbour *neigh; - int step; + int err = 0; if ((conn_param->ord > c4iw_max_read_depth) || (conn_param->ird > c4iw_max_read_depth)) { @@ -2301,47 +2286,10 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } ep->dst = &rt->dst; - neigh = dst_get_neighbour(ep->dst); - - /* get a l2t entry */ - if (neigh->dev->flags & IFF_LOOPBACK) { - PDBG("%s LOOPBACK\n", __func__); - pdev = ip_dev_find(&init_net, - cm_id->remote_addr.sin_addr.s_addr); - ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - neigh, pdev, 0); - ep->mtu = pdev->mtu; - ep->tx_chan = cxgb4_port_chan(pdev); - ep->smac_idx = (cxgb4_port_viid(pdev) & 0x7F) 
<< 1; - step = ep->com.dev->rdev.lldi.ntxq / - ep->com.dev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(pdev) * step; - step = ep->com.dev->rdev.lldi.nrxq / - ep->com.dev->rdev.lldi.nchan; - ep->ctrlq_idx = cxgb4_port_idx(pdev); - ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(pdev) * step]; - dev_put(pdev); - } else { - ep->l2t = cxgb4_l2t_get(ep->com.dev->rdev.lldi.l2t, - neigh, neigh->dev, 0); - ep->mtu = dst_mtu(ep->dst); - ep->tx_chan = cxgb4_port_chan(neigh->dev); - ep->smac_idx = (cxgb4_port_viid(neigh->dev) & 0x7F) << 1; - step = ep->com.dev->rdev.lldi.ntxq / - ep->com.dev->rdev.lldi.nchan; - ep->txq_idx = cxgb4_port_idx(neigh->dev) * step; - ep->ctrlq_idx = cxgb4_port_idx(neigh->dev); - step = ep->com.dev->rdev.lldi.nrxq / - ep->com.dev->rdev.lldi.nchan; - ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ - cxgb4_port_idx(neigh->dev) * step]; - ep->retry_with_mpa_v1 = 0; - ep->tried_with_mpa_v1 = 0; - } - if (!ep->l2t) { + err = import_ep(ep, cm_id->remote_addr.sin_addr.s_addr, + ep->dst, ep->com.dev, true); + if (err) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); - err = -ENOMEM; goto fail4; } diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index f35a935267e7..0f1607c8325a 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -311,7 +311,7 @@ void c4iw_count_rcqes(struct t4_cq *cq, struct t4_wq *wq, int *count) while (ptr != cq->sw_pidx) { cqe = &cq->sw_queue[ptr]; if (RQ_TYPE(cqe) && (CQE_OPCODE(cqe) != FW_RI_READ_RESP) && - (CQE_QPID(cqe) == wq->rq.qid) && cqe_completes_wr(cqe, wq)) + (CQE_QPID(cqe) == wq->sq.qid) && cqe_completes_wr(cqe, wq)) (*count)++; if (++ptr == cq->size) ptr = 0; diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h index aaf6023a4835..f08f6eaf3fa8 100644 --- a/drivers/infiniband/hw/ehca/ehca_classes.h +++ b/drivers/infiniband/hw/ehca/ehca_classes.h @@ -379,8 +379,8 @@ extern spinlock_t 
shca_list_lock; extern int ehca_static_rate; extern int ehca_port_act_time; -extern int ehca_use_hp_mr; -extern int ehca_scaling_code; +extern bool ehca_use_hp_mr; +extern bool ehca_scaling_code; extern int ehca_lock_hcalls; extern int ehca_nr_ports; extern int ehca_max_cq; diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c index c240e9972cb0..832e7a7d0aee 100644 --- a/drivers/infiniband/hw/ehca/ehca_main.c +++ b/drivers/infiniband/hw/ehca/ehca_main.c @@ -59,16 +59,16 @@ MODULE_AUTHOR("Christoph Raisch <raisch@de.ibm.com>"); MODULE_DESCRIPTION("IBM eServer HCA InfiniBand Device Driver"); MODULE_VERSION(HCAD_VERSION); -static int ehca_open_aqp1 = 0; +static bool ehca_open_aqp1 = 0; static int ehca_hw_level = 0; -static int ehca_poll_all_eqs = 1; +static bool ehca_poll_all_eqs = 1; int ehca_debug_level = 0; int ehca_nr_ports = -1; -int ehca_use_hp_mr = 0; +bool ehca_use_hp_mr = 0; int ehca_port_act_time = 30; int ehca_static_rate = -1; -int ehca_scaling_code = 0; +bool ehca_scaling_code = 0; int ehca_lock_hcalls = -1; int ehca_max_cq = -1; int ehca_max_qp = -1; @@ -82,7 +82,7 @@ module_param_named(port_act_time, ehca_port_act_time, int, S_IRUGO); module_param_named(poll_all_eqs, ehca_poll_all_eqs, bool, S_IRUGO); module_param_named(static_rate, ehca_static_rate, int, S_IRUGO); module_param_named(scaling_code, ehca_scaling_code, bool, S_IRUGO); -module_param_named(lock_hcalls, ehca_lock_hcalls, bool, S_IRUGO); +module_param_named(lock_hcalls, ehca_lock_hcalls, bint, S_IRUGO); module_param_named(number_of_cqs, ehca_max_cq, int, S_IRUGO); module_param_named(number_of_qps, ehca_max_qp, int, S_IRUGO); diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index 31ae1b108aea..a4de9d58e9b4 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -46,7 +46,7 @@ static struct super_block *ipath_super; static int ipathfs_mknod(struct inode *dir, struct 
dentry *dentry, - int mode, const struct file_operations *fops, + umode_t mode, const struct file_operations *fops, void *data) { int error; @@ -61,7 +61,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; - if ((mode & S_IFMT) == S_IFDIR) { + if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); inc_nlink(dir); @@ -76,7 +76,7 @@ bail: return error; } -static int create_file(const char *name, mode_t mode, +static int create_file(const char *name, umode_t mode, struct dentry *parent, struct dentry **dentry, const struct file_operations *fops, void *data) { @@ -89,7 +89,7 @@ static int create_file(const char *name, mode_t mode, error = ipathfs_mknod(parent->d_inode, *dentry, mode, fops, data); else - error = PTR_ERR(dentry); + error = PTR_ERR(*dentry); mutex_unlock(&parent->d_inode->i_mutex); return error; diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 4b8f9c49397e..a251becdaa98 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -126,7 +126,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); - ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28); + ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index e8df155bc3b0..5ecf38d97269 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -715,13 +715,17 @@ repoll: } wc->slid = be16_to_cpu(cqe->rlid); - wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f; wc->wc_flags |= g_mlpath_rqpn & 
0x80000000 ? IB_WC_GRH : 0; wc->pkey_index = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f; wc->csum_ok = mlx4_ib_ipoib_csum_ok(cqe->status, cqe->checksum); + if (rdma_port_get_link_layer(wc->qp->device, + (*cur_qp)->port) == IB_LINK_LAYER_ETHERNET) + wc->sl = be16_to_cpu(cqe->sl_vid) >> 13; + else + wc->sl = be16_to_cpu(cqe->sl_vid) >> 12; } return 0; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index f36da994a85a..259b0670b51c 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -109,7 +109,8 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier, op_modifier, - MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C); + MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_NATIVE); if (!err) memcpy(response_mad, outmailbox->buf, 256); @@ -256,12 +257,9 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_SUCCESS; /* - * Don't process SMInfo queries or vendor-specific - * MADs -- the SMA can't handle them. + * Don't process SMInfo queries -- the SMA can't handle them. 
*/ - if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO || - ((in_mad->mad_hdr.attr_id & IB_SMP_ATTR_VENDOR_MASK) == - IB_SMP_ATTR_VENDOR_MASK)) + if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO) return IB_MAD_RESULT_SUCCESS; } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT || in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 || @@ -330,7 +328,8 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0, - MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C); + MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C, + MLX4_CMD_WRAPPED); if (err) err = IB_MAD_RESULT_FAILURE; else { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 77f3dbc0aaa1..7b445df6a667 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -177,7 +177,7 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num) { struct mlx4_dev *dev = to_mdev(device)->dev; - return dev->caps.port_mask & (1 << (port_num - 1)) ? + return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ? 
IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET; } @@ -434,7 +434,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, memset(mailbox->buf, 0, 256); memcpy(mailbox->buf, props->node_desc, 64); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, - MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A); + MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox); @@ -463,7 +463,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols, } err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT, - MLX4_CMD_TIME_CLASS_B); + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); mlx4_free_cmd_mailbox(dev->dev, mailbox); return err; @@ -899,7 +899,8 @@ static void update_gids_task(struct work_struct *work) memcpy(gids, gw->gids, sizeof gw->gids); err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port, - 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B); + 1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_NATIVE); if (err) printk(KERN_WARNING "set port command failed\n"); else { @@ -1074,6 +1075,11 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) printk_once(KERN_INFO "%s", mlx4_ib_version); + if (mlx4_is_mfunc(dev)) { + printk(KERN_WARNING "IB not yet supported in SRIOV\n"); + return NULL; + } + mlx4_foreach_ib_transport_port(i, dev) num_ports++; @@ -1244,7 +1250,8 @@ err_reg: err_counter: for (; i; --i) - mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); + if (ibdev->counters[i - 1] != -1) + mlx4_counter_free(ibdev->dev, ibdev->counters[i - 1]); err_map: iounmap(ibdev->uar_map); @@ -1275,7 +1282,8 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) } iounmap(ibdev->uar_map); for (p = 0; p < ibdev->num_ports; ++p) - mlx4_counter_free(ibdev->dev, ibdev->counters[p]); + if (ibdev->counters[p] != -1) + mlx4_counter_free(ibdev->dev, ibdev->counters[p]); mlx4_foreach_port(p, dev, MLX4_PORT_TYPE_IB) mlx4_CLOSE_PORT(dev, p); diff --git 
a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index a16f0c8e6f3f..aa2aefa4236c 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -962,7 +962,7 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah, if (is_eth) { path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | - ((port - 1) << 6) | ((ah->sl & 7) << 3) | ((ah->sl & 8) >> 1); + ((port - 1) << 6) | ((ah->sl & 7) << 3); if (!(ah->ah_flags & IB_AH_GRH)) return -1; @@ -1437,7 +1437,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, u16 pcp; sqp->ud_header.vlan.type = cpu_to_be16(MLX4_IB_IBOE_ETHERTYPE); - pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 27 & 3) << 13; + pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13; sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c index 5965b3df8f2f..7140199f562e 100644 --- a/drivers/infiniband/hw/nes/nes.c +++ b/drivers/infiniband/hw/nes/nes.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -96,7 +96,7 @@ unsigned int wqm_quanta = 0x10000; module_param(wqm_quanta, int, 0644); MODULE_PARM_DESC(wqm_quanta, "WQM quanta"); -static unsigned int limit_maxrdreqsz; +static bool limit_maxrdreqsz; module_param(limit_maxrdreqsz, bool, 0644); MODULE_PARM_DESC(limit_maxrdreqsz, "Limit max read request size to 256 Bytes"); diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index 568b4f11380a..c438e4691b3c 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. 
+ * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. * * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index dfce9ea98a39..a4972abedef1 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -233,6 +233,7 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) u8 *start_ptr = &start_addr; u8 **start_buff = &start_ptr; u16 buff_len = 0; + struct ietf_mpa_v1 *mpa_frame; skb = dev_alloc_skb(MAX_CM_BUFFER); if (!skb) { @@ -242,6 +243,8 @@ static int send_mpa_reject(struct nes_cm_node *cm_node) /* send an MPA reject frame */ cm_build_mpa_frame(cm_node, start_buff, &buff_len, NULL, MPA_KEY_REPLY); + mpa_frame = (struct ietf_mpa_v1 *)*start_buff; + mpa_frame->flags |= IETF_MPA_FLAGS_REJECT; form_cm_frame(skb, cm_node, NULL, 0, *start_buff, buff_len, SET_ACK | SET_FIN); cm_node->state = NES_CM_STATE_FIN_WAIT1; @@ -1348,7 +1351,8 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi else netdev = nesvnic->netdev; - neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, netdev); + rcu_read_lock(); + neigh = dst_get_neighbour_noref(&rt->dst); if (neigh) { if (neigh->nud_state & NUD_VALID) { nes_debug(NES_DBG_CM, "Neighbor MAC address for 0x%08X" @@ -1359,9 +1363,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi if (!memcmp(nesadapter->arp_table[arpindex].mac_addr, neigh->ha, ETH_ALEN)) { /* Mac address same as in nes_arp_table */ - neigh_release(neigh); - ip_rt_put(rt); - return rc; + goto out; } 
nes_manage_arp_cache(nesvnic->netdev, @@ -1373,13 +1375,13 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi dst_ip, NES_ARP_ADD); rc = nes_arp_table(nesvnic->nesdev, dst_ip, NULL, NES_ARP_RESOLVE); + } else { + neigh_event_send(neigh, NULL); } - neigh_release(neigh); } - if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) - neigh_event_send(dst_get_neighbour(&rt->dst), NULL); - +out: + rcu_read_unlock(); ip_rt_put(rt); return rc; } @@ -2836,6 +2838,7 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp) issue_disconn = 1; issue_close = 1; nesqp->cm_id = NULL; + del_timer(&nesqp->terminate_timer); if (nesqp->flush_issued == 0) { nesqp->flush_issued = 1; issue_flush = 1; diff --git a/drivers/infiniband/hw/nes/nes_cm.h b/drivers/infiniband/hw/nes/nes_cm.h index bdfa1fbb35fc..4646e6666087 100644 --- a/drivers/infiniband/hw/nes/nes_cm.h +++ b/drivers/infiniband/hw/nes/nes_cm.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_context.h b/drivers/infiniband/hw/nes/nes_context.h index b4393a16099d..a69eef16d72d 100644 --- a/drivers/infiniband/hw/nes/nes_context.h +++ b/drivers/infiniband/hw/nes/nes_context.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c index 7c0ff19ce382..d42c9f435b1b 100644 --- a/drivers/infiniband/hw/nes/nes_hw.c +++ b/drivers/infiniband/hw/nes/nes_hw.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -1529,7 +1529,7 @@ int nes_init_phy(struct nes_device *nesdev) } else { /* setup 10G MDIO operation */ tx_config &= 0xFFFFFFE3; - tx_config |= 0x15; + tx_config |= 0x1D; } nes_write_indexed(nesdev, NES_IDX_MAC_TX_CONFIG, tx_config); @@ -3619,10 +3619,6 @@ static void nes_process_iwarp_aeqe(struct nes_device *nesdev, } break; case NES_AEQE_AEID_LLP_CLOSE_COMPLETE: - if (nesqp->term_flags) { - nes_terminate_done(nesqp, 0); - return; - } spin_lock_irqsave(&nesqp->lock, flags); nesqp->hw_iwarp_state = iwarp_state; nesqp->hw_tcp_state = tcp_state; diff --git a/drivers/infiniband/hw/nes/nes_hw.h b/drivers/infiniband/hw/nes/nes_hw.h index 0b590e152c6a..d748e4b31b8d 100644 --- a/drivers/infiniband/hw/nes/nes_hw.h +++ b/drivers/infiniband/hw/nes/nes_hw.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. +* Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_mgt.c b/drivers/infiniband/hw/nes/nes_mgt.c index b3b2a240c6e9..3ba7be369452 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.c +++ b/drivers/infiniband/hw/nes/nes_mgt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel-NE, Inc. All rights reserved. + * Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_mgt.h b/drivers/infiniband/hw/nes/nes_mgt.h index 8c8af254555a..4f7f701c4a81 100644 --- a/drivers/infiniband/hw/nes/nes_mgt.h +++ b/drivers/infiniband/hw/nes/nes_mgt.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2010 Intel-NE, Inc. All rights reserved. +* Copyright (c) 2006 - 2011 Intel-NE, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index c00d2f3f8966..f3a3ecf8d09e 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -1589,7 +1589,7 @@ static const struct ethtool_ops nes_ethtool_ops = { .set_pauseparam = nes_netdev_set_pauseparam, }; -static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, u32 features) +static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, netdev_features_t features) { struct nes_adapter *nesadapter = nesdev->nesadapter; u32 u32temp; @@ -1610,7 +1610,7 @@ static void nes_vlan_mode(struct net_device *netdev, struct nes_device *nesdev, spin_unlock_irqrestore(&nesadapter->phy_lock, flags); } -static u32 nes_fix_features(struct net_device *netdev, u32 features) +static netdev_features_t nes_fix_features(struct net_device *netdev, netdev_features_t features) { /* * Since there is no support for separate rx/tx vlan accel @@ -1624,7 +1624,7 @@ static u32 nes_fix_features(struct net_device *netdev, u32 features) return features; } -static int nes_set_features(struct net_device *netdev, u32 features) +static int nes_set_features(struct net_device *netdev, netdev_features_t features) { struct nes_vnic *nesvnic = netdev_priv(netdev); struct nes_device *nesdev = nesvnic->nesdev; diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h index 71e133ab209b..4926de744488 100644 --- a/drivers/infiniband/hw/nes/nes_user.h +++ b/drivers/infiniband/hw/nes/nes_user.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Cisco Systems. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 
diff --git a/drivers/infiniband/hw/nes/nes_utils.c b/drivers/infiniband/hw/nes/nes_utils.c index cd10968bfa22..e98f4fc0b768 100644 --- a/drivers/infiniband/hw/nes/nes_utils.c +++ b/drivers/infiniband/hw/nes/nes_utils.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -56,7 +56,7 @@ static u16 nes_read16_eeprom(void __iomem *addr, u16 offset); u32 mh_detected; u32 mh_pauses_sent; -u32 nes_set_pau(struct nes_device *nesdev) +static u32 nes_set_pau(struct nes_device *nesdev) { u32 ret = 0; u32 counter; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index 5095bc41c6cc..0927b5cc65d3 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -3428,6 +3428,8 @@ static int nes_post_send(struct ib_qp *ibqp, struct ib_send_wr *ib_wr, NES_IWARP_SQ_FMR_WQE_LENGTH_LOW_IDX, ib_wr->wr.fast_reg.length); set_wqe_32bit_value(wqe->wqe_words, + NES_IWARP_SQ_FMR_WQE_LENGTH_HIGH_IDX, 0); + set_wqe_32bit_value(wqe->wqe_words, NES_IWARP_SQ_FMR_WQE_MR_STAG_IDX, ib_wr->wr.fast_reg.rkey); /* Set page size: */ @@ -3724,7 +3726,7 @@ static int nes_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry) entry->opcode = IB_WC_SEND; break; case NES_IWARP_SQ_OP_LOCINV: - entry->opcode = IB_WR_LOCAL_INV; + entry->opcode = IB_WC_LOCAL_INV; break; case NES_IWARP_SQ_OP_FAST_REG: entry->opcode = IB_WC_FAST_REG_MR; diff --git a/drivers/infiniband/hw/nes/nes_verbs.h b/drivers/infiniband/hw/nes/nes_verbs.h index fe6b6e92fa90..0eff7c44d76b 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.h +++ b/drivers/infiniband/hw/nes/nes_verbs.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 - 2009 Intel Corporation. All rights reserved. + * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. 
* * This software is available to you under a choice of one of two diff --git a/drivers/infiniband/hw/qib/qib_7220.h b/drivers/infiniband/hw/qib/qib_7220.h index 21f374aa0631..a5356cb4252e 100644 --- a/drivers/infiniband/hw/qib/qib_7220.h +++ b/drivers/infiniband/hw/qib/qib_7220.h @@ -97,7 +97,7 @@ struct qib_chippport_specific { u64 iblnkerrsnap; u64 ibcctrl; /* kr_ibcctrl shadow */ u64 ibcddrctrl; /* kr_ibcddrctrl shadow */ - u64 chase_end; + unsigned long chase_end; u32 last_delay_mult; }; diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index c90a55f4120f..6fc9365ba8a6 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -371,9 +371,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, lnh == QIB_LRH_GRH, qp, be32_to_cpu(ohdr->bth[0])); - if (ruc_res) { + if (ruc_res) goto unlock; - } /* Only deal with RDMA Writes for now */ if (opcode < diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index 574600ef5b42..a7403248d83d 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -1285,7 +1285,7 @@ static int setup_ctxt(struct qib_pportdata *ppd, int ctxt, strlcpy(rcd->comm, current->comm, sizeof(rcd->comm)); ctxt_fp(fp) = rcd; qib_stats.sps_ctxts++; - dd->freectxts++; + dd->freectxts--; ret = 0; goto bail; @@ -1794,7 +1794,7 @@ static int qib_close(struct inode *in, struct file *fp) if (dd->pageshadow) unlock_expected_tids(rcd); qib_stats.sps_ctxts--; - dd->freectxts--; + dd->freectxts++; } mutex_unlock(&qib_mutex); diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index df7fa251dcdc..05e0f17c5b44 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -47,7 +47,7 @@ static struct super_block *qib_super; #define private2dd(file) ((file)->f_dentry->d_inode->i_private) static int 
qibfs_mknod(struct inode *dir, struct dentry *dentry, - int mode, const struct file_operations *fops, + umode_t mode, const struct file_operations *fops, void *data) { int error; @@ -67,7 +67,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_mtime = inode->i_atime; inode->i_ctime = inode->i_atime; inode->i_private = data; - if ((mode & S_IFMT) == S_IFDIR) { + if (S_ISDIR(mode)) { inode->i_op = &simple_dir_inode_operations; inc_nlink(inode); inc_nlink(dir); @@ -82,7 +82,7 @@ bail: return error; } -static int create_file(const char *name, mode_t mode, +static int create_file(const char *name, umode_t mode, struct dentry *parent, struct dentry **dentry, const struct file_operations *fops, void *data) { diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 781a802a321f..d0c64d514813 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -2076,9 +2076,11 @@ static void qib_6120_config_ctxts(struct qib_devdata *dd) static void qib_update_6120_usrhead(struct qib_ctxtdata *rcd, u64 hd, u32 updegr, u32 egrhd, u32 npkts) { - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); } static u32 qib_6120_hdrqempty(struct qib_ctxtdata *rcd) @@ -2103,7 +2105,7 @@ static void alloc_dummy_hdrq(struct qib_devdata *dd) dd->cspec->dummy_hdrq = dma_alloc_coherent(&dd->pcidev->dev, dd->rcd[0]->rcvhdrq_size, &dd->cspec->dummy_hdrq_phys, - GFP_KERNEL | __GFP_COMP); + GFP_ATOMIC | __GFP_COMP); if (!dd->cspec->dummy_hdrq) { qib_devinfo(dd->pcidev, "Couldn't allocate dummy hdrq\n"); /* fallback to just 0'ing */ diff --git a/drivers/infiniband/hw/qib/qib_iba7220.c b/drivers/infiniband/hw/qib/qib_iba7220.c index 439d3c503cd5..3c722f79d6f6 100644 --- a/drivers/infiniband/hw/qib/qib_iba7220.c +++ 
b/drivers/infiniband/hw/qib/qib_iba7220.c @@ -1051,7 +1051,7 @@ static void reenable_7220_chase(unsigned long opaque) static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst) { u8 ibclt; - u64 tnow; + unsigned long tnow; ibclt = (u8)SYM_FIELD(ibcst, IBCStatus, LinkTrainingState); @@ -1066,9 +1066,9 @@ static void handle_7220_chase(struct qib_pportdata *ppd, u64 ibcst) case IB_7220_LT_STATE_CFGWAITRMT: case IB_7220_LT_STATE_TXREVLANES: case IB_7220_LT_STATE_CFGENH: - tnow = get_jiffies_64(); + tnow = jiffies; if (ppd->cpspec->chase_end && - time_after64(tnow, ppd->cpspec->chase_end)) { + time_after(tnow, ppd->cpspec->chase_end)) { ppd->cpspec->chase_end = 0; qib_set_ib_7220_lstate(ppd, QLOGIC_IB_IBCC_LINKCMD_DOWN, @@ -2725,9 +2725,11 @@ static int qib_7220_set_loopback(struct qib_pportdata *ppd, const char *what) static void qib_update_7220_usrhead(struct qib_ctxtdata *rcd, u64 hd, u32 updegr, u32 egrhd, u32 npkts) { - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); } static u32 qib_7220_hdrqempty(struct qib_ctxtdata *rcd) diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 5bd2162b95dc..41e92089e41b 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -615,8 +615,8 @@ struct qib_chippport_specific { u64 ibmalfsnap; u64 ibcctrl_a; /* krp_ibcctrl_a shadow */ u64 ibcctrl_b; /* krp_ibcctrl_b shadow */ - u64 qdr_dfe_time; - u64 chase_end; + unsigned long qdr_dfe_time; + unsigned long chase_end; u32 autoneg_tries; u32 recovery_init; u32 qdr_dfe_on; @@ -1672,7 +1672,8 @@ static void reenable_chase(unsigned long opaque) QLOGIC_IB_IBCC_LINKINITCMD_POLL); } -static void disable_chase(struct qib_pportdata *ppd, u64 tnow, u8 ibclt) +static void disable_chase(struct qib_pportdata *ppd, unsigned long tnow, + u8 
ibclt) { ppd->cpspec->chase_end = 0; @@ -1688,7 +1689,7 @@ static void disable_chase(struct qib_pportdata *ppd, u64 tnow, u8 ibclt) static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) { u8 ibclt; - u64 tnow; + unsigned long tnow; ibclt = (u8)SYM_FIELD(ibcst, IBCStatusA_0, LinkTrainingState); @@ -1703,9 +1704,9 @@ static void handle_serdes_issues(struct qib_pportdata *ppd, u64 ibcst) case IB_7322_LT_STATE_CFGWAITRMT: case IB_7322_LT_STATE_TXREVLANES: case IB_7322_LT_STATE_CFGENH: - tnow = get_jiffies_64(); + tnow = jiffies; if (ppd->cpspec->chase_end && - time_after64(tnow, ppd->cpspec->chase_end)) + time_after(tnow, ppd->cpspec->chase_end)) disable_chase(ppd, tnow, ibclt); else if (!ppd->cpspec->chase_end) ppd->cpspec->chase_end = tnow + QIB_CHASE_TIME; @@ -2307,19 +2308,11 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) SYM_LSB(IBCCtrlA_0, MaxPktLen); ppd->cpspec->ibcctrl_a = ibc; /* without linkcmd or linkinitcmd! */ - /* initially come up waiting for TS1, without sending anything. */ - val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << - QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); - - ppd->cpspec->ibcctrl_a = val; /* * Reset the PCS interface to the serdes (and also ibc, which is still * in reset from above). Writes new value of ibcctrl_a as last step. */ qib_7322_mini_pcs_reset(ppd); - qib_write_kreg(dd, kr_scratch, 0ULL); - /* clear the linkinit cmds */ - ppd->cpspec->ibcctrl_a &= ~SYM_MASK(IBCCtrlA_0, LinkInitCmd); if (!ppd->cpspec->ibcctrl_b) { unsigned lse = ppd->link_speed_enabled; @@ -2385,6 +2378,14 @@ static int qib_7322_bringup_serdes(struct qib_pportdata *ppd) ppd->cpspec->ibcctrl_a |= SYM_MASK(IBCCtrlA_0, IBLinkEn); set_vls(ppd); + /* initially come up DISABLED, without sending anything. 
*/ + val = ppd->cpspec->ibcctrl_a | (QLOGIC_IB_IBCC_LINKINITCMD_DISABLE << + QLOGIC_IB_IBCC_LINKINITCMD_SHIFT); + qib_write_kreg_port(ppd, krp_ibcctrl_a, val); + qib_write_kreg(dd, kr_scratch, 0ULL); + /* clear the linkinit cmds */ + ppd->cpspec->ibcctrl_a = val & ~SYM_MASK(IBCCtrlA_0, LinkInitCmd); + /* be paranoid against later code motion, etc. */ spin_lock_irqsave(&dd->cspec->rcvmod_lock, flags); ppd->p_rcvctrl |= SYM_MASK(RcvCtrl_0, RcvIBPortEnable); @@ -2714,7 +2715,7 @@ static noinline void unknown_7322_gpio_intr(struct qib_devdata *dd) pins >>= SYM_LSB(EXTStatus, GPIOIn); if (!(pins & mask)) { ++handled; - qd->t_insert = get_jiffies_64(); + qd->t_insert = jiffies; queue_work(ib_wq, &qd->work); } } @@ -3602,7 +3603,7 @@ static void qib_7322_config_ctxts(struct qib_devdata *dd) if (qib_rcvhdrcnt) dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, qib_rcvhdrcnt); else - dd->rcvhdrcnt = max(dd->cspec->rcvegrcnt, + dd->rcvhdrcnt = 2 * max(dd->cspec->rcvegrcnt, dd->num_pports > 1 ? 1024U : 2048U); } @@ -4082,10 +4083,12 @@ static void qib_update_7322_usrhead(struct qib_ctxtdata *rcd, u64 hd, */ if (hd >> IBA7322_HDRHEAD_PKTINT_SHIFT) adjust_rcv_timeout(rcd, npkts); - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); - qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); if (updegr) qib_write_ureg(rcd->dd, ur_rcvegrindexhead, egrhd, rcd->ctxt); + mmiowb(); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + qib_write_ureg(rcd->dd, ur_rcvhdrhead, hd, rcd->ctxt); + mmiowb(); } static u32 qib_7322_hdrqempty(struct qib_ctxtdata *rcd) @@ -4794,7 +4797,7 @@ static void qib_get_7322_faststats(unsigned long opaque) (ppd->lflags & (QIBL_LINKINIT | QIBL_LINKARMED | QIBL_LINKACTIVE)) && ppd->cpspec->qdr_dfe_time && - time_after64(get_jiffies_64(), ppd->cpspec->qdr_dfe_time)) { + time_is_before_jiffies(ppd->cpspec->qdr_dfe_time)) { ppd->cpspec->qdr_dfe_on = 0; qib_write_kreg_port(ppd, krp_static_adapt_dis(2), @@ -5240,8 +5243,8 @@ static int qib_7322_ib_updown(struct 
qib_pportdata *ppd, int ibup, u64 ibcs) /* schedule the qsfp refresh which should turn the link off */ if (ppd->dd->flags & QIB_HAS_QSFP) { - qd->t_insert = get_jiffies_64(); - schedule_work(&qd->work); + qd->t_insert = jiffies; + queue_work(ib_wq, &qd->work); } spin_lock_irqsave(&ppd->sdma_lock, flags); if (__qib_sdma_running(ppd)) @@ -5592,7 +5595,7 @@ static void qsfp_7322_event(struct work_struct *work) { struct qib_qsfp_data *qd; struct qib_pportdata *ppd; - u64 pwrup; + unsigned long pwrup; unsigned long flags; int ret; u32 le2; @@ -5620,8 +5623,7 @@ static void qsfp_7322_event(struct work_struct *work) * to insertion. */ while (1) { - u64 now = get_jiffies_64(); - if (time_after64(now, pwrup)) + if (time_is_before_jiffies(pwrup)) break; msleep(20); } @@ -7506,7 +7508,7 @@ static int serdes_7322_init_old(struct qib_pportdata *ppd) static int serdes_7322_init_new(struct qib_pportdata *ppd) { - u64 tstart; + unsigned long tend; u32 le_val, rxcaldone; int chan, chan_done = (1 << SERDES_CHANS) - 1; @@ -7611,10 +7613,8 @@ static int serdes_7322_init_new(struct qib_pportdata *ppd) msleep(20); /* Start Calibration */ ibsd_wr_allchans(ppd, 4, (1 << 10), BMASK(10, 10)); - tstart = get_jiffies_64(); - while (chan_done && - !time_after64(get_jiffies_64(), - tstart + msecs_to_jiffies(500))) { + tend = jiffies + msecs_to_jiffies(500); + while (chan_done && !time_is_before_jiffies(tend)) { msleep(20); for (chan = 0; chan < SERDES_CHANS; ++chan) { rxcaldone = ahb_mod(ppd->dd, IBSD(ppd->hw_pidx), diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index 58b0f8ad4a29..cf0cd30adc8d 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -1015,7 +1015,7 @@ static int __devinit qib_init_one(struct pci_dev *, #define DRIVER_LOAD_MSG "QLogic " QIB_DRV_NAME " loaded: " #define PFX QIB_DRV_NAME ": " -static const struct pci_device_id qib_pci_tbl[] = { +static DEFINE_PCI_DEVICE_TABLE(qib_pci_tbl) = { { 
PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_QLOGIC_IB_6120) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7220) }, { PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_QLOGIC_IB_7322) }, diff --git a/drivers/infiniband/hw/qib/qib_pcie.c b/drivers/infiniband/hw/qib/qib_pcie.c index 97a8bdf68e60..0fde788e1100 100644 --- a/drivers/infiniband/hw/qib/qib_pcie.c +++ b/drivers/infiniband/hw/qib/qib_pcie.c @@ -562,7 +562,7 @@ static int qib_tune_pcie_coalesce(struct qib_devdata *dd) */ static int qib_pcie_caps; module_param_named(pcie_caps, qib_pcie_caps, int, S_IRUGO); -MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (4lsb), ReadReq (D4..7)"); +MODULE_PARM_DESC(pcie_caps, "Max PCIe tuning: Payload (0..3), ReadReq (4..7)"); static int qib_tune_pcie_caps(struct qib_devdata *dd) { diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c index e06c4ed383f1..fa71b1e666c5 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.c +++ b/drivers/infiniband/hw/qib/qib_qsfp.c @@ -480,18 +480,6 @@ void qib_qsfp_init(struct qib_qsfp_data *qd, udelay(20); /* Generous RST dwell */ dd->f_gpio_mod(dd, mask, mask, mask); - /* Spec says module can take up to two seconds! */ - mask = QSFP_GPIO_MOD_PRS_N; - if (qd->ppd->hw_pidx) - mask <<= QSFP_GPIO_PORT2_SHIFT; - - /* Do not try to wait here. Better to let event handle it */ - if (!qib_qsfp_mod_present(qd->ppd)) - goto bail; - /* We see a module, but it may be unwise to look yet. 
Just schedule */ - qd->t_insert = get_jiffies_64(); - queue_work(ib_wq, &qd->work); -bail: return; } diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h index 46002a9417c0..91908f533a2b 100644 --- a/drivers/infiniband/hw/qib/qib_qsfp.h +++ b/drivers/infiniband/hw/qib/qib_qsfp.h @@ -177,7 +177,7 @@ struct qib_qsfp_data { struct qib_pportdata *ppd; struct work_struct work; struct qib_qsfp_cache cache; - u64 t_insert; + unsigned long t_insert; u8 modpresent; }; diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c index de1a4b2f33c0..ac065dd6b693 100644 --- a/drivers/infiniband/hw/qib/qib_sd7220.c +++ b/drivers/infiniband/hw/qib/qib_sd7220.c @@ -300,7 +300,7 @@ bail: } static void qib_sd_trimdone_monitor(struct qib_devdata *dd, - const char *where) + const char *where) { int ret, chn, baduns; u64 val; diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c index 78fbd56879d4..dae51604cfcd 100644 --- a/drivers/infiniband/hw/qib/qib_sysfs.c +++ b/drivers/infiniband/hw/qib/qib_sysfs.c @@ -150,7 +150,7 @@ static ssize_t show_status(struct qib_pportdata *ppd, char *buf) * For userland compatibility, these offsets must remain fixed. 
* They are strings for QIB_STATUS_* */ -static const char *qib_status_str[] = { +static const char * const qib_status_str[] = { "Initted", "", "", diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index a894762da462..7b6c3bffa9d9 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -913,8 +913,8 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss, __raw_writel(last, piobuf); } -static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, - struct qib_qp *qp, int *retp) +static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) { struct qib_verbs_txreq *tx; unsigned long flags; @@ -926,8 +926,9 @@ static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, struct list_head *l = dev->txreq_free.next; list_del(l); + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); tx = list_entry(l, struct qib_verbs_txreq, txreq.list); - *retp = 0; } else { if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK && list_empty(&qp->iowait)) { @@ -935,14 +936,33 @@ static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, qp->s_flags |= QIB_S_WAIT_TX; list_add_tail(&qp->iowait, &dev->txwait); } - tx = NULL; qp->s_flags &= ~QIB_S_BUSY; - *retp = -EBUSY; + spin_unlock(&dev->pending_lock); + spin_unlock_irqrestore(&qp->s_lock, flags); + tx = ERR_PTR(-EBUSY); } + return tx; +} - spin_unlock(&dev->pending_lock); - spin_unlock_irqrestore(&qp->s_lock, flags); +static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev, + struct qib_qp *qp) +{ + struct qib_verbs_txreq *tx; + unsigned long flags; + spin_lock_irqsave(&dev->pending_lock, flags); + /* assume the list non empty */ + if (likely(!list_empty(&dev->txreq_free))) { + struct list_head *l = dev->txreq_free.next; + + list_del(l); + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = list_entry(l, struct qib_verbs_txreq, txreq.list); + } else { + 
/* call slow path to get the extra lock */ + spin_unlock_irqrestore(&dev->pending_lock, flags); + tx = __get_txreq(dev, qp); + } return tx; } @@ -1122,9 +1142,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr, goto bail; } - tx = get_txreq(dev, qp, &ret); - if (!tx) - goto bail; + tx = get_txreq(dev, qp); + if (IS_ERR(tx)) + goto bail_tx; control = dd->f_setpbc_control(ppd, plen, qp->s_srate, be16_to_cpu(hdr->lrh[0]) >> 12); @@ -1195,6 +1215,9 @@ unaligned: ibp->n_unaligned++; bail: return ret; +bail_tx: + ret = PTR_ERR(tx); + goto bail; } /* diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index b3cc1e062b17..86df632ea612 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -44,6 +44,7 @@ #include <linux/mutex.h> #include <net/neighbour.h> +#include <net/sch_generic.h> #include <linux/atomic.h> @@ -117,8 +118,9 @@ struct ipoib_header { u16 reserved; }; -struct ipoib_pseudoheader { - u8 hwaddr[INFINIBAND_ALEN]; +struct ipoib_cb { + struct qdisc_skb_cb qdisc_cb; + u8 hwaddr[INFINIBAND_ALEN]; }; /* Used for all multicast joins (broadcast, IPv4 mcast and IPv6 mcast) */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 0ef9af94997d..4115be54ba3b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -57,21 +57,24 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ib_pd *pd, struct ib_ah_attr *attr) { struct ipoib_ah *ah; + struct ib_ah *vah; ah = kmalloc(sizeof *ah, GFP_KERNEL); if (!ah) - return NULL; + return ERR_PTR(-ENOMEM); ah->dev = dev; ah->last_send = 0; kref_init(&ah->ref); - ah->ah = ib_create_ah(pd, attr); - if (IS_ERR(ah->ah)) { + vah = ib_create_ah(pd, attr); + if (IS_ERR(vah)) { kfree(ah); - ah = NULL; - } else + ah = (struct ipoib_ah *)vah; + } else { + ah->ah = vah; ipoib_dbg(netdev_priv(dev), "Created ah %p\n", ah->ah); + } 
return ah; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7567b6000230..3974c290b667 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -171,7 +171,7 @@ static int ipoib_stop(struct net_device *dev) return 0; } -static u32 ipoib_fix_features(struct net_device *dev, u32 features) +static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -432,7 +432,7 @@ static void path_rec_completion(int status, spin_lock_irqsave(&priv->lock, flags); - if (ah) { + if (!IS_ERR_OR_NULL(ah)) { path->pathrec = *pathrec; old_ah = path->ah; @@ -555,15 +555,14 @@ static int path_rec_start(struct net_device *dev, return 0; } -static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) +/* called with rcu_read_lock */ +static void neigh_add_path(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; struct ipoib_neigh *neigh; - struct neighbour *n; unsigned long flags; - n = dst_get_neighbour(skb_dst(skb)); neigh = ipoib_neigh_alloc(n, skb->dev); if (!neigh) { ++dev->stats.tx_dropped; @@ -636,16 +635,14 @@ err_drop: spin_unlock_irqrestore(&priv->lock, flags); } -static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) +/* called with rcu_read_lock */ +static void ipoib_path_lookup(struct sk_buff *skb, struct neighbour *n, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(skb->dev); - struct dst_entry *dst = skb_dst(skb); - struct neighbour *n; /* Look up path record for unicasts */ - n = dst_get_neighbour(dst); if (n->ha[4] != 0xff) { - neigh_add_path(skb, dev); + neigh_add_path(skb, n, dev); return; } @@ -656,7 +653,7 @@ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) } static void unicast_arp_send(struct sk_buff *skb, struct 
net_device *dev, - struct ipoib_pseudoheader *phdr) + struct ipoib_cb *cb) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_path *path; @@ -664,17 +661,15 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_lock_irqsave(&priv->lock, flags); - path = __path_find(dev, phdr->hwaddr + 4); + path = __path_find(dev, cb->hwaddr + 4); if (!path || !path->valid) { int new_path = 0; if (!path) { - path = path_rec_create(dev, phdr->hwaddr + 4); + path = path_rec_create(dev, cb->hwaddr + 4); new_path = 1; } if (path) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof *phdr); __skb_queue_tail(&path->queue, skb); if (!path->query && path_rec_start(dev, path)) { @@ -698,12 +693,10 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, be16_to_cpu(path->pathrec.dlid)); spin_unlock_irqrestore(&priv->lock, flags); - ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr)); + ipoib_send(dev, skb, path->ah, IPOIB_QPN(cb->hwaddr)); return; } else if ((path->query || !path_rec_start(dev, path)) && skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof *phdr); __skb_queue_tail(&path->queue, skb); } else { ++dev->stats.tx_dropped; @@ -720,13 +713,19 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct neighbour *n = NULL; unsigned long flags; - if (likely(skb_dst(skb))) - n = dst_get_neighbour(skb_dst(skb)); - + rcu_read_lock(); + if (likely(skb_dst(skb))) { + n = dst_get_neighbour_noref(skb_dst(skb)); + if (!n) { + ++dev->stats.tx_dropped; + dev_kfree_skb_any(skb); + goto unlock; + } + } if (likely(n)) { if (unlikely(!*to_ipoib_neigh(n))) { - ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + ipoib_path_lookup(skb, n, dev); + goto unlock; } neigh = *to_ipoib_neigh(n); @@ -748,18 +747,18 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) list_del(&neigh->list); 
ipoib_neigh_free(dev, neigh); spin_unlock_irqrestore(&priv->lock, flags); - ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + ipoib_path_lookup(skb, n, dev); + goto unlock; } if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - return NETDEV_TX_OK; + goto unlock; } } else if (neigh->ah) { ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha)); - return NETDEV_TX_OK; + goto unlock; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { @@ -771,16 +770,14 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) dev_kfree_skb_any(skb); } } else { - struct ipoib_pseudoheader *phdr = - (struct ipoib_pseudoheader *) skb->data; - skb_pull(skb, sizeof *phdr); + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; - if (phdr->hwaddr[4] == 0xff) { + if (cb->hwaddr[4] == 0xff) { /* Add in the P_Key for multicast*/ - phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff; - phdr->hwaddr[9] = priv->pkey & 0xff; + cb->hwaddr[8] = (priv->pkey >> 8) & 0xff; + cb->hwaddr[9] = priv->pkey & 0xff; - ipoib_mcast_send(dev, phdr->hwaddr + 4, skb); + ipoib_mcast_send(dev, cb->hwaddr + 4, skb); } else { /* unicast GID -- should be ARP or RARP reply */ @@ -789,17 +786,18 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x %pI6\n", skb_dst(skb) ? 
"neigh" : "dst", be16_to_cpup((__be16 *) skb->data), - IPOIB_QPN(phdr->hwaddr), - phdr->hwaddr + 4); + IPOIB_QPN(cb->hwaddr), + cb->hwaddr + 4); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; - return NETDEV_TX_OK; + goto unlock; } - unicast_arp_send(skb, dev, phdr); + unicast_arp_send(skb, dev, cb); } } - +unlock: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -821,8 +819,6 @@ static int ipoib_hard_header(struct sk_buff *skb, const void *daddr, const void *saddr, unsigned len) { struct ipoib_header *header; - struct dst_entry *dst; - struct neighbour *n; header = (struct ipoib_header *) skb_push(skb, sizeof *header); @@ -830,18 +826,13 @@ static int ipoib_hard_header(struct sk_buff *skb, header->reserved = 0; /* - * If we don't have a neighbour structure, stuff the - * destination address onto the front of the skb so we can - * figure out where to send the packet later. + * If we don't have a dst_entry structure, stuff the + * destination address into skb->cb so we can figure out where + * to send the packet later. */ - dst = skb_dst(skb); - n = NULL; - if (dst) - n = dst_get_neighbour(dst); - if ((!dst || !n) && daddr) { - struct ipoib_pseudoheader *phdr = - (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); - memcpy(phdr->hwaddr, daddr, INFINIBAND_ALEN); + if (!skb_dst(skb)) { + struct ipoib_cb *cb = (struct ipoib_cb *) skb->cb; + memcpy(cb->hwaddr, daddr, INFINIBAND_ALEN); } return 0; @@ -1017,11 +1008,7 @@ static void ipoib_setup(struct net_device *dev) dev->flags |= IFF_BROADCAST | IFF_MULTICAST; - /* - * We add in INFINIBAND_ALEN to allow for the destination - * address "pseudoheader" for skbs without neighbour struct. 
- */ - dev->hard_header_len = IPOIB_ENCAP_LEN + INFINIBAND_ALEN; + dev->hard_header_len = IPOIB_ENCAP_LEN; dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = ipoib_sendq_size * 2; @@ -1218,6 +1205,8 @@ static struct net_device *ipoib_add_port(const char *format, priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; + priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh); + result = ib_query_pkey(hca, port, 0, &priv->pkey); if (result) { printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n", diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 1b7a97686356..20ebc6fd1bb9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -240,8 +240,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, av.grh.dgid = mcast->mcmember.mgid; ah = ipoib_create_ah(dev, priv->pd, &av); - if (!ah) { - ipoib_warn(priv, "ib_address_create failed\n"); + if (IS_ERR(ah)) { + ipoib_warn(priv, "ib_address_create failed %ld\n", + -PTR_ERR(ah)); + /* use original error */ + return PTR_ERR(ah); } else { spin_lock_irq(&priv->lock); mcast->ah = ah; @@ -259,21 +262,13 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, netif_tx_lock_bh(dev); while (!skb_queue_empty(&mcast->pkt_queue)) { struct sk_buff *skb = skb_dequeue(&mcast->pkt_queue); - struct dst_entry *dst = skb_dst(skb); - struct neighbour *n = NULL; netif_tx_unlock_bh(dev); skb->dev = dev; - if (dst) - n = dst_get_neighbour(dst); - if (!dst || !n) { - /* put pseudoheader back on for next time */ - skb_push(skb, sizeof (struct ipoib_pseudoheader)); - } - if (dev_queue_xmit(skb)) ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n"); + netif_tx_lock_bh(dev); } netif_tx_unlock_bh(dev); @@ -722,8 +717,10 @@ out: if (mcast && mcast->ah) { struct dst_entry *dst = skb_dst(skb); struct neighbour *n = 
NULL; + + rcu_read_lock(); if (dst) - n = dst_get_neighbour(dst); + n = dst_get_neighbour_noref(dst); if (n && !*to_ipoib_neigh(n)) { struct ipoib_neigh *neigh = ipoib_neigh_alloc(n, skb->dev); @@ -734,7 +731,7 @@ out: list_add_tail(&neigh->list, &mcast->neigh_list); } } - + rcu_read_unlock(); spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); return; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 7e7373a700e6..9a43cb07f294 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -638,7 +638,7 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) iser_conn_terminate(ib_conn); } -static mode_t iser_attr_is_visible(int param_type, int param) +static umode_t iser_attr_is_visible(int param_type, int param) { switch (param_type) { case ISCSI_HOST_PARAM: diff --git a/drivers/infiniband/ulp/srpt/Kconfig b/drivers/infiniband/ulp/srpt/Kconfig new file mode 100644 index 000000000000..31ee83d528d9 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/Kconfig @@ -0,0 +1,12 @@ +config INFINIBAND_SRPT + tristate "InfiniBand SCSI RDMA Protocol target support" + depends on INFINIBAND && TARGET_CORE + ---help--- + + Support for the SCSI RDMA Protocol (SRP) Target driver. The + SRP protocol is a protocol that allows an initiator to access + a block storage device on another host (target) over a network + that supports the RDMA protocol. Currently the RDMA protocol is + supported by InfiniBand and by iWarp network hardware. More + information about the SRP protocol can be found on the website + of the INCITS T10 technical committee (http://www.t10.org/). 
diff --git a/drivers/infiniband/ulp/srpt/Makefile b/drivers/infiniband/ulp/srpt/Makefile new file mode 100644 index 000000000000..e3ee4bdfffa5 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/Makefile @@ -0,0 +1,2 @@ +ccflags-y := -Idrivers/target +obj-$(CONFIG_INFINIBAND_SRPT) += ib_srpt.o diff --git a/drivers/infiniband/ulp/srpt/ib_dm_mad.h b/drivers/infiniband/ulp/srpt/ib_dm_mad.h new file mode 100644 index 000000000000..fb1de1f6f297 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_dm_mad.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + */ + +#ifndef IB_DM_MAD_H +#define IB_DM_MAD_H + +#include <linux/types.h> + +#include <rdma/ib_mad.h> + +enum { + /* + * See also section 13.4.7 Status Field, table 115 MAD Common Status + * Field Bit Values and also section 16.3.1.1 Status Field in the + * InfiniBand Architecture Specification. + */ + DM_MAD_STATUS_UNSUP_METHOD = 0x0008, + DM_MAD_STATUS_UNSUP_METHOD_ATTR = 0x000c, + DM_MAD_STATUS_INVALID_FIELD = 0x001c, + DM_MAD_STATUS_NO_IOC = 0x0100, + + /* + * See also the Device Management chapter, section 16.3.3 Attributes, + * table 279 Device Management Attributes in the InfiniBand + * Architecture Specification. + */ + DM_ATTR_CLASS_PORT_INFO = 0x01, + DM_ATTR_IOU_INFO = 0x10, + DM_ATTR_IOC_PROFILE = 0x11, + DM_ATTR_SVC_ENTRIES = 0x12 +}; + +struct ib_dm_hdr { + u8 reserved[28]; +}; + +/* + * Structure of management datagram sent by the SRP target implementation. + * Contains a management datagram header, reliable multi-packet transaction + * protocol (RMPP) header and ib_dm_hdr. Notes: + * - The SRP target implementation does not use RMPP or ib_dm_hdr when sending + * management datagrams. + * - The header size must be exactly 64 bytes (IB_MGMT_DEVICE_HDR), since this + * is the header size that is passed to ib_create_send_mad() in ib_srpt.c. + * - The maximum supported size for a management datagram when not using RMPP + * is 256 bytes -- 64 bytes header and 192 (IB_MGMT_DEVICE_DATA) bytes data. + */ +struct ib_dm_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_dm_hdr dm_hdr; + u8 data[IB_MGMT_DEVICE_DATA]; +}; + +/* + * IOUnitInfo as defined in section 16.3.3.3 IOUnitInfo of the InfiniBand + * Architecture Specification. + */ +struct ib_dm_iou_info { + __be16 change_id; + u8 max_controllers; + u8 op_rom; + u8 controller_list[128]; +}; + +/* + * IOControllerprofile as defined in section 16.3.3.4 IOControllerProfile of + * the InfiniBand Architecture Specification. 
+ */ +struct ib_dm_ioc_profile { + __be64 guid; + __be32 vendor_id; + __be32 device_id; + __be16 device_version; + __be16 reserved1; + __be32 subsys_vendor_id; + __be32 subsys_device_id; + __be16 io_class; + __be16 io_subclass; + __be16 protocol; + __be16 protocol_version; + __be16 service_conn; + __be16 initiators_supported; + __be16 send_queue_depth; + u8 reserved2; + u8 rdma_read_depth; + __be32 send_size; + __be32 rdma_size; + u8 op_cap_mask; + u8 svc_cap_mask; + u8 num_svc_entries; + u8 reserved3[9]; + u8 id_string[64]; +}; + +struct ib_dm_svc_entry { + u8 name[40]; + __be64 id; +}; + +/* + * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture + * Specification. See also section B.7, table B.8 in the T10 SRP r16a document. + */ +struct ib_dm_svc_entries { + struct ib_dm_svc_entry service_entries[4]; +}; + +#endif diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c new file mode 100644 index 000000000000..2b73d43cd691 --- /dev/null +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -0,0 +1,4070 @@ +/* + * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved. + * Copyright (C) 2008 - 2011 Bart Van Assche <bvanassche@acm.org>. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/ctype.h> +#include <linux/kthread.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/atomic.h> +#include <scsi/scsi_tcq.h> +#include <target/configfs_macros.h> +#include <target/target_core_base.h> +#include <target/target_core_fabric_configfs.h> +#include <target/target_core_fabric.h> +#include <target/target_core_configfs.h> +#include "ib_srpt.h" + +/* Name of this kernel module. */ +#define DRV_NAME "ib_srpt" +#define DRV_VERSION "2.0.0" +#define DRV_RELDATE "2011-02-14" + +#define SRPT_ID_STRING "Linux SRP target" + +#undef pr_fmt +#define pr_fmt(fmt) DRV_NAME " " fmt + +MODULE_AUTHOR("Vu Pham and Bart Van Assche"); +MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target " + "v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_LICENSE("Dual BSD/GPL"); + +/* + * Global Variables + */ + +static u64 srpt_service_guid; +static DEFINE_SPINLOCK(srpt_dev_lock); /* Protects srpt_dev_list. */ +static LIST_HEAD(srpt_dev_list); /* List of srpt_device structures. 
*/ + +static unsigned srp_max_req_size = DEFAULT_MAX_REQ_SIZE; +module_param(srp_max_req_size, int, 0444); +MODULE_PARM_DESC(srp_max_req_size, + "Maximum size of SRP request messages in bytes."); + +static int srpt_srq_size = DEFAULT_SRPT_SRQ_SIZE; +module_param(srpt_srq_size, int, 0444); +MODULE_PARM_DESC(srpt_srq_size, + "Shared receive queue (SRQ) size."); + +static int srpt_get_u64_x(char *buffer, struct kernel_param *kp) +{ + return sprintf(buffer, "0x%016llx", *(u64 *)kp->arg); +} +module_param_call(srpt_service_guid, NULL, srpt_get_u64_x, &srpt_service_guid, + 0444); +MODULE_PARM_DESC(srpt_service_guid, + "Using this value for ioc_guid, id_ext, and cm_listen_id" + " instead of using the node_guid of the first HCA."); + +static struct ib_client srpt_client; +static struct target_fabric_configfs *srpt_target; +static void srpt_release_channel(struct srpt_rdma_ch *ch); +static int srpt_queue_status(struct se_cmd *cmd); + +/** + * opposite_dma_dir() - Swap DMA_TO_DEVICE and DMA_FROM_DEVICE. + */ +static inline +enum dma_data_direction opposite_dma_dir(enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: return DMA_FROM_DEVICE; + case DMA_FROM_DEVICE: return DMA_TO_DEVICE; + default: return dir; + } +} + +/** + * srpt_sdev_name() - Return the name associated with the HCA. + * + * Examples are ib0, ib1, ... 
+ */ +static inline const char *srpt_sdev_name(struct srpt_device *sdev) +{ + return sdev->device->name; +} + +static enum rdma_ch_state srpt_get_ch_state(struct srpt_rdma_ch *ch) +{ + unsigned long flags; + enum rdma_ch_state state; + + spin_lock_irqsave(&ch->spinlock, flags); + state = ch->state; + spin_unlock_irqrestore(&ch->spinlock, flags); + return state; +} + +static enum rdma_ch_state +srpt_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state new_state) +{ + unsigned long flags; + enum rdma_ch_state prev; + + spin_lock_irqsave(&ch->spinlock, flags); + prev = ch->state; + ch->state = new_state; + spin_unlock_irqrestore(&ch->spinlock, flags); + return prev; +} + +/** + * srpt_test_and_set_ch_state() - Test and set the channel state. + * + * Returns true if and only if the channel state has been set to the new state. + */ +static bool +srpt_test_and_set_ch_state(struct srpt_rdma_ch *ch, enum rdma_ch_state old, + enum rdma_ch_state new) +{ + unsigned long flags; + enum rdma_ch_state prev; + + spin_lock_irqsave(&ch->spinlock, flags); + prev = ch->state; + if (prev == old) + ch->state = new; + spin_unlock_irqrestore(&ch->spinlock, flags); + return prev == old; +} + +/** + * srpt_event_handler() - Asynchronous IB event callback function. + * + * Callback function called by the InfiniBand core when an asynchronous IB + * event occurs. This callback may occur in interrupt context. See also + * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand + * Architecture Specification. 
+ */ +static void srpt_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct srpt_device *sdev; + struct srpt_port *sport; + + sdev = ib_get_client_data(event->device, &srpt_client); + if (!sdev || sdev->device != event->device) + return; + + pr_debug("ASYNC event= %d on device= %s\n", event->event, + srpt_sdev_name(sdev)); + + switch (event->event) { + case IB_EVENT_PORT_ERR: + /* + * port_num is used as a one-based index below; reject 0 as well as + * values above phys_port_cnt so that port[port_num - 1] can never + * address an element in front of the array. + */ + if (event->element.port_num >= 1 && + event->element.port_num <= sdev->device->phys_port_cnt) { + sport = &sdev->port[event->element.port_num - 1]; + sport->lid = 0; + sport->sm_lid = 0; + } + break; + case IB_EVENT_PORT_ACTIVE: + case IB_EVENT_LID_CHANGE: + case IB_EVENT_PKEY_CHANGE: + case IB_EVENT_SM_CHANGE: + case IB_EVENT_CLIENT_REREGISTER: + /* Refresh port data asynchronously. */ + /* Same one-based range check as for IB_EVENT_PORT_ERR. */ + if (event->element.port_num >= 1 && + event->element.port_num <= sdev->device->phys_port_cnt) { + sport = &sdev->port[event->element.port_num - 1]; + if (!sport->lid && !sport->sm_lid) + schedule_work(&sport->work); + } + break; + default: + printk(KERN_ERR "received unrecognized IB event %d\n", + event->event); + break; + } +} + +/** + * srpt_srq_event() - SRQ event callback function. + */ +static void srpt_srq_event(struct ib_event *event, void *ctx) +{ + printk(KERN_INFO "SRQ event %d\n", event->event); +} + +/** + * srpt_qp_event() - QP event callback function.
+ */ +static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch) +{ + pr_debug("QP event %d on cm_id=%p sess_name=%s state=%d\n", + event->event, ch->cm_id, ch->sess_name, srpt_get_ch_state(ch)); + + switch (event->event) { + case IB_EVENT_COMM_EST: + ib_cm_notify(ch->cm_id, event->event); + break; + case IB_EVENT_QP_LAST_WQE_REACHED: + if (srpt_test_and_set_ch_state(ch, CH_DRAINING, + CH_RELEASING)) + srpt_release_channel(ch); + else + pr_debug("%s: state %d - ignored LAST_WQE.\n", + ch->sess_name, srpt_get_ch_state(ch)); + break; + default: + printk(KERN_ERR "received unrecognized IB QP event %d\n", + event->event); + break; + } +} + +/** + * srpt_set_ioc() - Helper function for initializing an IOUnitInfo structure. + * + * @slot: one-based slot number. + * @value: four-bit value. + * + * Copies the lowest four bits of value in element slot of the array of four + * bit elements called c_list (controller list). The index slot is one-based. + */ +static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value) +{ + u16 id; + u8 tmp; + + id = (slot - 1) / 2; + if (slot & 0x1) { + tmp = c_list[id] & 0xf; + c_list[id] = (value << 4) | tmp; + } else { + tmp = c_list[id] & 0xf0; + c_list[id] = (value & 0xf) | tmp; + } +} + +/** + * srpt_get_class_port_info() - Copy ClassPortInfo to a management datagram. + * + * See also section 16.3.3.1 ClassPortInfo in the InfiniBand Architecture + * Specification. + */ +static void srpt_get_class_port_info(struct ib_dm_mad *mad) +{ + struct ib_class_port_info *cif; + + cif = (struct ib_class_port_info *)mad->data; + memset(cif, 0, sizeof *cif); + cif->base_version = 1; + cif->class_version = 1; + cif->resp_time_value = 20; + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_iou() - Write IOUnitInfo to a management datagram. + * + * See also section 16.3.3.3 IOUnitInfo in the InfiniBand Architecture + * Specification. See also section B.7, table B.6 in the SRP r16a document. 
+ */ +static void srpt_get_iou(struct ib_dm_mad *mad) +{ + struct ib_dm_iou_info *ioui; + u8 slot; + int i; + + ioui = (struct ib_dm_iou_info *)mad->data; + ioui->change_id = __constant_cpu_to_be16(1); + ioui->max_controllers = 16; + + /* set present for slot 1 and empty for the rest */ + srpt_set_ioc(ioui->controller_list, 1, 1); + for (i = 1, slot = 2; i < 16; i++, slot++) + srpt_set_ioc(ioui->controller_list, slot, 0); + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_ioc() - Write IOControllerprofile to a management datagram. + * + * See also section 16.3.3.4 IOControllerProfile in the InfiniBand + * Architecture Specification. See also section B.7, table B.7 in the SRP + * r16a document. + */ +static void srpt_get_ioc(struct srpt_port *sport, u32 slot, + struct ib_dm_mad *mad) +{ + struct srpt_device *sdev = sport->sdev; + struct ib_dm_ioc_profile *iocp; + + iocp = (struct ib_dm_ioc_profile *)mad->data; + + if (!slot || slot > 16) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + return; + } + + if (slot > 2) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + return; + } + + memset(iocp, 0, sizeof *iocp); + strcpy(iocp->id_string, SRPT_ID_STRING); + iocp->guid = cpu_to_be64(srpt_service_guid); + iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); + iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id); + iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver); + iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id); + iocp->subsys_device_id = 0x0; + iocp->io_class = __constant_cpu_to_be16(SRP_REV16A_IB_IO_CLASS); + iocp->io_subclass = __constant_cpu_to_be16(SRP_IO_SUBCLASS); + iocp->protocol = __constant_cpu_to_be16(SRP_PROTOCOL); + iocp->protocol_version = __constant_cpu_to_be16(SRP_PROTOCOL_VERSION); + iocp->send_queue_depth = cpu_to_be16(sdev->srq_size); + iocp->rdma_read_depth = 4; + iocp->send_size = cpu_to_be32(srp_max_req_size); + iocp->rdma_size = 
cpu_to_be32(min(sport->port_attrib.srp_max_rdma_size, + 1U << 24)); + iocp->num_svc_entries = 1; + iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC | + SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC; + + mad->mad_hdr.status = 0; +} + +/** + * srpt_get_svc_entries() - Write ServiceEntries to a management datagram. + * + * See also section 16.3.3.5 ServiceEntries in the InfiniBand Architecture + * Specification. See also section B.7, table B.8 in the SRP r16a document. + */ +static void srpt_get_svc_entries(u64 ioc_guid, + u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad) +{ + struct ib_dm_svc_entries *svc_entries; + + WARN_ON(!ioc_guid); + + if (!slot || slot > 16) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD); + return; + } + + if (slot > 2 || lo > hi || hi > 1) { + mad->mad_hdr.status + = __constant_cpu_to_be16(DM_MAD_STATUS_NO_IOC); + return; + } + + svc_entries = (struct ib_dm_svc_entries *)mad->data; + memset(svc_entries, 0, sizeof *svc_entries); + svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid); + snprintf(svc_entries->service_entries[0].name, + sizeof(svc_entries->service_entries[0].name), + "%s%016llx", + SRP_SERVICE_NAME_PREFIX, + ioc_guid); + + mad->mad_hdr.status = 0; +} + +/** + * srpt_mgmt_method_get() - Process a received management datagram. + * @sp: source port through which the MAD has been received. + * @rq_mad: received MAD. + * @rsp_mad: response MAD. 
+ */ +static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad, + struct ib_dm_mad *rsp_mad) +{ + u16 attr_id; + u32 slot; + u8 hi, lo; + + attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id); + switch (attr_id) { + case DM_ATTR_CLASS_PORT_INFO: + srpt_get_class_port_info(rsp_mad); + break; + case DM_ATTR_IOU_INFO: + srpt_get_iou(rsp_mad); + break; + case DM_ATTR_IOC_PROFILE: + slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod); + srpt_get_ioc(sp, slot, rsp_mad); + break; + case DM_ATTR_SVC_ENTRIES: + slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod); + hi = (u8) ((slot >> 8) & 0xff); + lo = (u8) (slot & 0xff); + slot = (u16) ((slot >> 16) & 0xffff); + srpt_get_svc_entries(srpt_service_guid, + slot, hi, lo, rsp_mad); + break; + default: + rsp_mad->mad_hdr.status = + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + } +} + +/** + * srpt_mad_send_handler() - Post MAD-send callback function. + */ +static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_wc) +{ + ib_destroy_ah(mad_wc->send_buf->ah); + ib_free_send_mad(mad_wc->send_buf); +} + +/** + * srpt_mad_recv_handler() - MAD reception callback function. 
+ */ +static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_wc) +{ + struct srpt_port *sport = (struct srpt_port *)mad_agent->context; + struct ib_ah *ah; + struct ib_mad_send_buf *rsp; + struct ib_dm_mad *dm_mad; + + if (!mad_wc || !mad_wc->recv_buf.mad) + return; + + ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc, + mad_wc->recv_buf.grh, mad_agent->port_num); + if (IS_ERR(ah)) + goto err; + + BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR); + + rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp, + mad_wc->wc->pkey_index, 0, + IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA, + GFP_KERNEL); + if (IS_ERR(rsp)) + goto err_rsp; + + rsp->ah = ah; + + dm_mad = rsp->mad; + memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad); + dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; + dm_mad->mad_hdr.status = 0; + + switch (mad_wc->recv_buf.mad->mad_hdr.method) { + case IB_MGMT_METHOD_GET: + srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad); + break; + case IB_MGMT_METHOD_SET: + dm_mad->mad_hdr.status = + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR); + break; + default: + dm_mad->mad_hdr.status = + __constant_cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD); + break; + } + + if (!ib_post_send_mad(rsp, NULL)) { + ib_free_recv_mad(mad_wc); + /* will destroy_ah & free_send_mad in send completion */ + return; + } + + ib_free_send_mad(rsp); + +err_rsp: + ib_destroy_ah(ah); +err: + ib_free_recv_mad(mad_wc); +} + +/** + * srpt_refresh_port() - Configure a HCA port. + * + * Enable InfiniBand management datagram processing, update the cached sm_lid, + * lid and gid values, and register a callback function for processing MADs + * on the specified port. + * + * Note: It is safe to call this function more than once for the same port. 
+ */ +static int srpt_refresh_port(struct srpt_port *sport) +{ + struct ib_mad_reg_req reg_req; + struct ib_port_modify port_modify; + struct ib_port_attr port_attr; + int ret; + + memset(&port_modify, 0, sizeof port_modify); + port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + port_modify.clr_port_cap_mask = 0; + + ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + if (ret) + goto err_mod_port; + + ret = ib_query_port(sport->sdev->device, sport->port, &port_attr); + if (ret) + goto err_query_port; + + sport->sm_lid = port_attr.sm_lid; + sport->lid = port_attr.lid; + + ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); + if (ret) + goto err_query_port; + + if (!sport->mad_agent) { + memset(&reg_req, 0, sizeof reg_req); + reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; + reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION; + set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask); + set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask); + + sport->mad_agent = ib_register_mad_agent(sport->sdev->device, + sport->port, + IB_QPT_GSI, + &reg_req, 0, + srpt_mad_send_handler, + srpt_mad_recv_handler, + sport); + if (IS_ERR(sport->mad_agent)) { + ret = PTR_ERR(sport->mad_agent); + sport->mad_agent = NULL; + goto err_query_port; + } + } + + return 0; + +err_query_port: + + port_modify.set_port_cap_mask = 0; + port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP; + ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify); + +err_mod_port: + + return ret; +} + +/** + * srpt_unregister_mad_agent() - Unregister MAD callback functions. + * + * Note: It is safe to call this function more than once for the same device.
+ */ +static void srpt_unregister_mad_agent(struct srpt_device *sdev) +{ + struct ib_port_modify port_modify = { + .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP, + }; + struct srpt_port *sport; + int i; + + for (i = 1; i <= sdev->device->phys_port_cnt; i++) { + sport = &sdev->port[i - 1]; + WARN_ON(sport->port != i); + if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0) + printk(KERN_ERR "disabling MAD processing failed.\n"); + if (sport->mad_agent) { + ib_unregister_mad_agent(sport->mad_agent); + sport->mad_agent = NULL; + } + } +} + +/** + * srpt_alloc_ioctx() - Allocate an SRPT I/O context structure. + */ +static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev, + int ioctx_size, int dma_size, + enum dma_data_direction dir) +{ + struct srpt_ioctx *ioctx; + + ioctx = kmalloc(ioctx_size, GFP_KERNEL); + if (!ioctx) + goto err; + + ioctx->buf = kmalloc(dma_size, GFP_KERNEL); + if (!ioctx->buf) + goto err_free_ioctx; + + ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf, dma_size, dir); + if (ib_dma_mapping_error(sdev->device, ioctx->dma)) + goto err_free_buf; + + return ioctx; + +err_free_buf: + kfree(ioctx->buf); +err_free_ioctx: + kfree(ioctx); +err: + return NULL; +} + +/** + * srpt_free_ioctx() - Free an SRPT I/O context structure. + */ +static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx, + int dma_size, enum dma_data_direction dir) +{ + if (!ioctx) + return; + + ib_dma_unmap_single(sdev->device, ioctx->dma, dma_size, dir); + kfree(ioctx->buf); + kfree(ioctx); +} + +/** + * srpt_alloc_ioctx_ring() - Allocate a ring of SRPT I/O context structures. + * @sdev: Device to allocate the I/O context ring for. + * @ring_size: Number of elements in the I/O context ring. + * @ioctx_size: I/O context size. + * @dma_size: DMA buffer size. + * @dir: DMA data direction. 
+ */ +static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, + int ring_size, int ioctx_size, + int dma_size, enum dma_data_direction dir) +{ + struct srpt_ioctx **ring; + int i; + + WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) + && ioctx_size != sizeof(struct srpt_send_ioctx)); + + ring = kmalloc(ring_size * sizeof(ring[0]), GFP_KERNEL); + if (!ring) + goto out; + for (i = 0; i < ring_size; ++i) { + ring[i] = srpt_alloc_ioctx(sdev, ioctx_size, dma_size, dir); + if (!ring[i]) + goto err; + ring[i]->index = i; + } + goto out; + +err: + while (--i >= 0) + srpt_free_ioctx(sdev, ring[i], dma_size, dir); + kfree(ring); + ring = NULL; +out: + return ring; +} + +/** + * srpt_free_ioctx_ring() - Free the ring of SRPT I/O context structures. + */ +static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, + struct srpt_device *sdev, int ring_size, + int dma_size, enum dma_data_direction dir) +{ + int i; + + for (i = 0; i < ring_size; ++i) + srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); + kfree(ioctx_ring); +} + +/** + * srpt_get_cmd_state() - Get the state of a SCSI command. + */ +static enum srpt_command_state srpt_get_cmd_state(struct srpt_send_ioctx *ioctx) +{ + enum srpt_command_state state; + unsigned long flags; + + BUG_ON(!ioctx); + + spin_lock_irqsave(&ioctx->spinlock, flags); + state = ioctx->state; + spin_unlock_irqrestore(&ioctx->spinlock, flags); + return state; +} + +/** + * srpt_set_cmd_state() - Set the state of a SCSI command. + * + * Does not modify the state of aborted commands. Returns the previous command + * state. 
+ */ +static enum srpt_command_state srpt_set_cmd_state(struct srpt_send_ioctx *ioctx, + enum srpt_command_state new) +{ + enum srpt_command_state previous; + unsigned long flags; + + BUG_ON(!ioctx); + + spin_lock_irqsave(&ioctx->spinlock, flags); + previous = ioctx->state; + if (previous != SRPT_STATE_DONE) + ioctx->state = new; + spin_unlock_irqrestore(&ioctx->spinlock, flags); + + return previous; +} + +/** + * srpt_test_and_set_cmd_state() - Test and set the state of a command. + * + * Returns true if and only if the previous command state was equal to 'old'. + */ +static bool srpt_test_and_set_cmd_state(struct srpt_send_ioctx *ioctx, + enum srpt_command_state old, + enum srpt_command_state new) +{ + enum srpt_command_state previous; + unsigned long flags; + + WARN_ON(!ioctx); + WARN_ON(old == SRPT_STATE_DONE); + WARN_ON(new == SRPT_STATE_NEW); + + spin_lock_irqsave(&ioctx->spinlock, flags); + previous = ioctx->state; + if (previous == old) + ioctx->state = new; + spin_unlock_irqrestore(&ioctx->spinlock, flags); + return previous == old; +} + +/** + * srpt_post_recv() - Post an IB receive request. + */ +static int srpt_post_recv(struct srpt_device *sdev, + struct srpt_recv_ioctx *ioctx) +{ + struct ib_sge list; + struct ib_recv_wr wr, *bad_wr; + + BUG_ON(!sdev); + wr.wr_id = encode_wr_id(SRPT_RECV, ioctx->ioctx.index); + + list.addr = ioctx->ioctx.dma; + list.length = srp_max_req_size; + list.lkey = sdev->mr->lkey; + + wr.next = NULL; + wr.sg_list = &list; + wr.num_sge = 1; + + return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); +} + +/** + * srpt_post_send() - Post an IB send request. + * + * Returns zero upon success and a non-zero value upon failure. 
+ */ +static int srpt_post_send(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, int len) +{ + struct ib_sge list; + struct ib_send_wr wr, *bad_wr; + struct srpt_device *sdev = ch->sport->sdev; + int ret; + + atomic_inc(&ch->req_lim); + + ret = -ENOMEM; + if (unlikely(atomic_dec_return(&ch->sq_wr_avail) < 0)) { + printk(KERN_WARNING "IB send queue full (needed 1)\n"); + goto out; + } + + ib_dma_sync_single_for_device(sdev->device, ioctx->ioctx.dma, len, + DMA_TO_DEVICE); + + list.addr = ioctx->ioctx.dma; + list.length = len; + list.lkey = sdev->mr->lkey; + + wr.next = NULL; + wr.wr_id = encode_wr_id(SRPT_SEND, ioctx->ioctx.index); + wr.sg_list = &list; + wr.num_sge = 1; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(ch->qp, &wr, &bad_wr); + +out: + if (ret < 0) { + atomic_inc(&ch->sq_wr_avail); + atomic_dec(&ch->req_lim); + } + return ret; +} + +/** + * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request. + * @ioctx: Pointer to the I/O context associated with the request. + * @srp_cmd: Pointer to the SRP_CMD request data. + * @dir: Pointer to the variable to which the transfer direction will be + * written. + * @data_len: Pointer to the variable to which the total data length of all + * descriptors in the SRP_CMD request will be written. + * + * This function initializes ioctx->nrbuf and ioctx->r_bufs. + * + * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors; + * -ENOMEM when memory allocation fails and zero upon success. + */ +static int srpt_get_desc_tbl(struct srpt_send_ioctx *ioctx, + struct srp_cmd *srp_cmd, + enum dma_data_direction *dir, u64 *data_len) +{ + struct srp_indirect_buf *idb; + struct srp_direct_buf *db; + unsigned add_cdb_offset; + int ret; + + /* + * The pointer computations below will only be compiled correctly + * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check + * whether srp_cmd::add_data has been declared as a byte pointer. 
+ */ + BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0) + && !__same_type(srp_cmd->add_data[0], (u8)0)); + + BUG_ON(!dir); + BUG_ON(!data_len); + + ret = 0; + *data_len = 0; + + /* + * The lower four bits of the buffer format field contain the DATA-IN + * buffer descriptor format, and the highest four bits contain the + * DATA-OUT buffer descriptor format. + */ + *dir = DMA_NONE; + if (srp_cmd->buf_fmt & 0xf) + /* DATA-IN: transfer data from target to initiator (read). */ + *dir = DMA_FROM_DEVICE; + else if (srp_cmd->buf_fmt >> 4) + /* DATA-OUT: transfer data from initiator to target (write). */ + *dir = DMA_TO_DEVICE; + + /* + * According to the SRP spec, the lower two bits of the 'ADDITIONAL + * CDB LENGTH' field are reserved and the size in bytes of this field + * is four times the value specified in bits 3..7. Hence the "& ~3". + */ + add_cdb_offset = srp_cmd->add_cdb_len & ~3; + if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) { + ioctx->n_rbuf = 1; + ioctx->rbufs = &ioctx->single_rbuf; + + db = (struct srp_direct_buf *)(srp_cmd->add_data + + add_cdb_offset); + memcpy(ioctx->rbufs, db, sizeof *db); + *data_len = be32_to_cpu(db->len); + } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) || + ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) { + idb = (struct srp_indirect_buf *)(srp_cmd->add_data + + add_cdb_offset); + + ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db; + + if (ioctx->n_rbuf > + (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) { + printk(KERN_ERR "received unsupported SRP_CMD request" + " type (%u out + %u in != %u / %zu)\n", + srp_cmd->data_out_desc_cnt, + srp_cmd->data_in_desc_cnt, + be32_to_cpu(idb->table_desc.len), + sizeof(*db)); + ioctx->n_rbuf = 0; + ret = -EINVAL; + goto out; + } + + if (ioctx->n_rbuf == 1) + ioctx->rbufs = &ioctx->single_rbuf; + else { + ioctx->rbufs = + kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC); + if 
(!ioctx->rbufs) { + ioctx->n_rbuf = 0; + ret = -ENOMEM; + goto out; + } + } + + db = idb->desc_list; + memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db); + *data_len = be32_to_cpu(idb->len); + } +out: + return ret; +} + +/** + * srpt_init_ch_qp() - Initialize queue pair attributes. + * + * Initialized the attributes of queue pair 'qp' by allowing local write, + * remote read and remote write. Also transitions 'qp' to state IB_QPS_INIT. + */ +static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr *attr; + int ret; + + attr = kzalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + attr->qp_state = IB_QPS_INIT; + attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + attr->port_num = ch->sport->port; + attr->pkey_index = 0; + + ret = ib_modify_qp(qp, attr, + IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT | + IB_QP_PKEY_INDEX); + + kfree(attr); + return ret; +} + +/** + * srpt_ch_qp_rtr() - Change the state of a channel to 'ready to receive' (RTR). + * @ch: channel of the queue pair. + * @qp: queue pair to change the state of. + * + * Returns zero upon success and a negative value upon failure. + * + * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. + * If this structure ever becomes larger, it might be necessary to allocate + * it dynamically instead of on the stack. + */ +static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int attr_mask; + int ret; + + qp_attr.qp_state = IB_QPS_RTR; + ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask); + if (ret) + goto out; + + qp_attr.max_dest_rd_atomic = 4; + + ret = ib_modify_qp(qp, &qp_attr, attr_mask); + +out: + return ret; +} + +/** + * srpt_ch_qp_rts() - Change the state of a channel to 'ready to send' (RTS). + * @ch: channel of the queue pair. + * @qp: queue pair to change the state of. 
+ * + * Returns zero upon success and a negative value upon failure. + * + * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system. + * If this structure ever becomes larger, it might be necessary to allocate + * it dynamically instead of on the stack. + */ +static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp) +{ + struct ib_qp_attr qp_attr; + int attr_mask; + int ret; + + qp_attr.qp_state = IB_QPS_RTS; + ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask); + if (ret) + goto out; + + qp_attr.max_rd_atomic = 4; + + ret = ib_modify_qp(qp, &qp_attr, attr_mask); + +out: + return ret; +} + +/** + * srpt_ch_qp_err() - Set the channel queue pair state to 'error'. + */ +static int srpt_ch_qp_err(struct srpt_rdma_ch *ch) +{ + struct ib_qp_attr qp_attr; + + qp_attr.qp_state = IB_QPS_ERR; + return ib_modify_qp(ch->qp, &qp_attr, IB_QP_STATE); +} + +/** + * srpt_unmap_sg_to_ib_sge() - Unmap an IB SGE list. + */ +static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + struct scatterlist *sg; + enum dma_data_direction dir; + + BUG_ON(!ch); + BUG_ON(!ioctx); + BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius); + + while (ioctx->n_rdma) + kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge); + + kfree(ioctx->rdma_ius); + ioctx->rdma_ius = NULL; + + if (ioctx->mapped_sg_count) { + sg = ioctx->sg; + WARN_ON(!sg); + dir = ioctx->cmd.data_direction; + BUG_ON(dir == DMA_NONE); + ib_dma_unmap_sg(ch->sport->sdev->device, sg, ioctx->sg_cnt, + opposite_dma_dir(dir)); + ioctx->mapped_sg_count = 0; + } +} + +/** + * srpt_map_sg_to_ib_sge() - Map an SG list to an IB SGE list. 
+ */ +static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + struct se_cmd *cmd; + struct scatterlist *sg, *sg_orig; + int sg_cnt; + enum dma_data_direction dir; + struct rdma_iu *riu; + struct srp_direct_buf *db; + dma_addr_t dma_addr; + struct ib_sge *sge; + u64 raddr; + u32 rsize; + u32 tsize; + u32 dma_len; + int count, nrdma; + int i, j, k; + + BUG_ON(!ch); + BUG_ON(!ioctx); + cmd = &ioctx->cmd; + dir = cmd->data_direction; + BUG_ON(dir == DMA_NONE); + + transport_do_task_sg_chain(cmd); + ioctx->sg = sg = sg_orig = cmd->t_tasks_sg_chained; + ioctx->sg_cnt = sg_cnt = cmd->t_tasks_sg_chained_no; + + count = ib_dma_map_sg(ch->sport->sdev->device, sg, sg_cnt, + opposite_dma_dir(dir)); + if (unlikely(!count)) + return -EAGAIN; + + ioctx->mapped_sg_count = count; + + if (ioctx->rdma_ius && ioctx->n_rdma_ius) + nrdma = ioctx->n_rdma_ius; + else { + nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE + + ioctx->n_rbuf; + + ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu, GFP_KERNEL); + if (!ioctx->rdma_ius) + goto free_mem; + + ioctx->n_rdma_ius = nrdma; + } + + db = ioctx->rbufs; + tsize = cmd->data_length; + dma_len = sg_dma_len(&sg[0]); + riu = ioctx->rdma_ius; + + /* + * For each remote desc - calculate the #ib_sge. 
+ * If #ib_sge < SRPT_DEF_SG_PER_WQE per rdma operation then + * each remote desc rdma_iu is required a rdma wr; + * else + * we need to allocate extra rdma_iu to carry extra #ib_sge in + * another rdma wr + */ + for (i = 0, j = 0; + j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { + rsize = be32_to_cpu(db->len); + raddr = be64_to_cpu(db->va); + riu->raddr = raddr; + riu->rkey = be32_to_cpu(db->key); + riu->sge_cnt = 0; + + /* calculate how many sge required for this remote_buf */ + while (rsize > 0 && tsize > 0) { + + if (rsize >= dma_len) { + tsize -= dma_len; + rsize -= dma_len; + raddr += dma_len; + + if (tsize > 0) { + ++j; + if (j < count) { + sg = sg_next(sg); + dma_len = sg_dma_len(sg); + } + } + } else { + tsize -= rsize; + dma_len -= rsize; + rsize = 0; + } + + ++riu->sge_cnt; + + if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) { + ++ioctx->n_rdma; + riu->sge = + kmalloc(riu->sge_cnt * sizeof *riu->sge, + GFP_KERNEL); + if (!riu->sge) + goto free_mem; + + ++riu; + riu->sge_cnt = 0; + riu->raddr = raddr; + riu->rkey = be32_to_cpu(db->key); + } + } + + ++ioctx->n_rdma; + riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge, + GFP_KERNEL); + if (!riu->sge) + goto free_mem; + } + + db = ioctx->rbufs; + tsize = cmd->data_length; + riu = ioctx->rdma_ius; + sg = sg_orig; + dma_len = sg_dma_len(&sg[0]); + dma_addr = sg_dma_address(&sg[0]); + + /* this second loop is really mapped sg_addres to rdma_iu->ib_sge */ + for (i = 0, j = 0; + j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) { + rsize = be32_to_cpu(db->len); + sge = riu->sge; + k = 0; + + while (rsize > 0 && tsize > 0) { + sge->addr = dma_addr; + sge->lkey = ch->sport->sdev->mr->lkey; + + if (rsize >= dma_len) { + sge->length = + (tsize < dma_len) ? 
tsize : dma_len; + tsize -= dma_len; + rsize -= dma_len; + + if (tsize > 0) { + ++j; + if (j < count) { + sg = sg_next(sg); + dma_len = sg_dma_len(sg); + dma_addr = sg_dma_address(sg); + } + } + } else { + sge->length = (tsize < rsize) ? tsize : rsize; + tsize -= rsize; + dma_len -= rsize; + dma_addr += rsize; + rsize = 0; + } + + ++k; + if (k == riu->sge_cnt && rsize > 0 && tsize > 0) { + ++riu; + sge = riu->sge; + k = 0; + } else if (rsize > 0 && tsize > 0) + ++sge; + } + } + + return 0; + +free_mem: + srpt_unmap_sg_to_ib_sge(ch, ioctx); + + return -ENOMEM; +} + +/** + * srpt_get_send_ioctx() - Obtain an I/O context for sending to the initiator. + */ +static struct srpt_send_ioctx *srpt_get_send_ioctx(struct srpt_rdma_ch *ch) +{ + struct srpt_send_ioctx *ioctx; + unsigned long flags; + + BUG_ON(!ch); + + ioctx = NULL; + spin_lock_irqsave(&ch->spinlock, flags); + if (!list_empty(&ch->free_list)) { + ioctx = list_first_entry(&ch->free_list, + struct srpt_send_ioctx, free_list); + list_del(&ioctx->free_list); + } + spin_unlock_irqrestore(&ch->spinlock, flags); + + if (!ioctx) + return ioctx; + + BUG_ON(ioctx->ch != ch); + kref_init(&ioctx->kref); + spin_lock_init(&ioctx->spinlock); + ioctx->state = SRPT_STATE_NEW; + ioctx->n_rbuf = 0; + ioctx->rbufs = NULL; + ioctx->n_rdma = 0; + ioctx->n_rdma_ius = 0; + ioctx->rdma_ius = NULL; + ioctx->mapped_sg_count = 0; + init_completion(&ioctx->tx_done); + ioctx->queue_status_only = false; + /* + * transport_init_se_cmd() does not initialize all fields, so do it + * here. + */ + memset(&ioctx->cmd, 0, sizeof(ioctx->cmd)); + memset(&ioctx->sense_data, 0, sizeof(ioctx->sense_data)); + + return ioctx; +} + +/** + * srpt_put_send_ioctx() - Free up resources. 
+ */ +static void srpt_put_send_ioctx(struct srpt_send_ioctx *ioctx) +{ + struct srpt_rdma_ch *ch; + unsigned long flags; + + BUG_ON(!ioctx); + ch = ioctx->ch; + BUG_ON(!ch); + + WARN_ON(srpt_get_cmd_state(ioctx) != SRPT_STATE_DONE); + + srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); + transport_generic_free_cmd(&ioctx->cmd, 0); + + if (ioctx->n_rbuf > 1) { + kfree(ioctx->rbufs); + ioctx->rbufs = NULL; + ioctx->n_rbuf = 0; + } + + spin_lock_irqsave(&ch->spinlock, flags); + list_add(&ioctx->free_list, &ch->free_list); + spin_unlock_irqrestore(&ch->spinlock, flags); +} + +static void srpt_put_send_ioctx_kref(struct kref *kref) +{ + srpt_put_send_ioctx(container_of(kref, struct srpt_send_ioctx, kref)); +} + +/** + * srpt_abort_cmd() - Abort a SCSI command. + * @ioctx: I/O context associated with the SCSI command. + * @context: Preferred execution context. + */ +static int srpt_abort_cmd(struct srpt_send_ioctx *ioctx) +{ + enum srpt_command_state state; + unsigned long flags; + + BUG_ON(!ioctx); + + /* + * If the command is in a state where the target core is waiting for + * the ib_srpt driver, change the state to the next state. Changing + * the state of the command from SRPT_STATE_NEED_DATA to + * SRPT_STATE_DATA_IN ensures that srpt_xmit_response() will call this + * function a second time. + */ + + spin_lock_irqsave(&ioctx->spinlock, flags); + state = ioctx->state; + switch (state) { + case SRPT_STATE_NEED_DATA: + ioctx->state = SRPT_STATE_DATA_IN; + break; + case SRPT_STATE_DATA_IN: + case SRPT_STATE_CMD_RSP_SENT: + case SRPT_STATE_MGMT_RSP_SENT: + ioctx->state = SRPT_STATE_DONE; + break; + default: + break; + } + spin_unlock_irqrestore(&ioctx->spinlock, flags); + + if (state == SRPT_STATE_DONE) + goto out; + + pr_debug("Aborting cmd with state %d and tag %lld\n", state, + ioctx->tag); + + switch (state) { + case SRPT_STATE_NEW: + case SRPT_STATE_DATA_IN: + case SRPT_STATE_MGMT: + /* + * Do nothing - defer abort processing until + * srpt_queue_response() is invoked. 
+ */ + WARN_ON(!transport_check_aborted_status(&ioctx->cmd, false)); + break; + case SRPT_STATE_NEED_DATA: + /* DMA_TO_DEVICE (write) - RDMA read error. */ + atomic_set(&ioctx->cmd.transport_lun_stop, 1); + transport_generic_handle_data(&ioctx->cmd); + break; + case SRPT_STATE_CMD_RSP_SENT: + /* + * SRP_RSP sending failed or the SRP_RSP send completion has + * not been received in time. + */ + srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx); + atomic_set(&ioctx->cmd.transport_lun_stop, 1); + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + break; + case SRPT_STATE_MGMT_RSP_SENT: + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + break; + default: + WARN_ON("ERROR: unexpected command state"); + break; + } + +out: + return state; +} + +/** + * srpt_handle_send_err_comp() - Process an IB_WC_SEND error completion. + */ +static void srpt_handle_send_err_comp(struct srpt_rdma_ch *ch, u64 wr_id) +{ + struct srpt_send_ioctx *ioctx; + enum srpt_command_state state; + struct se_cmd *cmd; + u32 index; + + atomic_inc(&ch->sq_wr_avail); + + index = idx_from_wr_id(wr_id); + ioctx = ch->ioctx_ring[index]; + state = srpt_get_cmd_state(ioctx); + cmd = &ioctx->cmd; + + WARN_ON(state != SRPT_STATE_CMD_RSP_SENT + && state != SRPT_STATE_MGMT_RSP_SENT + && state != SRPT_STATE_NEED_DATA + && state != SRPT_STATE_DONE); + + /* If SRP_RSP sending failed, undo the ch->req_lim change. */ + if (state == SRPT_STATE_CMD_RSP_SENT + || state == SRPT_STATE_MGMT_RSP_SENT) + atomic_dec(&ch->req_lim); + + srpt_abort_cmd(ioctx); +} + +/** + * srpt_handle_send_comp() - Process an IB send completion notification. 
+ */ +static void srpt_handle_send_comp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + enum srpt_command_state state; + + atomic_inc(&ch->sq_wr_avail); + + state = srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + + if (WARN_ON(state != SRPT_STATE_CMD_RSP_SENT + && state != SRPT_STATE_MGMT_RSP_SENT + && state != SRPT_STATE_DONE)) + pr_debug("state = %d\n", state); + + if (state != SRPT_STATE_DONE) + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + else + printk(KERN_ERR "IB completion has been received too late for" + " wr_id = %u.\n", ioctx->ioctx.index); +} + +/** + * srpt_handle_rdma_comp() - Process an IB RDMA completion notification. + * + * Note: transport_generic_handle_data() is asynchronous so unmapping the + * data that has been transferred via IB RDMA must be postponed until the + * check_stop_free() callback. + */ +static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + enum srpt_opcode opcode) +{ + WARN_ON(ioctx->n_rdma <= 0); + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); + + if (opcode == SRPT_RDMA_READ_LAST) { + if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA, + SRPT_STATE_DATA_IN)) + transport_generic_handle_data(&ioctx->cmd); + else + printk(KERN_ERR "%s[%d]: wrong state = %d\n", __func__, + __LINE__, srpt_get_cmd_state(ioctx)); + } else if (opcode == SRPT_RDMA_ABORT) { + ioctx->rdma_aborted = true; + } else { + WARN(true, "unexpected opcode %d\n", opcode); + } +} + +/** + * srpt_handle_rdma_err_comp() - Process an IB RDMA error completion. 
+ */ +static void srpt_handle_rdma_err_comp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + enum srpt_opcode opcode) +{ + struct se_cmd *cmd; + enum srpt_command_state state; + + cmd = &ioctx->cmd; + state = srpt_get_cmd_state(ioctx); + switch (opcode) { + case SRPT_RDMA_READ_LAST: + if (ioctx->n_rdma <= 0) { + printk(KERN_ERR "Received invalid RDMA read" + " error completion with idx %d\n", + ioctx->ioctx.index); + break; + } + atomic_add(ioctx->n_rdma, &ch->sq_wr_avail); + if (state == SRPT_STATE_NEED_DATA) + srpt_abort_cmd(ioctx); + else + printk(KERN_ERR "%s[%d]: wrong state = %d\n", + __func__, __LINE__, state); + break; + case SRPT_RDMA_WRITE_LAST: + atomic_set(&ioctx->cmd.transport_lun_stop, 1); + break; + default: + printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__, + __LINE__, opcode); + break; + } +} + +/** + * srpt_build_cmd_rsp() - Build an SRP_RSP response. + * @ch: RDMA channel through which the request has been received. + * @ioctx: I/O context associated with the SRP_CMD request. The response will + * be built in the buffer ioctx->buf points at and hence this function will + * overwrite the request data. + * @tag: tag of the request for which this response is being generated. + * @status: value for the STATUS field of the SRP_RSP information unit. + * + * Returns the size in bytes of the SRP_RSP response. + * + * An SRP_RSP response contains a SCSI status or service response. See also + * section 6.9 in the SRP r16a document for the format of an SRP_RSP + * response. See also SPC-2 for more information about sense data. + */ +static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, u64 tag, + int status) +{ + struct srp_rsp *srp_rsp; + const u8 *sense_data; + int sense_data_len, max_sense_len; + + /* + * The lowest bit of all SAM-3 status codes is zero (see also + * paragraph 5.3 in SAM-3). 
+ */ + WARN_ON(status & 1); + + srp_rsp = ioctx->ioctx.buf; + BUG_ON(!srp_rsp); + + sense_data = ioctx->sense_data; + sense_data_len = ioctx->cmd.scsi_sense_length; + WARN_ON(sense_data_len > sizeof(ioctx->sense_data)); + + memset(srp_rsp, 0, sizeof *srp_rsp); + srp_rsp->opcode = SRP_RSP; + srp_rsp->req_lim_delta = + __constant_cpu_to_be32(1 + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->tag = tag; + srp_rsp->status = status; + + if (sense_data_len) { + BUILD_BUG_ON(MIN_MAX_RSP_SIZE <= sizeof(*srp_rsp)); + max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp); + if (sense_data_len > max_sense_len) { + printk(KERN_WARNING "truncated sense data from %d to %d" + " bytes\n", sense_data_len, max_sense_len); + sense_data_len = max_sense_len; + } + + srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID; + srp_rsp->sense_data_len = cpu_to_be32(sense_data_len); + memcpy(srp_rsp + 1, sense_data, sense_data_len); + } + + return sizeof(*srp_rsp) + sense_data_len; +} + +/** + * srpt_build_tskmgmt_rsp() - Build a task management response. + * @ch: RDMA channel through which the request has been received. + * @ioctx: I/O context in which the SRP_RSP response will be built. + * @rsp_code: RSP_CODE that will be stored in the response. + * @tag: Tag of the request for which this response is being generated. + * + * Returns the size in bytes of the SRP_RSP response. + * + * An SRP_RSP response contains a SCSI status or service response. See also + * section 6.9 in the SRP r16a document for the format of an SRP_RSP + * response. + */ +static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx, + u8 rsp_code, u64 tag) +{ + struct srp_rsp *srp_rsp; + int resp_data_len; + int resp_len; + + resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 
0 : 4; + resp_len = sizeof(*srp_rsp) + resp_data_len; + + srp_rsp = ioctx->ioctx.buf; + BUG_ON(!srp_rsp); + memset(srp_rsp, 0, sizeof *srp_rsp); + + srp_rsp->opcode = SRP_RSP; + srp_rsp->req_lim_delta = __constant_cpu_to_be32(1 + + atomic_xchg(&ch->req_lim_delta, 0)); + srp_rsp->tag = tag; + + if (rsp_code != SRP_TSK_MGMT_SUCCESS) { + srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID; + srp_rsp->resp_data_len = cpu_to_be32(resp_data_len); + srp_rsp->data[3] = rsp_code; + } + + return resp_len; +} + +#define NO_SUCH_LUN ((uint64_t)-1LL) + +/* + * SCSI LUN addressing method. See also SAM-2 and the section about + * eight byte LUNs. + */ +enum scsi_lun_addr_method { + SCSI_LUN_ADDR_METHOD_PERIPHERAL = 0, + SCSI_LUN_ADDR_METHOD_FLAT = 1, + SCSI_LUN_ADDR_METHOD_LUN = 2, + SCSI_LUN_ADDR_METHOD_EXTENDED_LUN = 3, +}; + +/* + * srpt_unpack_lun() - Convert from network LUN to linear LUN. + * + * Convert an 2-byte, 4-byte, 6-byte or 8-byte LUN structure in network byte + * order (big endian) to a linear LUN. Supports three LUN addressing methods: + * peripheral, flat and logical unit. See also SAM-2, section 4.9.4 (page 40). 
+ */ +static uint64_t srpt_unpack_lun(const uint8_t *lun, int len) +{ + uint64_t res = NO_SUCH_LUN; + int addressing_method; + + if (unlikely(len < 2)) { + printk(KERN_ERR "Illegal LUN length %d, expected 2 bytes or " + "more", len); + goto out; + } + + switch (len) { + case 8: + if ((*((__be64 *)lun) & + __constant_cpu_to_be64(0x0000FFFFFFFFFFFFLL)) != 0) + goto out_err; + break; + case 4: + if (*((__be16 *)&lun[2]) != 0) + goto out_err; + break; + case 6: + if (*((__be32 *)&lun[2]) != 0) + goto out_err; + break; + case 2: + break; + default: + goto out_err; + } + + addressing_method = (*lun) >> 6; /* highest two bits of byte 0 */ + switch (addressing_method) { + case SCSI_LUN_ADDR_METHOD_PERIPHERAL: + case SCSI_LUN_ADDR_METHOD_FLAT: + case SCSI_LUN_ADDR_METHOD_LUN: + res = *(lun + 1) | (((*lun) & 0x3f) << 8); + break; + + case SCSI_LUN_ADDR_METHOD_EXTENDED_LUN: + default: + printk(KERN_ERR "Unimplemented LUN addressing method %u", + addressing_method); + break; + } + +out: + return res; + +out_err: + printk(KERN_ERR "Support for multi-level LUNs has not yet been" + " implemented"); + goto out; +} + +static int srpt_check_stop_free(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); + return kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); +} + +/** + * srpt_handle_cmd() - Process SRP_CMD. 
+ */ +static int srpt_handle_cmd(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct se_cmd *cmd; + struct srp_cmd *srp_cmd; + uint64_t unpacked_lun; + u64 data_len; + enum dma_data_direction dir; + int ret; + + BUG_ON(!send_ioctx); + + srp_cmd = recv_ioctx->ioctx.buf; + kref_get(&send_ioctx->kref); + cmd = &send_ioctx->cmd; + send_ioctx->tag = srp_cmd->tag; + + switch (srp_cmd->task_attr) { + case SRP_CMD_SIMPLE_Q: + cmd->sam_task_attr = MSG_SIMPLE_TAG; + break; + case SRP_CMD_ORDERED_Q: + default: + cmd->sam_task_attr = MSG_ORDERED_TAG; + break; + case SRP_CMD_HEAD_OF_Q: + cmd->sam_task_attr = MSG_HEAD_TAG; + break; + case SRP_CMD_ACA: + cmd->sam_task_attr = MSG_ACA_TAG; + break; + } + + ret = srpt_get_desc_tbl(send_ioctx, srp_cmd, &dir, &data_len); + if (ret) { + printk(KERN_ERR "0x%llx: parsing SRP descriptor table failed.\n", + srp_cmd->tag); + cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + cmd->scsi_sense_reason = TCM_INVALID_CDB_FIELD; + goto send_sense; + } + + cmd->data_length = data_len; + cmd->data_direction = dir; + unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun, + sizeof(srp_cmd->lun)); + if (transport_lookup_cmd_lun(cmd, unpacked_lun) < 0) + goto send_sense; + ret = transport_generic_allocate_tasks(cmd, srp_cmd->cdb); + if (cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT) + srpt_queue_status(cmd); + else if (cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) + goto send_sense; + else + WARN_ON_ONCE(ret); + + transport_handle_cdb_direct(cmd); + return 0; + +send_sense: + transport_send_check_condition_and_sense(cmd, cmd->scsi_sense_reason, + 0); + return -1; +} + +/** + * srpt_rx_mgmt_fn_tag() - Process a task management function by tag. + * @ch: RDMA channel of the task management request. + * @fn: Task management function to perform. + * @req_tag: Tag of the SRP task management request. + * @mgmt_ioctx: I/O context of the task management request. 
+ * + * Returns zero if the target core will process the task management + * request asynchronously. + * + * Note: It is assumed that the initiator serializes tag-based task management + * requests. + */ +static int srpt_rx_mgmt_fn_tag(struct srpt_send_ioctx *ioctx, u64 tag) +{ + struct srpt_device *sdev; + struct srpt_rdma_ch *ch; + struct srpt_send_ioctx *target; + int ret, i; + + ret = -EINVAL; + ch = ioctx->ch; + BUG_ON(!ch); + BUG_ON(!ch->sport); + sdev = ch->sport->sdev; + BUG_ON(!sdev); + spin_lock_irq(&sdev->spinlock); + for (i = 0; i < ch->rq_size; ++i) { + target = ch->ioctx_ring[i]; + if (target->cmd.se_lun == ioctx->cmd.se_lun && + target->tag == tag && + srpt_get_cmd_state(target) != SRPT_STATE_DONE) { + ret = 0; + /* now let the target core abort &target->cmd; */ + break; + } + } + spin_unlock_irq(&sdev->spinlock); + return ret; +} + +static int srp_tmr_to_tcm(int fn) +{ + switch (fn) { + case SRP_TSK_ABORT_TASK: + return TMR_ABORT_TASK; + case SRP_TSK_ABORT_TASK_SET: + return TMR_ABORT_TASK_SET; + case SRP_TSK_CLEAR_TASK_SET: + return TMR_CLEAR_TASK_SET; + case SRP_TSK_LUN_RESET: + return TMR_LUN_RESET; + case SRP_TSK_CLEAR_ACA: + return TMR_CLEAR_ACA; + default: + return -1; + } +} + +/** + * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit. + * + * Returns 0 if and only if the request will be processed by the target core. + * + * For more information about SRP_TSK_MGMT information units, see also section + * 6.7 in the SRP r16a document. 
+ */ +static void srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct srp_tsk_mgmt *srp_tsk; + struct se_cmd *cmd; + uint64_t unpacked_lun; + int tcm_tmr; + int res; + + BUG_ON(!send_ioctx); + + srp_tsk = recv_ioctx->ioctx.buf; + cmd = &send_ioctx->cmd; + + pr_debug("recv tsk_mgmt fn %d for task_tag %lld and cmd tag %lld" + " cm_id %p sess %p\n", srp_tsk->tsk_mgmt_func, + srp_tsk->task_tag, srp_tsk->tag, ch->cm_id, ch->sess); + + srpt_set_cmd_state(send_ioctx, SRPT_STATE_MGMT); + send_ioctx->tag = srp_tsk->tag; + tcm_tmr = srp_tmr_to_tcm(srp_tsk->tsk_mgmt_func); + if (tcm_tmr < 0) { + send_ioctx->cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + send_ioctx->cmd.se_tmr_req->response = + TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED; + goto process_tmr; + } + cmd->se_tmr_req = core_tmr_alloc_req(cmd, NULL, tcm_tmr, GFP_KERNEL); + if (!cmd->se_tmr_req) { + send_ioctx->cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED; + goto process_tmr; + } + + unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_tsk->lun, + sizeof(srp_tsk->lun)); + res = transport_lookup_tmr_lun(&send_ioctx->cmd, unpacked_lun); + if (res) { + pr_debug("rejecting TMR for LUN %lld\n", unpacked_lun); + send_ioctx->cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + send_ioctx->cmd.se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST; + goto process_tmr; + } + + if (srp_tsk->tsk_mgmt_func == SRP_TSK_ABORT_TASK) + srpt_rx_mgmt_fn_tag(send_ioctx, srp_tsk->task_tag); + +process_tmr: + kref_get(&send_ioctx->kref); + if (!(send_ioctx->cmd.se_cmd_flags & SCF_SCSI_CDB_EXCEPTION)) + transport_generic_handle_tmr(&send_ioctx->cmd); + else + transport_send_check_condition_and_sense(cmd, + cmd->scsi_sense_reason, 0); + +} + +/** + * srpt_handle_new_iu() - Process a newly received information unit. + * @ch: RDMA channel through which the information unit has been received. 
+ * @ioctx: SRPT I/O context associated with the information unit. + */ +static void srpt_handle_new_iu(struct srpt_rdma_ch *ch, + struct srpt_recv_ioctx *recv_ioctx, + struct srpt_send_ioctx *send_ioctx) +{ + struct srp_cmd *srp_cmd; + enum rdma_ch_state ch_state; + + BUG_ON(!ch); + BUG_ON(!recv_ioctx); + + ib_dma_sync_single_for_cpu(ch->sport->sdev->device, + recv_ioctx->ioctx.dma, srp_max_req_size, + DMA_FROM_DEVICE); + + ch_state = srpt_get_ch_state(ch); + if (unlikely(ch_state == CH_CONNECTING)) { + list_add_tail(&recv_ioctx->wait_list, &ch->cmd_wait_list); + goto out; + } + + if (unlikely(ch_state != CH_LIVE)) + goto out; + + srp_cmd = recv_ioctx->ioctx.buf; + if (srp_cmd->opcode == SRP_CMD || srp_cmd->opcode == SRP_TSK_MGMT) { + if (!send_ioctx) + send_ioctx = srpt_get_send_ioctx(ch); + if (unlikely(!send_ioctx)) { + list_add_tail(&recv_ioctx->wait_list, + &ch->cmd_wait_list); + goto out; + } + } + + transport_init_se_cmd(&send_ioctx->cmd, &srpt_target->tf_ops, ch->sess, + 0, DMA_NONE, MSG_SIMPLE_TAG, + send_ioctx->sense_data); + + switch (srp_cmd->opcode) { + case SRP_CMD: + srpt_handle_cmd(ch, recv_ioctx, send_ioctx); + break; + case SRP_TSK_MGMT: + srpt_handle_tsk_mgmt(ch, recv_ioctx, send_ioctx); + break; + case SRP_I_LOGOUT: + printk(KERN_ERR "Not yet implemented: SRP_I_LOGOUT\n"); + break; + case SRP_CRED_RSP: + pr_debug("received SRP_CRED_RSP\n"); + break; + case SRP_AER_RSP: + pr_debug("received SRP_AER_RSP\n"); + break; + case SRP_RSP: + printk(KERN_ERR "Received SRP_RSP\n"); + break; + default: + printk(KERN_ERR "received IU with unknown opcode 0x%x\n", + srp_cmd->opcode); + break; + } + + srpt_post_recv(ch->sport->sdev, recv_ioctx); +out: + return; +} + +static void srpt_process_rcv_completion(struct ib_cq *cq, + struct srpt_rdma_ch *ch, + struct ib_wc *wc) +{ + struct srpt_device *sdev = ch->sport->sdev; + struct srpt_recv_ioctx *ioctx; + u32 index; + + index = idx_from_wr_id(wc->wr_id); + if (wc->status == IB_WC_SUCCESS) { + int req_lim; + + 
req_lim = atomic_dec_return(&ch->req_lim); + if (unlikely(req_lim < 0)) + printk(KERN_ERR "req_lim = %d < 0\n", req_lim); + ioctx = sdev->ioctx_ring[index]; + srpt_handle_new_iu(ch, ioctx, NULL); + } else { + printk(KERN_INFO "receiving failed for idx %u with status %d\n", + index, wc->status); + } +} + +/** + * srpt_process_send_completion() - Process an IB send completion. + * + * Note: Although this has not yet been observed during tests, at least in + * theory it is possible that the srpt_get_send_ioctx() call invoked by + * srpt_handle_new_iu() fails. This is possible because the req_lim_delta + * value in each response is set to one, and it is possible that this response + * makes the initiator send a new request before the send completion for that + * response has been processed. This could e.g. happen if the call to + * srpt_put_send_iotcx() is delayed because of a higher priority interrupt or + * if IB retransmission causes generation of the send completion to be + * delayed. Incoming information units for which srpt_get_send_ioctx() fails + * are queued on cmd_wait_list. The code below processes these delayed + * requests one at a time. 
+ */ +static void srpt_process_send_completion(struct ib_cq *cq, + struct srpt_rdma_ch *ch, + struct ib_wc *wc) +{ + struct srpt_send_ioctx *send_ioctx; + uint32_t index; + enum srpt_opcode opcode; + + index = idx_from_wr_id(wc->wr_id); + opcode = opcode_from_wr_id(wc->wr_id); + send_ioctx = ch->ioctx_ring[index]; + if (wc->status == IB_WC_SUCCESS) { + if (opcode == SRPT_SEND) + srpt_handle_send_comp(ch, send_ioctx); + else { + WARN_ON(opcode != SRPT_RDMA_ABORT && + wc->opcode != IB_WC_RDMA_READ); + srpt_handle_rdma_comp(ch, send_ioctx, opcode); + } + } else { + if (opcode == SRPT_SEND) { + printk(KERN_INFO "sending response for idx %u failed" + " with status %d\n", index, wc->status); + srpt_handle_send_err_comp(ch, wc->wr_id); + } else if (opcode != SRPT_RDMA_MID) { + printk(KERN_INFO "RDMA t %d for idx %u failed with" + " status %d", opcode, index, wc->status); + srpt_handle_rdma_err_comp(ch, send_ioctx, opcode); + } + } + + while (unlikely(opcode == SRPT_SEND + && !list_empty(&ch->cmd_wait_list) + && srpt_get_ch_state(ch) == CH_LIVE + && (send_ioctx = srpt_get_send_ioctx(ch)) != NULL)) { + struct srpt_recv_ioctx *recv_ioctx; + + recv_ioctx = list_first_entry(&ch->cmd_wait_list, + struct srpt_recv_ioctx, + wait_list); + list_del(&recv_ioctx->wait_list); + srpt_handle_new_iu(ch, recv_ioctx, send_ioctx); + } +} + +static void srpt_process_completion(struct ib_cq *cq, struct srpt_rdma_ch *ch) +{ + struct ib_wc *const wc = ch->wc; + int i, n; + + WARN_ON(cq != ch->cq); + + ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + while ((n = ib_poll_cq(cq, ARRAY_SIZE(ch->wc), wc)) > 0) { + for (i = 0; i < n; i++) { + if (opcode_from_wr_id(wc[i].wr_id) == SRPT_RECV) + srpt_process_rcv_completion(cq, ch, &wc[i]); + else + srpt_process_send_completion(cq, ch, &wc[i]); + } + } +} + +/** + * srpt_completion() - IB completion queue callback function. 
+ * + * Notes: + * - It is guaranteed that a completion handler will never be invoked + * concurrently on two different CPUs for the same completion queue. See also + * Documentation/infiniband/core_locking.txt and the implementation of + * handle_edge_irq() in kernel/irq/chip.c. + * - When threaded IRQs are enabled, completion handlers are invoked in thread + * context instead of interrupt context. + */ +static void srpt_completion(struct ib_cq *cq, void *ctx) +{ + struct srpt_rdma_ch *ch = ctx; + + wake_up_interruptible(&ch->wait_queue); +} + +static int srpt_compl_thread(void *arg) +{ + struct srpt_rdma_ch *ch; + + /* Hibernation / freezing of the SRPT kernel thread is not supported. */ + current->flags |= PF_NOFREEZE; + + ch = arg; + BUG_ON(!ch); + printk(KERN_INFO "Session %s: kernel thread %s (PID %d) started\n", + ch->sess_name, ch->thread->comm, current->pid); + while (!kthread_should_stop()) { + wait_event_interruptible(ch->wait_queue, + (srpt_process_completion(ch->cq, ch), + kthread_should_stop())); + } + printk(KERN_INFO "Session %s: kernel thread %s (PID %d) stopped\n", + ch->sess_name, ch->thread->comm, current->pid); + return 0; +} + +/** + * srpt_create_ch_ib() - Create receive and send completion queues. 
+ */ +static int srpt_create_ch_ib(struct srpt_rdma_ch *ch) +{ + struct ib_qp_init_attr *qp_init; + struct srpt_port *sport = ch->sport; + struct srpt_device *sdev = sport->sdev; + u32 srp_sq_size = sport->port_attrib.srp_sq_size; + int ret; + + WARN_ON(ch->rq_size < 1); + + ret = -ENOMEM; + qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL); + if (!qp_init) + goto out; + + ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, + ch->rq_size + srp_sq_size, 0); + if (IS_ERR(ch->cq)) { + ret = PTR_ERR(ch->cq); + printk(KERN_ERR "failed to create CQ cqe= %d ret= %d\n", + ch->rq_size + srp_sq_size, ret); + goto out; + } + + qp_init->qp_context = (void *)ch; + qp_init->event_handler + = (void(*)(struct ib_event *, void*))srpt_qp_event; + qp_init->send_cq = ch->cq; + qp_init->recv_cq = ch->cq; + qp_init->srq = sdev->srq; + qp_init->sq_sig_type = IB_SIGNAL_REQ_WR; + qp_init->qp_type = IB_QPT_RC; + qp_init->cap.max_send_wr = srp_sq_size; + qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE; + + ch->qp = ib_create_qp(sdev->pd, qp_init); + if (IS_ERR(ch->qp)) { + ret = PTR_ERR(ch->qp); + printk(KERN_ERR "failed to create_qp ret= %d\n", ret); + goto err_destroy_cq; + } + + atomic_set(&ch->sq_wr_avail, qp_init->cap.max_send_wr); + + pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n", + __func__, ch->cq->cqe, qp_init->cap.max_send_sge, + qp_init->cap.max_send_wr, ch->cm_id); + + ret = srpt_init_ch_qp(ch, ch->qp); + if (ret) + goto err_destroy_qp; + + init_waitqueue_head(&ch->wait_queue); + + pr_debug("creating thread for session %s\n", ch->sess_name); + + ch->thread = kthread_run(srpt_compl_thread, ch, "ib_srpt_compl"); + if (IS_ERR(ch->thread)) { + printk(KERN_ERR "failed to create kernel thread %ld\n", + PTR_ERR(ch->thread)); + ch->thread = NULL; + goto err_destroy_qp; + } + +out: + kfree(qp_init); + return ret; + +err_destroy_qp: + ib_destroy_qp(ch->qp); +err_destroy_cq: + ib_destroy_cq(ch->cq); + goto out; +} + +static void srpt_destroy_ch_ib(struct 
srpt_rdma_ch *ch) +{ + if (ch->thread) + kthread_stop(ch->thread); + + ib_destroy_qp(ch->qp); + ib_destroy_cq(ch->cq); +} + +/** + * __srpt_close_ch() - Close an RDMA channel by setting the QP error state. + * + * Reset the QP and make sure all resources associated with the channel will + * be deallocated at an appropriate time. + * + * Note: The caller must hold ch->sport->sdev->spinlock. + */ +static void __srpt_close_ch(struct srpt_rdma_ch *ch) +{ + struct srpt_device *sdev; + enum rdma_ch_state prev_state; + unsigned long flags; + + sdev = ch->sport->sdev; + + spin_lock_irqsave(&ch->spinlock, flags); + prev_state = ch->state; + switch (prev_state) { + case CH_CONNECTING: + case CH_LIVE: + ch->state = CH_DISCONNECTING; + break; + default: + break; + } + spin_unlock_irqrestore(&ch->spinlock, flags); + + switch (prev_state) { + case CH_CONNECTING: + ib_send_cm_rej(ch->cm_id, IB_CM_REJ_NO_RESOURCES, NULL, 0, + NULL, 0); + /* fall through */ + case CH_LIVE: + if (ib_send_cm_dreq(ch->cm_id, NULL, 0) < 0) + printk(KERN_ERR "sending CM DREQ failed.\n"); + break; + case CH_DISCONNECTING: + break; + case CH_DRAINING: + case CH_RELEASING: + break; + } +} + +/** + * srpt_close_ch() - Close an RDMA channel. + */ +static void srpt_close_ch(struct srpt_rdma_ch *ch) +{ + struct srpt_device *sdev; + + sdev = ch->sport->sdev; + spin_lock_irq(&sdev->spinlock); + __srpt_close_ch(ch); + spin_unlock_irq(&sdev->spinlock); +} + +/** + * srpt_drain_channel() - Drain a channel by resetting the IB queue pair. + * @cm_id: Pointer to the CM ID of the channel to be drained. 
+ * + * Note: Must be called from inside srpt_cm_handler to avoid a race between + * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one() + * (the caller of srpt_cm_handler holds the cm_id spinlock; srpt_remove_one() + * waits until all target sessions for the associated IB device have been + * unregistered and target session registration involves a call to + * ib_destroy_cm_id(), which locks the cm_id spinlock and hence waits until + * this function has finished). + */ +static void srpt_drain_channel(struct ib_cm_id *cm_id) +{ + struct srpt_device *sdev; + struct srpt_rdma_ch *ch; + int ret; + bool do_reset = false; + + WARN_ON_ONCE(irqs_disabled()); + + sdev = cm_id->context; + BUG_ON(!sdev); + spin_lock_irq(&sdev->spinlock); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (ch->cm_id == cm_id) { + do_reset = srpt_test_and_set_ch_state(ch, + CH_CONNECTING, CH_DRAINING) || + srpt_test_and_set_ch_state(ch, + CH_LIVE, CH_DRAINING) || + srpt_test_and_set_ch_state(ch, + CH_DISCONNECTING, CH_DRAINING); + break; + } + } + spin_unlock_irq(&sdev->spinlock); + + if (do_reset) { + ret = srpt_ch_qp_err(ch); + if (ret < 0) + printk(KERN_ERR "Setting queue pair in error state" + " failed: %d\n", ret); + } +} + +/** + * srpt_find_channel() - Look up an RDMA channel. + * @cm_id: Pointer to the CM ID of the channel to be looked up. + * + * Return NULL if no matching RDMA channel has been found. + */ +static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev, + struct ib_cm_id *cm_id) +{ + struct srpt_rdma_ch *ch; + bool found; + + WARN_ON_ONCE(irqs_disabled()); + BUG_ON(!sdev); + + found = false; + spin_lock_irq(&sdev->spinlock); + list_for_each_entry(ch, &sdev->rch_list, list) { + if (ch->cm_id == cm_id) { + found = true; + break; + } + } + spin_unlock_irq(&sdev->spinlock); + + return found ? ch : NULL; +} + +/** + * srpt_release_channel() - Release channel resources. 
+ * + * Schedules the actual release because: + * - Calling the ib_destroy_cm_id() call from inside an IB CM callback would + * trigger a deadlock. + * - It is not safe to call TCM transport_* functions from interrupt context. + */ +static void srpt_release_channel(struct srpt_rdma_ch *ch) +{ + schedule_work(&ch->release_work); +} + +static void srpt_release_channel_work(struct work_struct *w) +{ + struct srpt_rdma_ch *ch; + struct srpt_device *sdev; + + ch = container_of(w, struct srpt_rdma_ch, release_work); + pr_debug("ch = %p; ch->sess = %p; release_done = %p\n", ch, ch->sess, + ch->release_done); + + sdev = ch->sport->sdev; + BUG_ON(!sdev); + + transport_deregister_session_configfs(ch->sess); + transport_deregister_session(ch->sess); + ch->sess = NULL; + + srpt_destroy_ch_ib(ch); + + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + ch->rsp_size, DMA_TO_DEVICE); + + spin_lock_irq(&sdev->spinlock); + list_del(&ch->list); + spin_unlock_irq(&sdev->spinlock); + + ib_destroy_cm_id(ch->cm_id); + + if (ch->release_done) + complete(ch->release_done); + + wake_up(&sdev->ch_releaseQ); + + kfree(ch); +} + +static struct srpt_node_acl *__srpt_lookup_acl(struct srpt_port *sport, + u8 i_port_id[16]) +{ + struct srpt_node_acl *nacl; + + list_for_each_entry(nacl, &sport->port_acl_list, list) + if (memcmp(nacl->i_port_id, i_port_id, + sizeof(nacl->i_port_id)) == 0) + return nacl; + + return NULL; +} + +static struct srpt_node_acl *srpt_lookup_acl(struct srpt_port *sport, + u8 i_port_id[16]) +{ + struct srpt_node_acl *nacl; + + spin_lock_irq(&sport->port_acl_lock); + nacl = __srpt_lookup_acl(sport, i_port_id); + spin_unlock_irq(&sport->port_acl_lock); + + return nacl; +} + +/** + * srpt_cm_req_recv() - Process the event IB_CM_REQ_RECEIVED. + * + * Ownership of the cm_id is transferred to the target session if this + * functions returns zero. Otherwise the caller remains the owner of cm_id. 
+ */ +static int srpt_cm_req_recv(struct ib_cm_id *cm_id, + struct ib_cm_req_event_param *param, + void *private_data) +{ + struct srpt_device *sdev = cm_id->context; + struct srpt_port *sport = &sdev->port[param->port - 1]; + struct srp_login_req *req; + struct srp_login_rsp *rsp; + struct srp_login_rej *rej; + struct ib_cm_rep_param *rep_param; + struct srpt_rdma_ch *ch, *tmp_ch; + struct srpt_node_acl *nacl; + u32 it_iu_len; + int i; + int ret = 0; + + WARN_ON_ONCE(irqs_disabled()); + + if (WARN_ON(!sdev || !private_data)) + return -EINVAL; + + req = (struct srp_login_req *)private_data; + + it_iu_len = be32_to_cpu(req->req_it_iu_len); + + printk(KERN_INFO "Received SRP_LOGIN_REQ with i_port_id 0x%llx:0x%llx," + " t_port_id 0x%llx:0x%llx and it_iu_len %d on port %d" + " (guid=0x%llx:0x%llx)\n", + be64_to_cpu(*(__be64 *)&req->initiator_port_id[0]), + be64_to_cpu(*(__be64 *)&req->initiator_port_id[8]), + be64_to_cpu(*(__be64 *)&req->target_port_id[0]), + be64_to_cpu(*(__be64 *)&req->target_port_id[8]), + it_iu_len, + param->port, + be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[0]), + be64_to_cpu(*(__be64 *)&sdev->port[param->port - 1].gid.raw[8])); + + rsp = kzalloc(sizeof *rsp, GFP_KERNEL); + rej = kzalloc(sizeof *rej, GFP_KERNEL); + rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL); + + if (!rsp || !rej || !rep_param) { + ret = -ENOMEM; + goto out; + } + + if (it_iu_len > srp_max_req_size || it_iu_len < 64) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE); + ret = -EINVAL; + printk(KERN_ERR "rejected SRP_LOGIN_REQ because its" + " length (%d bytes) is out of range (%d .. 
%d)\n", + it_iu_len, 64, srp_max_req_size); + goto reject; + } + + if (!sport->enabled) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + ret = -EINVAL; + printk(KERN_ERR "rejected SRP_LOGIN_REQ because the target port" + " has not yet been enabled\n"); + goto reject; + } + + if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) { + rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN; + + spin_lock_irq(&sdev->spinlock); + + list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) { + if (!memcmp(ch->i_port_id, req->initiator_port_id, 16) + && !memcmp(ch->t_port_id, req->target_port_id, 16) + && param->port == ch->sport->port + && param->listen_id == ch->sport->sdev->cm_id + && ch->cm_id) { + enum rdma_ch_state ch_state; + + ch_state = srpt_get_ch_state(ch); + if (ch_state != CH_CONNECTING + && ch_state != CH_LIVE) + continue; + + /* found an existing channel */ + pr_debug("Found existing channel %s" + " cm_id= %p state= %d\n", + ch->sess_name, ch->cm_id, ch_state); + + __srpt_close_ch(ch); + + rsp->rsp_flags = + SRP_LOGIN_RSP_MULTICHAN_TERMINATED; + } + } + + spin_unlock_irq(&sdev->spinlock); + + } else + rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED; + + if (*(__be64 *)req->target_port_id != cpu_to_be64(srpt_service_guid) + || *(__be64 *)(req->target_port_id + 8) != + cpu_to_be64(srpt_service_guid)) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL); + ret = -ENOMEM; + printk(KERN_ERR "rejected SRP_LOGIN_REQ because it" + " has an invalid target port identifier.\n"); + goto reject; + } + + ch = kzalloc(sizeof *ch, GFP_KERNEL); + if (!ch) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + printk(KERN_ERR "rejected SRP_LOGIN_REQ because no memory.\n"); + ret = -ENOMEM; + goto reject; + } + + INIT_WORK(&ch->release_work, srpt_release_channel_work); + memcpy(ch->i_port_id, req->initiator_port_id, 16); + memcpy(ch->t_port_id, req->target_port_id, 16); + 
ch->sport = &sdev->port[param->port - 1]; + ch->cm_id = cm_id; + /* + * Avoid QUEUE_FULL conditions by limiting the number of buffers used + * for the SRP protocol to the command queue size. + */ + ch->rq_size = SRPT_RQ_SIZE; + spin_lock_init(&ch->spinlock); + ch->state = CH_CONNECTING; + INIT_LIST_HEAD(&ch->cmd_wait_list); + ch->rsp_size = ch->sport->port_attrib.srp_max_rsp_size; + + ch->ioctx_ring = (struct srpt_send_ioctx **) + srpt_alloc_ioctx_ring(ch->sport->sdev, ch->rq_size, + sizeof(*ch->ioctx_ring[0]), + ch->rsp_size, DMA_TO_DEVICE); + if (!ch->ioctx_ring) + goto free_ch; + + INIT_LIST_HEAD(&ch->free_list); + for (i = 0; i < ch->rq_size; i++) { + ch->ioctx_ring[i]->ch = ch; + list_add_tail(&ch->ioctx_ring[i]->free_list, &ch->free_list); + } + + ret = srpt_create_ch_ib(ch); + if (ret) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + printk(KERN_ERR "rejected SRP_LOGIN_REQ because creating" + " a new RDMA channel failed.\n"); + goto free_ring; + } + + ret = srpt_ch_qp_rtr(ch, ch->qp); + if (ret) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + printk(KERN_ERR "rejected SRP_LOGIN_REQ because enabling" + " RTR failed (error code = %d)\n", ret); + goto destroy_ib; + } + /* + * Use the initator port identifier as the session name. 
+ */ + snprintf(ch->sess_name, sizeof(ch->sess_name), "0x%016llx%016llx", + be64_to_cpu(*(__be64 *)ch->i_port_id), + be64_to_cpu(*(__be64 *)(ch->i_port_id + 8))); + + pr_debug("registering session %s\n", ch->sess_name); + + nacl = srpt_lookup_acl(sport, ch->i_port_id); + if (!nacl) { + printk(KERN_INFO "Rejected login because no ACL has been" + " configured yet for initiator %s.\n", ch->sess_name); + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); + goto destroy_ib; + } + + ch->sess = transport_init_session(); + if (IS_ERR(ch->sess)) { + rej->reason = __constant_cpu_to_be32( + SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); + pr_debug("Failed to create session\n"); + goto deregister_session; + } + ch->sess->se_node_acl = &nacl->nacl; + transport_register_session(&sport->port_tpg_1, &nacl->nacl, ch->sess, ch); + + pr_debug("Establish connection sess=%p name=%s cm_id=%p\n", ch->sess, + ch->sess_name, ch->cm_id); + + /* create srp_login_response */ + rsp->opcode = SRP_LOGIN_RSP; + rsp->tag = req->tag; + rsp->max_it_iu_len = req->req_it_iu_len; + rsp->max_ti_iu_len = req->req_it_iu_len; + ch->max_ti_iu_len = it_iu_len; + rsp->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); + rsp->req_lim_delta = cpu_to_be32(ch->rq_size); + atomic_set(&ch->req_lim, ch->rq_size); + atomic_set(&ch->req_lim_delta, 0); + + /* create cm reply */ + rep_param->qp_num = ch->qp->qp_num; + rep_param->private_data = (void *)rsp; + rep_param->private_data_len = sizeof *rsp; + rep_param->rnr_retry_count = 7; + rep_param->flow_control = 1; + rep_param->failover_accepted = 0; + rep_param->srq = 1; + rep_param->responder_resources = 4; + rep_param->initiator_depth = 4; + + ret = ib_send_cm_rep(cm_id, rep_param); + if (ret) { + printk(KERN_ERR "sending SRP_LOGIN_REQ response failed" + " (error code = %d)\n", ret); + goto release_channel; + } + + spin_lock_irq(&sdev->spinlock); + list_add_tail(&ch->list, &sdev->rch_list); + 
spin_unlock_irq(&sdev->spinlock); + + goto out; + +release_channel: + srpt_set_ch_state(ch, CH_RELEASING); + transport_deregister_session_configfs(ch->sess); + +deregister_session: + transport_deregister_session(ch->sess); + ch->sess = NULL; + +destroy_ib: + srpt_destroy_ch_ib(ch); + +free_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, + ch->sport->sdev, ch->rq_size, + ch->rsp_size, DMA_TO_DEVICE); +free_ch: + kfree(ch); + +reject: + rej->opcode = SRP_LOGIN_REJ; + rej->tag = req->tag; + rej->buf_fmt = __constant_cpu_to_be16(SRP_BUF_FORMAT_DIRECT + | SRP_BUF_FORMAT_INDIRECT); + + ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, + (void *)rej, sizeof *rej); + +out: + kfree(rep_param); + kfree(rsp); + kfree(rej); + + return ret; +} + +static void srpt_cm_rej_recv(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received IB REJ for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); +} + +/** + * srpt_cm_rtu_recv() - Process an IB_CM_RTU_RECEIVED or USER_ESTABLISHED event. + * + * An IB_CM_RTU_RECEIVED message indicates that the connection is established + * and that the recipient may begin transmitting (RTU = ready to use). 
+ */ +static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id) +{ + struct srpt_rdma_ch *ch; + int ret; + + ch = srpt_find_channel(cm_id->context, cm_id); + BUG_ON(!ch); + + if (srpt_test_and_set_ch_state(ch, CH_CONNECTING, CH_LIVE)) { + struct srpt_recv_ioctx *ioctx, *ioctx_tmp; + + ret = srpt_ch_qp_rts(ch, ch->qp); + + list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list, + wait_list) { + list_del(&ioctx->wait_list); + srpt_handle_new_iu(ch, ioctx, NULL); + } + if (ret) + srpt_close_ch(ch); + } +} + +static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received IB TimeWait exit for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); +} + +static void srpt_cm_rep_error(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received IB REP error for cm_id %p.\n", cm_id); + srpt_drain_channel(cm_id); +} + +/** + * srpt_cm_dreq_recv() - Process reception of a DREQ message. + */ +static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id) +{ + struct srpt_rdma_ch *ch; + unsigned long flags; + bool send_drep = false; + + ch = srpt_find_channel(cm_id->context, cm_id); + BUG_ON(!ch); + + pr_debug("cm_id= %p ch->state= %d\n", cm_id, srpt_get_ch_state(ch)); + + spin_lock_irqsave(&ch->spinlock, flags); + switch (ch->state) { + case CH_CONNECTING: + case CH_LIVE: + send_drep = true; + ch->state = CH_DISCONNECTING; + break; + case CH_DISCONNECTING: + case CH_DRAINING: + case CH_RELEASING: + WARN(true, "unexpected channel state %d\n", ch->state); + break; + } + spin_unlock_irqrestore(&ch->spinlock, flags); + + if (send_drep) { + if (ib_send_cm_drep(ch->cm_id, NULL, 0) < 0) + printk(KERN_ERR "Sending IB DREP failed.\n"); + printk(KERN_INFO "Received DREQ and sent DREP for session %s.\n", + ch->sess_name); + } +} + +/** + * srpt_cm_drep_recv() - Process reception of a DREP message. 
+ */ +static void srpt_cm_drep_recv(struct ib_cm_id *cm_id) +{ + printk(KERN_INFO "Received InfiniBand DREP message for cm_id %p.\n", + cm_id); + srpt_drain_channel(cm_id); +} + +/** + * srpt_cm_handler() - IB connection manager callback function. + * + * A non-zero return value will cause the caller destroy the CM ID. + * + * Note: srpt_cm_handler() must only return a non-zero value when transferring + * ownership of the cm_id to a channel by srpt_cm_req_recv() failed. Returning + * a non-zero value in any other case will trigger a race with the + * ib_destroy_cm_id() call in srpt_release_channel(). + */ +static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +{ + int ret; + + ret = 0; + switch (event->event) { + case IB_CM_REQ_RECEIVED: + ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd, + event->private_data); + break; + case IB_CM_REJ_RECEIVED: + srpt_cm_rej_recv(cm_id); + break; + case IB_CM_RTU_RECEIVED: + case IB_CM_USER_ESTABLISHED: + srpt_cm_rtu_recv(cm_id); + break; + case IB_CM_DREQ_RECEIVED: + srpt_cm_dreq_recv(cm_id); + break; + case IB_CM_DREP_RECEIVED: + srpt_cm_drep_recv(cm_id); + break; + case IB_CM_TIMEWAIT_EXIT: + srpt_cm_timewait_exit(cm_id); + break; + case IB_CM_REP_ERROR: + srpt_cm_rep_error(cm_id); + break; + case IB_CM_DREQ_ERROR: + printk(KERN_INFO "Received IB DREQ ERROR event.\n"); + break; + case IB_CM_MRA_RECEIVED: + printk(KERN_INFO "Received IB MRA event\n"); + break; + default: + printk(KERN_ERR "received unrecognized IB CM event %d\n", + event->event); + break; + } + + return ret; +} + +/** + * srpt_perform_rdmas() - Perform IB RDMA. + * + * Returns zero upon success or a negative number upon failure. 
+ */ +static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + struct ib_send_wr wr; + struct ib_send_wr *bad_wr; + struct rdma_iu *riu; + int i; + int ret; + int sq_wr_avail; + enum dma_data_direction dir; + const int n_rdma = ioctx->n_rdma; + + dir = ioctx->cmd.data_direction; + if (dir == DMA_TO_DEVICE) { + /* write */ + ret = -ENOMEM; + sq_wr_avail = atomic_sub_return(n_rdma, &ch->sq_wr_avail); + if (sq_wr_avail < 0) { + printk(KERN_WARNING "IB send queue full (needed %d)\n", + n_rdma); + goto out; + } + } + + ioctx->rdma_aborted = false; + ret = 0; + riu = ioctx->rdma_ius; + memset(&wr, 0, sizeof wr); + + for (i = 0; i < n_rdma; ++i, ++riu) { + if (dir == DMA_FROM_DEVICE) { + wr.opcode = IB_WR_RDMA_WRITE; + wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + SRPT_RDMA_WRITE_LAST : + SRPT_RDMA_MID, + ioctx->ioctx.index); + } else { + wr.opcode = IB_WR_RDMA_READ; + wr.wr_id = encode_wr_id(i == n_rdma - 1 ? + SRPT_RDMA_READ_LAST : + SRPT_RDMA_MID, + ioctx->ioctx.index); + } + wr.next = NULL; + wr.wr.rdma.remote_addr = riu->raddr; + wr.wr.rdma.rkey = riu->rkey; + wr.num_sge = riu->sge_cnt; + wr.sg_list = riu->sge; + + /* only get completion event for the last rdma write */ + if (i == (n_rdma - 1) && dir == DMA_TO_DEVICE) + wr.send_flags = IB_SEND_SIGNALED; + + ret = ib_post_send(ch->qp, &wr, &bad_wr); + if (ret) + break; + } + + if (ret) + printk(KERN_ERR "%s[%d]: ib_post_send() returned %d for %d/%d", + __func__, __LINE__, ret, i, n_rdma); + if (ret && i > 0) { + wr.num_sge = 0; + wr.wr_id = encode_wr_id(SRPT_RDMA_ABORT, ioctx->ioctx.index); + wr.send_flags = IB_SEND_SIGNALED; + while (ch->state == CH_LIVE && + ib_post_send(ch->qp, &wr, &bad_wr) != 0) { + printk(KERN_INFO "Trying to abort failed RDMA transfer [%d]", + ioctx->ioctx.index); + msleep(1000); + } + while (ch->state != CH_RELEASING && !ioctx->rdma_aborted) { + printk(KERN_INFO "Waiting until RDMA abort finished [%d]", + ioctx->ioctx.index); + msleep(1000); + } + } +out: + if 
(unlikely(dir == DMA_TO_DEVICE && ret < 0)) + atomic_add(n_rdma, &ch->sq_wr_avail); + return ret; +} + +/** + * srpt_xfer_data() - Start data transfer from initiator to target. + */ +static int srpt_xfer_data(struct srpt_rdma_ch *ch, + struct srpt_send_ioctx *ioctx) +{ + int ret; + + ret = srpt_map_sg_to_ib_sge(ch, ioctx); + if (ret) { + printk(KERN_ERR "%s[%d] ret=%d\n", __func__, __LINE__, ret); + goto out; + } + + ret = srpt_perform_rdmas(ch, ioctx); + if (ret) { + if (ret == -EAGAIN || ret == -ENOMEM) + printk(KERN_INFO "%s[%d] queue full -- ret=%d\n", + __func__, __LINE__, ret); + else + printk(KERN_ERR "%s[%d] fatal error -- ret=%d\n", + __func__, __LINE__, ret); + goto out_unmap; + } + +out: + return ret; +out_unmap: + srpt_unmap_sg_to_ib_sge(ch, ioctx); + goto out; +} + +static int srpt_write_pending_status(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + return srpt_get_cmd_state(ioctx) == SRPT_STATE_NEED_DATA; +} + +/* + * srpt_write_pending() - Start data transfer from initiator to target (write). 
+ */ +static int srpt_write_pending(struct se_cmd *se_cmd) +{ + struct srpt_rdma_ch *ch; + struct srpt_send_ioctx *ioctx; + enum srpt_command_state new_state; + enum rdma_ch_state ch_state; + int ret; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + + new_state = srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA); + WARN_ON(new_state == SRPT_STATE_DONE); + + ch = ioctx->ch; + BUG_ON(!ch); + + ch_state = srpt_get_ch_state(ch); + switch (ch_state) { + case CH_CONNECTING: + WARN(true, "unexpected channel state %d\n", ch_state); + ret = -EINVAL; + goto out; + case CH_LIVE: + break; + case CH_DISCONNECTING: + case CH_DRAINING: + case CH_RELEASING: + pr_debug("cmd with tag %lld: channel disconnecting\n", + ioctx->tag); + srpt_set_cmd_state(ioctx, SRPT_STATE_DATA_IN); + ret = -EINVAL; + goto out; + } + ret = srpt_xfer_data(ch, ioctx); + +out: + return ret; +} + +static u8 tcm_to_srp_tsk_mgmt_status(const int tcm_mgmt_status) +{ + switch (tcm_mgmt_status) { + case TMR_FUNCTION_COMPLETE: + return SRP_TSK_MGMT_SUCCESS; + case TMR_FUNCTION_REJECTED: + return SRP_TSK_MGMT_FUNC_NOT_SUPP; + } + return SRP_TSK_MGMT_FAILED; +} + +/** + * srpt_queue_response() - Transmits the response to a SCSI command. + * + * Callback function called by the TCM core. Must not block since it can be + * invoked on the context of the IB completion handler. 
+ */ +static int srpt_queue_response(struct se_cmd *cmd) +{ + struct srpt_rdma_ch *ch; + struct srpt_send_ioctx *ioctx; + enum srpt_command_state state; + unsigned long flags; + int ret; + enum dma_data_direction dir; + int resp_len; + u8 srp_tm_status; + + ret = 0; + + ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); + ch = ioctx->ch; + BUG_ON(!ch); + + spin_lock_irqsave(&ioctx->spinlock, flags); + state = ioctx->state; + switch (state) { + case SRPT_STATE_NEW: + case SRPT_STATE_DATA_IN: + ioctx->state = SRPT_STATE_CMD_RSP_SENT; + break; + case SRPT_STATE_MGMT: + ioctx->state = SRPT_STATE_MGMT_RSP_SENT; + break; + default: + WARN(true, "ch %p; cmd %d: unexpected command state %d\n", + ch, ioctx->ioctx.index, ioctx->state); + break; + } + spin_unlock_irqrestore(&ioctx->spinlock, flags); + + if (unlikely(transport_check_aborted_status(&ioctx->cmd, false) + || WARN_ON_ONCE(state == SRPT_STATE_CMD_RSP_SENT))) { + atomic_inc(&ch->req_lim_delta); + srpt_abort_cmd(ioctx); + goto out; + } + + dir = ioctx->cmd.data_direction; + + /* For read commands, transfer the data to the initiator. 
*/ + if (dir == DMA_FROM_DEVICE && ioctx->cmd.data_length && + !ioctx->queue_status_only) { + ret = srpt_xfer_data(ch, ioctx); + if (ret) { + printk(KERN_ERR "xfer_data failed for tag %llu\n", + ioctx->tag); + goto out; + } + } + + if (state != SRPT_STATE_MGMT) + resp_len = srpt_build_cmd_rsp(ch, ioctx, ioctx->tag, + cmd->scsi_status); + else { + srp_tm_status + = tcm_to_srp_tsk_mgmt_status(cmd->se_tmr_req->response); + resp_len = srpt_build_tskmgmt_rsp(ch, ioctx, srp_tm_status, + ioctx->tag); + } + ret = srpt_post_send(ch, ioctx, resp_len); + if (ret) { + printk(KERN_ERR "sending cmd response failed for tag %llu\n", + ioctx->tag); + srpt_unmap_sg_to_ib_sge(ch, ioctx); + srpt_set_cmd_state(ioctx, SRPT_STATE_DONE); + kref_put(&ioctx->kref, srpt_put_send_ioctx_kref); + } + +out: + return ret; +} + +static int srpt_queue_status(struct se_cmd *cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(cmd, struct srpt_send_ioctx, cmd); + BUG_ON(ioctx->sense_data != cmd->sense_buffer); + if (cmd->se_cmd_flags & + (SCF_TRANSPORT_TASK_SENSE | SCF_EMULATED_TASK_SENSE)) + WARN_ON(cmd->scsi_status != SAM_STAT_CHECK_CONDITION); + ioctx->queue_status_only = true; + return srpt_queue_response(cmd); +} + +static void srpt_refresh_port_work(struct work_struct *work) +{ + struct srpt_port *sport = container_of(work, struct srpt_port, work); + + srpt_refresh_port(sport); +} + +static int srpt_ch_list_empty(struct srpt_device *sdev) +{ + int res; + + spin_lock_irq(&sdev->spinlock); + res = list_empty(&sdev->rch_list); + spin_unlock_irq(&sdev->spinlock); + + return res; +} + +/** + * srpt_release_sdev() - Free the channel resources associated with a target. 
+ */
+static int srpt_release_sdev(struct srpt_device *sdev)
+{
+	struct srpt_rdma_ch *chan, *next_chan;
+	int wait_rc;
+
+	/* This function sleeps below and uses the _irq lock variants. */
+	WARN_ON_ONCE(irqs_disabled());
+
+	BUG_ON(!sdev);
+
+	/* Start closing every channel that is still on the device list. */
+	spin_lock_irq(&sdev->spinlock);
+	list_for_each_entry_safe(chan, next_chan, &sdev->rch_list, list) {
+		__srpt_close_ch(chan);
+	}
+	spin_unlock_irq(&sdev->spinlock);
+
+	/*
+	 * Sleep until the channel list is empty or a signal arrives. An
+	 * interruption is only logged; the function returns 0 either way.
+	 */
+	wait_rc = wait_event_interruptible(sdev->ch_releaseQ,
+					   srpt_ch_list_empty(sdev));
+	if (wait_rc != 0)
+		printk(KERN_ERR "%s: interrupted.\n", __func__);
+
+	return 0;
+}
+
+/*
+ * Search all registered SRPT devices for a port whose port_guid string
+ * equals @name. Returns the matching port or NULL. srpt_lookup_port() calls
+ * this function with srpt_dev_lock held.
+ */
+static struct srpt_port *__srpt_lookup_port(const char *name)
+{
+	struct srpt_device *cur;
+	int idx;
+
+	list_for_each_entry(cur, &srpt_dev_list, list) {
+		struct ib_device *ibdev = cur->device;
+
+		if (ibdev == NULL)
+			continue;
+
+		for (idx = 0; idx < ibdev->phys_port_cnt; idx++) {
+			if (strcmp(cur->port[idx].port_guid, name) == 0)
+				return &cur->port[idx];
+		}
+	}
+
+	return NULL;
+}
+
+/*
+ * Locked wrapper around __srpt_lookup_port(): performs the lookup while
+ * holding srpt_dev_lock and returns its result.
+ */
+static struct srpt_port *srpt_lookup_port(const char *name)
+{
+	struct srpt_port *match;
+
+	spin_lock(&srpt_dev_lock);
+	match = __srpt_lookup_port(name);
+	spin_unlock(&srpt_dev_lock);
+
+	return match;
+}
+
+/**
+ * srpt_add_one() - Infiniband device addition callback function.
+ */ +static void srpt_add_one(struct ib_device *device) +{ + struct srpt_device *sdev; + struct srpt_port *sport; + struct ib_srq_init_attr srq_attr; + int i; + + pr_debug("device = %p, device->dma_ops = %p\n", device, + device->dma_ops); + + sdev = kzalloc(sizeof *sdev, GFP_KERNEL); + if (!sdev) + goto err; + + sdev->device = device; + INIT_LIST_HEAD(&sdev->rch_list); + init_waitqueue_head(&sdev->ch_releaseQ); + spin_lock_init(&sdev->spinlock); + + if (ib_query_device(device, &sdev->dev_attr)) + goto free_dev; + + sdev->pd = ib_alloc_pd(device); + if (IS_ERR(sdev->pd)) + goto free_dev; + + sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(sdev->mr)) + goto err_pd; + + sdev->srq_size = min(srpt_srq_size, sdev->dev_attr.max_srq_wr); + + srq_attr.event_handler = srpt_srq_event; + srq_attr.srq_context = (void *)sdev; + srq_attr.attr.max_wr = sdev->srq_size; + srq_attr.attr.max_sge = 1; + srq_attr.attr.srq_limit = 0; + + sdev->srq = ib_create_srq(sdev->pd, &srq_attr); + if (IS_ERR(sdev->srq)) + goto err_mr; + + pr_debug("%s: create SRQ #wr= %d max_allow=%d dev= %s\n", + __func__, sdev->srq_size, sdev->dev_attr.max_srq_wr, + device->name); + + if (!srpt_service_guid) + srpt_service_guid = be64_to_cpu(device->node_guid); + + sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev); + if (IS_ERR(sdev->cm_id)) + goto err_srq; + + /* print out target login information */ + pr_debug("Target login info: id_ext=%016llx,ioc_guid=%016llx," + "pkey=ffff,service_id=%016llx\n", srpt_service_guid, + srpt_service_guid, srpt_service_guid); + + /* + * We do not have a consistent service_id (ie. also id_ext of target_id) + * to identify this target. 
We currently use the guid of the first HCA + * in the system as service_id; therefore, the target_id will change + * if this HCA is gone bad and replaced by different HCA + */ + if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL)) + goto err_cm; + + INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device, + srpt_event_handler); + if (ib_register_event_handler(&sdev->event_handler)) + goto err_cm; + + sdev->ioctx_ring = (struct srpt_recv_ioctx **) + srpt_alloc_ioctx_ring(sdev, sdev->srq_size, + sizeof(*sdev->ioctx_ring[0]), + srp_max_req_size, DMA_FROM_DEVICE); + if (!sdev->ioctx_ring) + goto err_event; + + for (i = 0; i < sdev->srq_size; ++i) + srpt_post_recv(sdev, sdev->ioctx_ring[i]); + + WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); + + for (i = 1; i <= sdev->device->phys_port_cnt; i++) { + sport = &sdev->port[i - 1]; + sport->sdev = sdev; + sport->port = i; + sport->port_attrib.srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE; + sport->port_attrib.srp_max_rsp_size = DEFAULT_MAX_RSP_SIZE; + sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; + INIT_WORK(&sport->work, srpt_refresh_port_work); + INIT_LIST_HEAD(&sport->port_acl_list); + spin_lock_init(&sport->port_acl_lock); + + if (srpt_refresh_port(sport)) { + printk(KERN_ERR "MAD registration failed for %s-%d.\n", + srpt_sdev_name(sdev), i); + goto err_ring; + } + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + } + + spin_lock(&srpt_dev_lock); + list_add_tail(&sdev->list, &srpt_dev_list); + spin_unlock(&srpt_dev_lock); + +out: + ib_set_client_data(device, &srpt_client, sdev); + pr_debug("added %s.\n", device->name); + return; + +err_ring: + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, + DMA_FROM_DEVICE); +err_event: + ib_unregister_event_handler(&sdev->event_handler); +err_cm: + 
ib_destroy_cm_id(sdev->cm_id); +err_srq: + ib_destroy_srq(sdev->srq); +err_mr: + ib_dereg_mr(sdev->mr); +err_pd: + ib_dealloc_pd(sdev->pd); +free_dev: + kfree(sdev); +err: + sdev = NULL; + printk(KERN_INFO "%s(%s) failed.\n", __func__, device->name); + goto out; +} + +/** + * srpt_remove_one() - InfiniBand device removal callback function. + */ +static void srpt_remove_one(struct ib_device *device) +{ + struct srpt_device *sdev; + int i; + + sdev = ib_get_client_data(device, &srpt_client); + if (!sdev) { + printk(KERN_INFO "%s(%s): nothing to do.\n", __func__, + device->name); + return; + } + + srpt_unregister_mad_agent(sdev); + + ib_unregister_event_handler(&sdev->event_handler); + + /* Cancel any work queued by the just unregistered IB event handler. */ + for (i = 0; i < sdev->device->phys_port_cnt; i++) + cancel_work_sync(&sdev->port[i].work); + + ib_destroy_cm_id(sdev->cm_id); + + /* + * Unregistering a target must happen after destroying sdev->cm_id + * such that no new SRP_LOGIN_REQ information units can arrive while + * destroying the target. 
+ */ + spin_lock(&srpt_dev_lock); + list_del(&sdev->list); + spin_unlock(&srpt_dev_lock); + srpt_release_sdev(sdev); + + ib_destroy_srq(sdev->srq); + ib_dereg_mr(sdev->mr); + ib_dealloc_pd(sdev->pd); + + srpt_free_ioctx_ring((struct srpt_ioctx **)sdev->ioctx_ring, sdev, + sdev->srq_size, srp_max_req_size, DMA_FROM_DEVICE); + sdev->ioctx_ring = NULL; + kfree(sdev); +} + +static struct ib_client srpt_client = { + .name = DRV_NAME, + .add = srpt_add_one, + .remove = srpt_remove_one +}; + +static int srpt_check_true(struct se_portal_group *se_tpg) +{ + return 1; +} + +static int srpt_check_false(struct se_portal_group *se_tpg) +{ + return 0; +} + +static char *srpt_get_fabric_name(void) +{ + return "srpt"; +} + +static u8 srpt_get_fabric_proto_ident(struct se_portal_group *se_tpg) +{ + return SCSI_TRANSPORTID_PROTOCOLID_SRP; +} + +static char *srpt_get_fabric_wwn(struct se_portal_group *tpg) +{ + struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); + + return sport->port_guid; +} + +static u16 srpt_get_tag(struct se_portal_group *tpg) +{ + return 1; +} + +static u32 srpt_get_default_depth(struct se_portal_group *se_tpg) +{ + return 1; +} + +static u32 srpt_get_pr_transport_id(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl, + struct t10_pr_registration *pr_reg, + int *format_code, unsigned char *buf) +{ + struct srpt_node_acl *nacl; + struct spc_rdma_transport_id *tr_id; + + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + tr_id = (void *)buf; + tr_id->protocol_identifier = SCSI_TRANSPORTID_PROTOCOLID_SRP; + memcpy(tr_id->i_port_id, nacl->i_port_id, sizeof(tr_id->i_port_id)); + return sizeof(*tr_id); +} + +static u32 srpt_get_pr_transport_id_len(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl, + struct t10_pr_registration *pr_reg, + int *format_code) +{ + *format_code = 0; + return sizeof(struct spc_rdma_transport_id); +} + +static char *srpt_parse_pr_out_transport_id(struct se_portal_group *se_tpg, + const 
char *buf, u32 *out_tid_len, + char **port_nexus_ptr) +{ + struct spc_rdma_transport_id *tr_id; + + *port_nexus_ptr = NULL; + *out_tid_len = sizeof(struct spc_rdma_transport_id); + tr_id = (void *)buf; + return (char *)tr_id->i_port_id; +} + +static struct se_node_acl *srpt_alloc_fabric_acl(struct se_portal_group *se_tpg) +{ + struct srpt_node_acl *nacl; + + nacl = kzalloc(sizeof(struct srpt_node_acl), GFP_KERNEL); + if (!nacl) { + printk(KERN_ERR "Unable to alocate struct srpt_node_acl\n"); + return NULL; + } + + return &nacl->nacl; +} + +static void srpt_release_fabric_acl(struct se_portal_group *se_tpg, + struct se_node_acl *se_nacl) +{ + struct srpt_node_acl *nacl; + + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + kfree(nacl); +} + +static u32 srpt_tpg_get_inst_index(struct se_portal_group *se_tpg) +{ + return 1; +} + +static void srpt_release_cmd(struct se_cmd *se_cmd) +{ +} + +/** + * srpt_shutdown_session() - Whether or not a session may be shut down. + */ +static int srpt_shutdown_session(struct se_session *se_sess) +{ + return true; +} + +/** + * srpt_close_session() - Forcibly close a session. 
+ * + * Callback function invoked by the TCM core to clean up sessions associated + * with a node ACL when the user invokes + * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + */ +static void srpt_close_session(struct se_session *se_sess) +{ + DECLARE_COMPLETION_ONSTACK(release_done); + struct srpt_rdma_ch *ch; + struct srpt_device *sdev; + int res; + + ch = se_sess->fabric_sess_ptr; + WARN_ON(ch->sess != se_sess); + + pr_debug("ch %p state %d\n", ch, srpt_get_ch_state(ch)); + + sdev = ch->sport->sdev; + spin_lock_irq(&sdev->spinlock); + BUG_ON(ch->release_done); + ch->release_done = &release_done; + __srpt_close_ch(ch); + spin_unlock_irq(&sdev->spinlock); + + res = wait_for_completion_timeout(&release_done, 60 * HZ); + WARN_ON(res <= 0); +} + +/** + * To do: Find out whether stop_session() has a meaning for transports + * other than iSCSI. + */ +static void srpt_stop_session(struct se_session *se_sess, int sess_sleep, + int conn_sleep) +{ +} + +static void srpt_reset_nexus(struct se_session *sess) +{ + printk(KERN_ERR "This is the SRP protocol, not iSCSI\n"); +} + +static int srpt_sess_logged_in(struct se_session *se_sess) +{ + return true; +} + +/** + * srpt_sess_get_index() - Return the value of scsiAttIntrPortIndex (SCSI-MIB). + * + * A quote from RFC 4455 (SCSI-MIB) about this MIB object: + * This object represents an arbitrary integer used to uniquely identify a + * particular attached remote initiator port to a particular SCSI target port + * within a particular SCSI target device within a particular SCSI instance. + */ +static u32 srpt_sess_get_index(struct se_session *se_sess) +{ + return 0; +} + +static void srpt_set_default_node_attrs(struct se_node_acl *nacl) +{ +} + +static u32 srpt_get_task_tag(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + return ioctx->tag; +} + +/* Note: only used from inside debug printk's by the TCM core. 
*/ +static int srpt_get_tcm_cmd_state(struct se_cmd *se_cmd) +{ + struct srpt_send_ioctx *ioctx; + + ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); + return srpt_get_cmd_state(ioctx); +} + +static u16 srpt_set_fabric_sense_len(struct se_cmd *cmd, u32 sense_length) +{ + return 0; +} + +static u16 srpt_get_fabric_sense_len(void) +{ + return 0; +} + +static int srpt_is_state_remove(struct se_cmd *se_cmd) +{ + return 0; +} + +/** + * srpt_parse_i_port_id() - Parse an initiator port ID. + * @name: ASCII representation of a 128-bit initiator port ID. + * @i_port_id: Binary 128-bit port ID. + */ +static int srpt_parse_i_port_id(u8 i_port_id[16], const char *name) +{ + const char *p; + unsigned len, count, leading_zero_bytes; + int ret, rc; + + p = name; + if (strnicmp(p, "0x", 2) == 0) + p += 2; + ret = -EINVAL; + len = strlen(p); + if (len % 2) + goto out; + count = min(len / 2, 16U); + leading_zero_bytes = 16 - count; + memset(i_port_id, 0, leading_zero_bytes); + rc = hex2bin(i_port_id + leading_zero_bytes, p, count); + if (rc < 0) + pr_debug("hex2bin failed for srpt_parse_i_port_id: %d\n", rc); + ret = 0; +out: + return ret; +} + +/* + * configfs callback function invoked for + * mkdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + */ +static struct se_node_acl *srpt_make_nodeacl(struct se_portal_group *tpg, + struct config_group *group, + const char *name) +{ + struct srpt_port *sport = container_of(tpg, struct srpt_port, port_tpg_1); + struct se_node_acl *se_nacl, *se_nacl_new; + struct srpt_node_acl *nacl; + int ret = 0; + u32 nexus_depth = 1; + u8 i_port_id[16]; + + if (srpt_parse_i_port_id(i_port_id, name) < 0) { + printk(KERN_ERR "invalid initiator port ID %s\n", name); + ret = -EINVAL; + goto err; + } + + se_nacl_new = srpt_alloc_fabric_acl(tpg); + if (!se_nacl_new) { + ret = -ENOMEM; + goto err; + } + /* + * nacl_new may be released by core_tpg_add_initiator_node_acl() + * when converting a node ACL from demo mode to explict + */ + 
se_nacl = core_tpg_add_initiator_node_acl(tpg, se_nacl_new, name, + nexus_depth); + if (IS_ERR(se_nacl)) { + ret = PTR_ERR(se_nacl); + goto err; + } + /* Locate our struct srpt_node_acl and set sdev and i_port_id. */ + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + memcpy(&nacl->i_port_id[0], &i_port_id[0], 16); + nacl->sport = sport; + + spin_lock_irq(&sport->port_acl_lock); + list_add_tail(&nacl->list, &sport->port_acl_list); + spin_unlock_irq(&sport->port_acl_lock); + + return se_nacl; +err: + return ERR_PTR(ret); +} + +/* + * configfs callback function invoked for + * rmdir /sys/kernel/config/target/$driver/$port/$tpg/acls/$i_port_id + */ +static void srpt_drop_nodeacl(struct se_node_acl *se_nacl) +{ + struct srpt_node_acl *nacl; + struct srpt_device *sdev; + struct srpt_port *sport; + + nacl = container_of(se_nacl, struct srpt_node_acl, nacl); + sport = nacl->sport; + sdev = sport->sdev; + spin_lock_irq(&sport->port_acl_lock); + list_del(&nacl->list); + spin_unlock_irq(&sport->port_acl_lock); + core_tpg_del_initiator_node_acl(&sport->port_tpg_1, se_nacl, 1); + srpt_release_fabric_acl(NULL, se_nacl); +} + +static ssize_t srpt_tpg_attrib_show_srp_max_rdma_size( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return sprintf(page, "%u\n", sport->port_attrib.srp_max_rdma_size); +} + +static ssize_t srpt_tpg_attrib_store_srp_max_rdma_size( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long val; + int ret; + + ret = strict_strtoul(page, 0, &val); + if (ret < 0) { + pr_err("strict_strtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_RDMA_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_RDMA_SIZE: %d\n", val, + MAX_SRPT_RDMA_SIZE); + return -EINVAL; + } + if (val < DEFAULT_MAX_RDMA_SIZE) { + pr_err("val: %lu smaller than 
DEFAULT_MAX_RDMA_SIZE: %d\n", + val, DEFAULT_MAX_RDMA_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_max_rdma_size = val; + + return count; +} + +TF_TPG_ATTRIB_ATTR(srpt, srp_max_rdma_size, S_IRUGO | S_IWUSR); + +static ssize_t srpt_tpg_attrib_show_srp_max_rsp_size( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return sprintf(page, "%u\n", sport->port_attrib.srp_max_rsp_size); +} + +static ssize_t srpt_tpg_attrib_store_srp_max_rsp_size( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long val; + int ret; + + ret = strict_strtoul(page, 0, &val); + if (ret < 0) { + pr_err("strict_strtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_RSP_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_RSP_SIZE: %d\n", val, + MAX_SRPT_RSP_SIZE); + return -EINVAL; + } + if (val < MIN_MAX_RSP_SIZE) { + pr_err("val: %lu smaller than MIN_MAX_RSP_SIZE: %d\n", val, + MIN_MAX_RSP_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_max_rsp_size = val; + + return count; +} + +TF_TPG_ATTRIB_ATTR(srpt, srp_max_rsp_size, S_IRUGO | S_IWUSR); + +static ssize_t srpt_tpg_attrib_show_srp_sq_size( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return sprintf(page, "%u\n", sport->port_attrib.srp_sq_size); +} + +static ssize_t srpt_tpg_attrib_store_srp_sq_size( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long val; + int ret; + + ret = strict_strtoul(page, 0, &val); + if (ret < 0) { + pr_err("strict_strtoul() failed with ret: %d\n", ret); + return -EINVAL; + } + if (val > MAX_SRPT_SRQ_SIZE) { + pr_err("val: %lu exceeds MAX_SRPT_SRQ_SIZE: %d\n", val, + 
MAX_SRPT_SRQ_SIZE); + return -EINVAL; + } + if (val < MIN_SRPT_SRQ_SIZE) { + pr_err("val: %lu smaller than MIN_SRPT_SRQ_SIZE: %d\n", val, + MIN_SRPT_SRQ_SIZE); + return -EINVAL; + } + sport->port_attrib.srp_sq_size = val; + + return count; +} + +TF_TPG_ATTRIB_ATTR(srpt, srp_sq_size, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *srpt_tpg_attrib_attrs[] = { + &srpt_tpg_attrib_srp_max_rdma_size.attr, + &srpt_tpg_attrib_srp_max_rsp_size.attr, + &srpt_tpg_attrib_srp_sq_size.attr, + NULL, +}; + +static ssize_t srpt_tpg_show_enable( + struct se_portal_group *se_tpg, + char *page) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + + return snprintf(page, PAGE_SIZE, "%d\n", (sport->enabled) ? 1: 0); +} + +static ssize_t srpt_tpg_store_enable( + struct se_portal_group *se_tpg, + const char *page, + size_t count) +{ + struct srpt_port *sport = container_of(se_tpg, struct srpt_port, port_tpg_1); + unsigned long tmp; + int ret; + + ret = strict_strtoul(page, 0, &tmp); + if (ret < 0) { + printk(KERN_ERR "Unable to extract srpt_tpg_store_enable\n"); + return -EINVAL; + } + + if ((tmp != 0) && (tmp != 1)) { + printk(KERN_ERR "Illegal value for srpt_tpg_store_enable: %lu\n", tmp); + return -EINVAL; + } + if (tmp == 1) + sport->enabled = true; + else + sport->enabled = false; + + return count; +} + +TF_TPG_BASE_ATTR(srpt, enable, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *srpt_tpg_attrs[] = { + &srpt_tpg_enable.attr, + NULL, +}; + +/** + * configfs callback invoked for + * mkdir /sys/kernel/config/target/$driver/$port/$tpg + */ +static struct se_portal_group *srpt_make_tpg(struct se_wwn *wwn, + struct config_group *group, + const char *name) +{ + struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn); + int res; + + /* Initialize sport->port_wwn and sport->port_tpg_1 */ + res = core_tpg_register(&srpt_target->tf_ops, &sport->port_wwn, + &sport->port_tpg_1, sport, TRANSPORT_TPG_TYPE_NORMAL); + if (res) + 
return ERR_PTR(res); + + return &sport->port_tpg_1; +} + +/** + * configfs callback invoked for + * rmdir /sys/kernel/config/target/$driver/$port/$tpg + */ +static void srpt_drop_tpg(struct se_portal_group *tpg) +{ + struct srpt_port *sport = container_of(tpg, + struct srpt_port, port_tpg_1); + + sport->enabled = false; + core_tpg_deregister(&sport->port_tpg_1); +} + +/** + * configfs callback invoked for + * mkdir /sys/kernel/config/target/$driver/$port + */ +static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, + struct config_group *group, + const char *name) +{ + struct srpt_port *sport; + int ret; + + sport = srpt_lookup_port(name); + pr_debug("make_tport(%s)\n", name); + ret = -EINVAL; + if (!sport) + goto err; + + return &sport->port_wwn; + +err: + return ERR_PTR(ret); +} + +/** + * configfs callback invoked for + * rmdir /sys/kernel/config/target/$driver/$port + */ +static void srpt_drop_tport(struct se_wwn *wwn) +{ + struct srpt_port *sport = container_of(wwn, struct srpt_port, port_wwn); + + pr_debug("drop_tport(%s\n", config_item_name(&sport->port_wwn.wwn_group.cg_item)); +} + +static ssize_t srpt_wwn_show_attr_version(struct target_fabric_configfs *tf, + char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%s\n", DRV_VERSION); +} + +TF_WWN_ATTR_RO(srpt, version); + +static struct configfs_attribute *srpt_wwn_attrs[] = { + &srpt_wwn_version.attr, + NULL, +}; + +static struct target_core_fabric_ops srpt_template = { + .get_fabric_name = srpt_get_fabric_name, + .get_fabric_proto_ident = srpt_get_fabric_proto_ident, + .tpg_get_wwn = srpt_get_fabric_wwn, + .tpg_get_tag = srpt_get_tag, + .tpg_get_default_depth = srpt_get_default_depth, + .tpg_get_pr_transport_id = srpt_get_pr_transport_id, + .tpg_get_pr_transport_id_len = srpt_get_pr_transport_id_len, + .tpg_parse_pr_out_transport_id = srpt_parse_pr_out_transport_id, + .tpg_check_demo_mode = srpt_check_false, + .tpg_check_demo_mode_cache = srpt_check_true, + .tpg_check_demo_mode_write_protect 
= srpt_check_true, + .tpg_check_prod_mode_write_protect = srpt_check_false, + .tpg_alloc_fabric_acl = srpt_alloc_fabric_acl, + .tpg_release_fabric_acl = srpt_release_fabric_acl, + .tpg_get_inst_index = srpt_tpg_get_inst_index, + .release_cmd = srpt_release_cmd, + .check_stop_free = srpt_check_stop_free, + .shutdown_session = srpt_shutdown_session, + .close_session = srpt_close_session, + .stop_session = srpt_stop_session, + .fall_back_to_erl0 = srpt_reset_nexus, + .sess_logged_in = srpt_sess_logged_in, + .sess_get_index = srpt_sess_get_index, + .sess_get_initiator_sid = NULL, + .write_pending = srpt_write_pending, + .write_pending_status = srpt_write_pending_status, + .set_default_node_attributes = srpt_set_default_node_attrs, + .get_task_tag = srpt_get_task_tag, + .get_cmd_state = srpt_get_tcm_cmd_state, + .queue_data_in = srpt_queue_response, + .queue_status = srpt_queue_status, + .queue_tm_rsp = srpt_queue_response, + .get_fabric_sense_len = srpt_get_fabric_sense_len, + .set_fabric_sense_len = srpt_set_fabric_sense_len, + .is_state_remove = srpt_is_state_remove, + /* + * Setup function pointers for generic logic in + * target_core_fabric_configfs.c + */ + .fabric_make_wwn = srpt_make_tport, + .fabric_drop_wwn = srpt_drop_tport, + .fabric_make_tpg = srpt_make_tpg, + .fabric_drop_tpg = srpt_drop_tpg, + .fabric_post_link = NULL, + .fabric_pre_unlink = NULL, + .fabric_make_np = NULL, + .fabric_drop_np = NULL, + .fabric_make_nodeacl = srpt_make_nodeacl, + .fabric_drop_nodeacl = srpt_drop_nodeacl, +}; + +/** + * srpt_init_module() - Kernel module initialization. + * + * Note: Since ib_register_client() registers callback functions, and since at + * least one of these callback functions (srpt_add_one()) calls target core + * functions, this driver must be registered with the target core before + * ib_register_client() is called. 
+ */
+static int __init srpt_init_module(void)
+{
+	int ret;
+
+	/* Validate the module parameters before registering anything. */
+	ret = -EINVAL;
+	if (srp_max_req_size < MIN_MAX_REQ_SIZE) {
+		printk(KERN_ERR "invalid value %d for kernel module parameter"
+		       " srp_max_req_size -- must be at least %d.\n",
+		       srp_max_req_size, MIN_MAX_REQ_SIZE);
+		goto out;
+	}
+
+	if (srpt_srq_size < MIN_SRPT_SRQ_SIZE
+	    || srpt_srq_size > MAX_SRPT_SRQ_SIZE) {
+		printk(KERN_ERR "invalid value %d for kernel module parameter"
+		       " srpt_srq_size -- must be in the range [%d..%d].\n",
+		       srpt_srq_size, MIN_SRPT_SRQ_SIZE, MAX_SRPT_SRQ_SIZE);
+		goto out;
+	}
+
+	srpt_target = target_fabric_configfs_init(THIS_MODULE, "srpt");
+	if (IS_ERR(srpt_target)) {
+		printk(KERN_ERR "couldn't register\n");
+		ret = PTR_ERR(srpt_target);
+		goto out;
+	}
+
+	/* Install this driver's fabric operations (copied by value). */
+	srpt_target->tf_ops = srpt_template;
+
+	/* Enable SG chaining */
+	srpt_target->tf_ops.task_sg_chaining = true;
+
+	/*
+	 * Set up default attribute lists.
+	 */
+	srpt_target->tf_cit_tmpl.tfc_wwn_cit.ct_attrs = srpt_wwn_attrs;
+	srpt_target->tf_cit_tmpl.tfc_tpg_base_cit.ct_attrs = srpt_tpg_attrs;
+	srpt_target->tf_cit_tmpl.tfc_tpg_attrib_cit.ct_attrs = srpt_tpg_attrib_attrs;
+	srpt_target->tf_cit_tmpl.tfc_tpg_param_cit.ct_attrs = NULL;
+	srpt_target->tf_cit_tmpl.tfc_tpg_np_base_cit.ct_attrs = NULL;
+	srpt_target->tf_cit_tmpl.tfc_tpg_nacl_base_cit.ct_attrs = NULL;
+	srpt_target->tf_cit_tmpl.tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
+	srpt_target->tf_cit_tmpl.tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
+	srpt_target->tf_cit_tmpl.tfc_tpg_nacl_param_cit.ct_attrs = NULL;
+
+	ret = target_fabric_configfs_register(srpt_target);
+	if (ret < 0) {
+		printk(KERN_ERR "couldn't register\n");
+		goto out_free_target;
+	}
+
+	/* Must come last; see the note in the function header. */
+	ret = ib_register_client(&srpt_client);
+	if (ret) {
+		printk(KERN_ERR "couldn't register IB client\n");
+		goto out_unregister_target;
+	}
+
+	return 0;
+
+out_unregister_target:
+	target_fabric_configfs_deregister(srpt_target);
+	/*
+	 * Clearing the pointer makes the fall-through into out_free_target
+	 * skip target_fabric_configfs_free().
+	 */
+	srpt_target = NULL;
+out_free_target:
+	if (srpt_target)
+		target_fabric_configfs_free(srpt_target);
+out:
+	return ret;
+}
+
+/*
+ * Module exit: undo srpt_init_module() in reverse order. The IB client is
+ * unregistered before the fabric is deregistered from the target core.
+ */
+static void __exit srpt_cleanup_module(void)
+{
+	ib_unregister_client(&srpt_client);
+	target_fabric_configfs_deregister(srpt_target);
+	srpt_target = NULL;
+}
+
+module_init(srpt_init_module);
+module_exit(srpt_cleanup_module);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
new file mode 100644
index 000000000000..61e52b830816
--- /dev/null
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -0,0 +1,443 @@
+/*
+ * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
+ * Copyright (C) 2009 - 2010 Bart Van Assche <bvanassche@acm.org>.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef IB_SRPT_H
+#define IB_SRPT_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_sa.h>
+#include <rdma/ib_cm.h>
+
+#include <scsi/srp.h>
+
+#include "ib_dm_mad.h"
+
+/*
+ * The prefix the ServiceName field must start with in the device management
+ * ServiceEntries attribute pair. See also the SRP specification.
+ */
+#define SRP_SERVICE_NAME_PREFIX		"SRP.T10:"
+
+/*
+ * Protocol constants taken from the SRP specification, followed by the
+ * size limits and default values used by this driver.
+ */
+enum {
+	/*
+	 * SRP IOControllerProfile attributes for SRP target ports that have
+	 * not been defined in <scsi/srp.h>. Source: section B.7, table B.7
+	 * in the SRP specification.
+	 */
+	SRP_PROTOCOL = 0x0108,
+	SRP_PROTOCOL_VERSION = 0x0001,
+	SRP_IO_SUBCLASS = 0x609e,
+	SRP_SEND_TO_IOC = 0x01,
+	SRP_SEND_FROM_IOC = 0x02,
+	SRP_RDMA_READ_FROM_IOC = 0x08,
+	SRP_RDMA_WRITE_FROM_IOC = 0x20,
+
+	/*
+	 * srp_login_cmd.req_flags bitmasks. See also table 9 in the SRP
+	 * specification.
+	 */
+	SRP_MTCH_ACTION = 0x03, /* MULTI-CHANNEL ACTION */
+	SRP_LOSOLNT = 0x10, /* logout solicited notification */
+	SRP_CRSOLNT = 0x20, /* credit request solicited notification */
+	SRP_AESOLNT = 0x40, /* asynchronous event solicited notification */
+
+	/*
+	 * srp_cmd.sol_nt / srp_tsk_mgmt.sol_not bitmasks. See also tables
+	 * 18 and 20 in the SRP specification.
+	 */
+	SRP_SCSOLNT = 0x02, /* SCSOLNT = successful solicited notification */
+	SRP_UCSOLNT = 0x04, /* UCSOLNT = unsuccessful solicited notification */
+
+	/*
+	 * srp_rsp.sol_not / srp_t_logout.sol_not bitmasks. See also tables
+	 * 16 and 22 in the SRP specification.
+	 */
+	SRP_SOLNT = 0x01, /* SOLNT = solicited notification */
+
+	/* See also table 24 in the SRP specification. */
+	SRP_TSK_MGMT_SUCCESS = 0x00,
+	SRP_TSK_MGMT_FUNC_NOT_SUPP = 0x04,
+	SRP_TSK_MGMT_FAILED = 0x05,
+
+	/* See also table 21 in the SRP specification. */
+	SRP_CMD_SIMPLE_Q = 0x0,
+	SRP_CMD_HEAD_OF_Q = 0x1,
+	SRP_CMD_ORDERED_Q = 0x2,
+	SRP_CMD_ACA = 0x4,
+
+	SRP_LOGIN_RSP_MULTICHAN_NO_CHAN = 0x0,
+	SRP_LOGIN_RSP_MULTICHAN_TERMINATED = 0x1,
+	SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
+
+	SRPT_DEF_SG_TABLESIZE = 128,
+	SRPT_DEF_SG_PER_WQE = 16,
+
+	/* Queue size limits and defaults. */
+	MIN_SRPT_SQ_SIZE = 16,
+	DEF_SRPT_SQ_SIZE = 4096,
+	SRPT_RQ_SIZE = 128,
+	MIN_SRPT_SRQ_SIZE = 4,
+	DEFAULT_SRPT_SRQ_SIZE = 4095,
+	MAX_SRPT_SRQ_SIZE = 65535,
+	MAX_SRPT_RDMA_SIZE = 1U << 24,
+	MAX_SRPT_RSP_SIZE = 1024,
+
+	/*
+	 * Request size: the default is an SRP_CMD with an indirect buffer
+	 * descriptor and 128 direct buffer descriptors. The numbers in the
+	 * inline comments are the structure sizes in bytes.
+	 */
+	MIN_MAX_REQ_SIZE = 996,
+	DEFAULT_MAX_REQ_SIZE
+		= sizeof(struct srp_cmd)/*48*/
+		+ sizeof(struct srp_indirect_buf)/*20*/
+		+ 128 * sizeof(struct srp_direct_buf)/*16*/,
+
+	MIN_MAX_RSP_SIZE = sizeof(struct srp_rsp)/*36*/ + 4,
+	DEFAULT_MAX_RSP_SIZE = 256, /* leaves 220 bytes for sense data */
+
+	DEFAULT_MAX_RDMA_SIZE = 65536,
+};
+
+/*
+ * Operation codes that are stored in the upper 32 bits of a work request ID
+ * by encode_wr_id().
+ */
+enum srpt_opcode {
+	SRPT_RECV,
+	SRPT_SEND,
+	SRPT_RDMA_MID,
+	SRPT_RDMA_ABORT,
+	SRPT_RDMA_READ_LAST,
+	SRPT_RDMA_WRITE_LAST,
+};
+
+/*
+ * Build a 64-bit work request ID with @opcode in bits 32 and up and @idx in
+ * the lower 32 bits.
+ */
+static inline u64 encode_wr_id(u8 opcode, u32 idx)
+{
+	return ((u64)opcode << 32) | idx;
+}
+/* Extract the opcode (upper 32 bits) from a work request ID. */
+static inline enum srpt_opcode opcode_from_wr_id(u64 wr_id)
+{
+	return wr_id >> 32;
+}
+/* Extract the index (lower 32 bits) from a work request ID. */
+static inline u32 idx_from_wr_id(u64 wr_id)
+{
+	return (u32)wr_id;
+}
+
+/*
+ * NOTE(review): presumably describes one RDMA transfer -- the remote address
+ * and key plus the local scatter/gather elements; the users of this
+ * structure are not part of this header -- confirm.
+ */
+struct rdma_iu {
+	u64		raddr;
+	u32		rkey;
+	struct ib_sge	*sge;
+	u32		sge_cnt;
+	int		mem_id;
+};
+
+/**
+ * enum srpt_command_state - SCSI command state managed by SRPT.
+ * @SRPT_STATE_NEW:           New command arrived and is being processed.
+ * @SRPT_STATE_NEED_DATA:     Processing a write or bidir command and waiting
+ *                            for data arrival.
+ * @SRPT_STATE_DATA_IN:       Data for the write or bidir command arrived and is
+ *                            being processed.
+ * @SRPT_STATE_CMD_RSP_SENT:  SRP_RSP for SRP_CMD has been sent.
+ * @SRPT_STATE_MGMT:          Processing a SCSI task management command.
+ * @SRPT_STATE_MGMT_RSP_SENT: SRP_RSP for SRP_TSK_MGMT has been sent.
+ * @SRPT_STATE_DONE:          Command processing finished successfully, command
+ *                            processing has been aborted or command processing
+ *                            failed.
+ */
+enum srpt_command_state {
+	SRPT_STATE_NEW		 = 0,
+	SRPT_STATE_NEED_DATA	 = 1,
+	SRPT_STATE_DATA_IN	 = 2,
+	SRPT_STATE_CMD_RSP_SENT	 = 3,
+	SRPT_STATE_MGMT		 = 4,
+	SRPT_STATE_MGMT_RSP_SENT = 5,
+	SRPT_STATE_DONE		 = 6,
+};
+
+/**
+ * struct srpt_ioctx - Shared SRPT I/O context information.
+ * @buf:   Pointer to the buffer.
+ * @dma:   DMA address of the buffer.
+ * @index: Index of the I/O context in its ioctx_ring array.
+ *
+ * Embedded as the first member of both struct srpt_recv_ioctx and
+ * struct srpt_send_ioctx.
+ */
+struct srpt_ioctx {
+	void			*buf;
+	dma_addr_t		dma;
+	uint32_t		index;
+};
+
+/**
+ * struct srpt_recv_ioctx - SRPT receive I/O context.
+ * @ioctx:     See above.
+ * @wait_list: Node for insertion in srpt_rdma_ch.cmd_wait_list.
+ */
+struct srpt_recv_ioctx {
+	struct srpt_ioctx	ioctx;
+	struct list_head	wait_list;
+};
+
+/**
+ * struct srpt_send_ioctx - SRPT send I/O context.
+ * @ioctx:       See above.
+ * @ch:          Channel pointer.
+ * @kref:        Reference count of this I/O context.
+ * @free_list:   Node in srpt_rdma_ch.free_list.
+ * @n_rbuf:      Number of data buffers in the received SRP command.
+ * @rbufs:       Pointer to SRP data buffer array.
+ * @single_rbuf: SRP data buffer if the command has only a single buffer.
+ * @sg:          Pointer to sg-list associated with this I/O context.
+ * @sg_cnt:      SG-list size.
+ * @mapped_sg_count: ib_dma_map_sg() return value.
+ * @n_rdma_ius:  Number of elements in the rdma_ius array.
+ * @rdma_ius:    Array with information about the RDMA mapping.
+ * @n_rdma:      NOTE(review): presumably the number of RDMA work requests
+ *               in use for this command -- confirm against the users.
+ * @tag:         Tag of the received SRP information unit.
+ * @spinlock:    Protects 'state'.
+ * @state:       I/O context state.
+ * @rdma_aborted: If initiating a multipart RDMA transfer failed, whether
+ * 		 the already initiated transfers have finished.
+ * @cmd:         Target core command data structure.
+ * @tx_done:     Completion; NOTE(review): what it signals is not visible
+ *               from this header -- confirm against the users.
+ * @queue_status_only: Set when only the status of the command has to be
+ *               sent, i.e. without transferring data to the initiator.
+ * @sense_data:  SCSI sense data.
+ */ +struct srpt_send_ioctx { + struct srpt_ioctx ioctx; + struct srpt_rdma_ch *ch; + struct kref kref; + struct rdma_iu *rdma_ius; + struct srp_direct_buf *rbufs; + struct srp_direct_buf single_rbuf; + struct scatterlist *sg; + struct list_head free_list; + spinlock_t spinlock; + enum srpt_command_state state; + bool rdma_aborted; + struct se_cmd cmd; + struct completion tx_done; + u64 tag; + int sg_cnt; + int mapped_sg_count; + u16 n_rdma_ius; + u8 n_rdma; + u8 n_rbuf; + bool queue_status_only; + u8 sense_data[SCSI_SENSE_BUFFERSIZE]; +}; + +/** + * enum rdma_ch_state - SRP channel state. + * @CH_CONNECTING: QP is in RTR state; waiting for RTU. + * @CH_LIVE: QP is in RTS state. + * @CH_DISCONNECTING: DREQ has been received; waiting for DREP + * or DREQ has been sent and waiting for DREP. + * @CH_DRAINING: QP is in ERR state; waiting for last WQE event. + * @CH_RELEASING: Last WQE event has been received; releasing resources. + */ +enum rdma_ch_state { + CH_CONNECTING, + CH_LIVE, + CH_DISCONNECTING, + CH_DRAINING, + CH_RELEASING +}; + +/** + * struct srpt_rdma_ch - RDMA channel. + * @wait_queue: Allows the kernel thread to wait for more work. + * @thread: Kernel thread that processes the IB queues associated with + * the channel. + * @cm_id: IB CM ID associated with the channel. + * @qp: IB queue pair used for communicating over this channel. + * @cq: IB completion queue for this channel. + * @rq_size: IB receive queue size. + * @rsp_size: IB response message size in bytes. + * @sq_wr_avail: number of work requests available in the send queue. + * @sport: pointer to the information of the HCA port used by this + * channel. + * @i_port_id: 128-bit initiator port identifier copied from SRP_LOGIN_REQ. + * @t_port_id: 128-bit target port identifier copied from SRP_LOGIN_REQ. + * @max_ti_iu_len: maximum target-to-initiator information unit length. 
+ * @req_lim: request limit: maximum number of requests that may be sent + * by the initiator without having received a response. + * @req_lim_delta: Number of credits not yet sent back to the initiator. + * @spinlock: Protects free_list and state. + * @free_list: Head of list with free send I/O contexts. + * @state: channel state. See also enum rdma_ch_state. + * @ioctx_ring: Send ring. + * @wc: IB work completion array for srpt_process_completion(). + * @list: Node for insertion in the srpt_device.rch_list list. + * @cmd_wait_list: List of SCSI commands that arrived before the RTU event. This + * list contains struct srpt_ioctx elements and is protected + * against concurrent modification by the cm_id spinlock. + * @sess: Session information associated with this SRP channel. + * @sess_name: Session name. + * @release_work: Allows scheduling of srpt_release_channel(). + * @release_done: Enables waiting for srpt_release_channel() completion. + */ +struct srpt_rdma_ch { + wait_queue_head_t wait_queue; + struct task_struct *thread; + struct ib_cm_id *cm_id; + struct ib_qp *qp; + struct ib_cq *cq; + int rq_size; + u32 rsp_size; + atomic_t sq_wr_avail; + struct srpt_port *sport; + u8 i_port_id[16]; + u8 t_port_id[16]; + int max_ti_iu_len; + atomic_t req_lim; + atomic_t req_lim_delta; + spinlock_t spinlock; + struct list_head free_list; + enum rdma_ch_state state; + struct srpt_send_ioctx **ioctx_ring; + struct ib_wc wc[16]; + struct list_head list; + struct list_head cmd_wait_list; + struct se_session *sess; + u8 sess_name[36]; + struct work_struct release_work; + struct completion *release_done; +}; + +/** + * struct srpt_port_attrib - Attributes for SRPT port + * @srp_max_rdma_size: Maximum size of SRP RDMA transfers for new connections. + * @srp_max_rsp_size: Maximum size of SRP response messages in bytes. + * @srp_sq_size: Shared receive queue (SRQ) size. NOTE(review): the member name + * suggests a send queue (SQ) size instead - confirm. 
+ */ +struct srpt_port_attrib { + u32 srp_max_rdma_size; + u32 srp_max_rsp_size; + u32 srp_sq_size; +}; + +/** + * struct srpt_port - Information associated by SRPT with a single IB port. + * @sdev: backpointer to the HCA information. + * @mad_agent: per-port management datagram processing information. + * @enabled: Whether or not this target port is enabled. + * @port_guid: ASCII representation of Port GUID + * @port: one-based port number. + * @sm_lid: cached value of the port's sm_lid. + * @lid: cached value of the port's lid. + * @gid: cached value of the port's gid. + * @port_acl_lock: spinlock for port_acl_list. + * @work: work structure for refreshing the aforementioned cached values. + * @port_tpg_1: Target portal group = 1 data. + * @port_wwn: Target core WWN data. + * @port_acl_list: Head of the list with all node ACLs for this port. + * @port_attrib: Attributes of this port; see struct srpt_port_attrib. + */ +struct srpt_port { + struct srpt_device *sdev; + struct ib_mad_agent *mad_agent; + bool enabled; + u8 port_guid[64]; + u8 port; + u16 sm_lid; + u16 lid; + union ib_gid gid; + spinlock_t port_acl_lock; + struct work_struct work; + struct se_portal_group port_tpg_1; + struct se_wwn port_wwn; + struct list_head port_acl_list; + struct srpt_port_attrib port_attrib; +}; + +/** + * struct srpt_device - Information associated by SRPT with a single HCA. + * @device: Backpointer to the struct ib_device managed by the IB core. + * @pd: IB protection domain. + * @mr: L_Key (local key) with write access to all local memory. + * @srq: Per-HCA SRQ (shared receive queue). + * @cm_id: Connection identifier. + * @dev_attr: Attributes of the InfiniBand device as obtained during the + * ib_client.add() callback. + * @srq_size: SRQ size. + * @ioctx_ring: Per-HCA receive I/O context ring (struct srpt_recv_ioctx). + * @rch_list: Per-device channel list -- see also srpt_rdma_ch.list. + * @ch_releaseQ: Enables waiting for removal from rch_list. + * @spinlock: Protects rch_list and tpg. + * @port: Information about the ports owned by this HCA. 
+ * @event_handler: Per-HCA asynchronous IB event handler. + * @list: Node in srpt_dev_list. + */ +struct srpt_device { + struct ib_device *device; + struct ib_pd *pd; + struct ib_mr *mr; + struct ib_srq *srq; + struct ib_cm_id *cm_id; + struct ib_device_attr dev_attr; + int srq_size; + struct srpt_recv_ioctx **ioctx_ring; + struct list_head rch_list; + wait_queue_head_t ch_releaseQ; + spinlock_t spinlock; + struct srpt_port port[2]; + struct ib_event_handler event_handler; + struct list_head list; +}; + +/** + * struct srpt_node_acl - Per-initiator ACL data (managed via configfs). + * @i_port_id: 128-bit SRP initiator port ID. + * @sport: port information. + * @nacl: Target core node ACL information. + * @list: Element of the per-HCA ACL list. + */ +struct srpt_node_acl { + u8 i_port_id[16]; + struct srpt_port *sport; + struct se_node_acl nacl; + struct list_head list; +}; + +/* + * SRP-related SCSI persistent reservation definitions. + * + * See also SPC4r28, section 7.6.1 (Protocol specific parameters introduction). + * See also SPC4r28, section 7.6.4.5 (TransportID for initiator ports using + * SCSI over an RDMA interface). + */ + +enum { + SCSI_TRANSPORTID_PROTOCOLID_SRP = 4, +}; + +struct spc_rdma_transport_id { + uint8_t protocol_identifier; + uint8_t reserved[7]; + uint8_t i_port_id[16]; +}; + +#endif /* IB_SRPT_H */ |