diff options
Diffstat (limited to 'drivers/infiniband')
179 files changed, 20645 insertions, 2838 deletions
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index e9b7dc037ff8..fb3fb89640e5 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -74,6 +74,7 @@ source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/nes/Kconfig" source "drivers/infiniband/hw/ocrdma/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" +source "drivers/infiniband/hw/hns/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" @@ -88,4 +89,6 @@ source "drivers/infiniband/sw/rxe/Kconfig" source "drivers/infiniband/hw/hfi1/Kconfig" +source "drivers/infiniband/hw/qedr/Kconfig" + endif # INFINIBAND diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1374541a4528..b136d3acc5bd 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -800,7 +800,7 @@ static struct notifier_block nb = { int addr_init(void) { - addr_wq = create_singlethread_workqueue("ib_addr"); + addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0); if (!addr_wq) return -ENOMEM; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e6dfa1bd3def..36bf50ebb187 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } if (ndev->flags & IFF_LOOPBACK) { dev_put(ndev); - if (!id_priv->id.device->get_netdev) - return -EOPNOTSUPP; + if (!id_priv->id.device->get_netdev) { + ret = -EOPNOTSUPP; + goto err2; + } ndev = id_priv->id.device->get_netdev(id_priv->id.device, id_priv->id.port_num); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } } route->path_rec->net = &init_net; @@ -4363,7 +4369,7 @@ static int __init cma_init(void) { int ret; - cma_wq = create_singlethread_workqueue("rdma_cm"); + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 357624f8b9d3..5495e22839a7 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1160,7 +1160,7 @@ static int __init iw_cm_init(void) if (ret) pr_err("iw_cm: couldn't register netlink callbacks\n"); - iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) return -ENOMEM; diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2d49228f28b2..40cbd6bdb73b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3160,7 +3160,7 @@ static int ib_mad_port_open(struct ib_device *device, goto error3; } - port_priv->pd = ib_alloc_pd(device); + port_priv->pd = ib_alloc_pd(device, 0); if (IS_ERR(port_priv->pd)) { dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); @@ -3177,7 +3177,7 @@ static int ib_mad_port_open(struct ib_device *device, goto error7; snprintf(name, sizeof name, "ib_mad%d", port_num); - port_priv->wq = create_singlethread_workqueue(name); + port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!port_priv->wq) { ret = -ENOMEM; goto error8; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 3a3c5d73bbfc..e51b739f6ea3 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -106,7 +106,6 @@ struct mcast_group { atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; - int query_id; u16 pkey_index; u8 leave_state; int retries; @@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member) member->multicast.comp_mask, 3000, GFP_KERNEL, join_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static int send_leave(struct mcast_group *group, u8 leave_state) @@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state) IB_SA_MCMEMBER_REC_JOIN_STATE, 3000, GFP_KERNEL, leave_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static void join_group(struct mcast_group *group, struct mcast_member *member, @@ -882,7 +873,7 @@ int mcast_init(void) { int ret; - mcast_wq = create_singlethread_workqueue("ib_mcast"); + mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); if (!mcast_wq) return -ENOMEM; diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index b9bf7aa055e7..81b742ca1639 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2015,7 +2015,7 @@ int ib_sa_init(void) goto err2; } - ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq"); + ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM); if (!ib_nl_wq) { ret = -ENOMEM; goto err3; diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 15defefecb4f..c1fb545e8d78 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1193,7 +1193,7 @@ static ssize_t set_node_desc(struct device *device, if (!dev->modify_device) return -EIO; - memcpy(desc.node_desc, buf, min_t(int, count, 64)); + memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); if (ret) return ret; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2825ece91d3c..9520154f1d7c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1638,7 +1638,8 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; - file->close_wq = create_singlethread_workqueue("ucma_close_id"); + file->close_wq = alloc_ordered_workqueue("ucma_close_id", + WQ_MEM_RECLAIM); if (!file->close_wq) { kfree(file); return -ENOMEM; diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index c68746ce6624..224ad274ea0b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -94,6 +94,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, unsigned long dma_attrs = 0; struct scatterlist *sg, *sg_list_start; int need_release = 0; + unsigned int gup_flags = FOLL_WRITE; if (dmasync) dma_attrs |= DMA_ATTR_WRITE_BARRIER; @@ -183,6 +184,9 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, if (ret) goto out; + if (!umem->writable) + gup_flags |= FOLL_FORCE; + need_release = 1; sg_list_start = umem->sg_head.sgl; @@ -190,7 +194,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr, ret = get_user_pages(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof (struct page *)), - 1, !umem->writable, page_list, vma_list); + gup_flags, page_list, vma_list); if (ret < 0) goto out; diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index 75077a018675..1f0fe3217f23 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -527,6 +527,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, u64 off; int j, k, ret = 0, start_idx, npages = 0; u64 base_virt_addr; + unsigned int flags = 0; if (access_mask == 0) return -EINVAL; @@ -556,6 +557,9 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, goto out_put_task; } + if (access_mask & ODP_WRITE_ALLOWED_BIT) + flags |= FOLL_WRITE; + start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT; k = start_idx; @@ -574,8 +578,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt, */ npages = get_user_pages_remote(owning_process, owning_mm, user_virt, gup_num_pages, - access_mask & ODP_WRITE_ALLOWED_BIT, - 0, local_page_list, NULL); + flags, local_page_list, NULL); up_read(&owning_mm->mmap_sem); if (npages < 0) diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index f6647318138d..cb3f515a2285 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -571,7 +571,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->device = ib_dev; pd->uobject = uobj; - pd->local_mr = NULL; + pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); uobj->object = pd; @@ -3078,51 +3078,102 @@ out_put: return ret ? ret : in_len; } +static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) +{ + /* Returns user space filter size, includes padding */ + return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; +} + +static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, + u16 ib_real_filter_sz) +{ + /* + * User space filter structures must be 64 bit aligned, otherwise this + * may pass, but we won't handle additional new attributes. + */ + + if (kern_filter_size > ib_real_filter_sz) { + if (memchr_inv(kern_spec_filter + + ib_real_filter_sz, 0, + kern_filter_size - ib_real_filter_sz)) + return -EINVAL; + return ib_real_filter_sz; + } + return kern_filter_size; +} + static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { + ssize_t actual_filter_sz; + ssize_t kern_filter_sz; + ssize_t ib_filter_sz; + void *kern_spec_mask; + void *kern_spec_val; + if (kern_spec->reserved) return -EINVAL; ib_spec->type = kern_spec->type; + kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + /* User flow spec size must be aligned to 4 bytes */ + if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) + return -EINVAL; + + kern_spec_val = (void *)kern_spec + + sizeof(struct ib_uverbs_flow_spec_hdr); + kern_spec_mask = kern_spec_val + kern_filter_sz; + switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: - ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); - if (ib_spec->eth.size != kern_spec->eth.size) + ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->eth.val, &kern_spec->eth.val, - sizeof(struct ib_flow_eth_filter)); - memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, - sizeof(struct ib_flow_eth_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_eth); + memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV4: - ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); - if (ib_spec->ipv4.size != kern_spec->ipv4.size) + ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, - sizeof(struct ib_flow_ipv4_filter)); - memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, - sizeof(struct ib_flow_ipv4_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_ipv4); + memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV6: - ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6); - if (ib_spec->ipv6.size != kern_spec->ipv6.size) + ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->size = sizeof(struct ib_flow_spec_ipv6); + memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) || + (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20)) return -EINVAL; - memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val, - sizeof(struct ib_flow_ipv6_filter)); - memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask, - sizeof(struct ib_flow_ipv6_filter)); break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: - ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); - if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) + ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, - sizeof(struct ib_flow_tcp_udp_filter)); - memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, - sizeof(struct ib_flow_tcp_udp_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp); + memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; default: return -EINVAL; @@ -3654,7 +3705,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_uobj; } - flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL); + flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * + sizeof(union ib_flow_spec), GFP_KERNEL); if (!flow_attr) { err = -ENOMEM; goto err_put; @@ -4173,6 +4225,23 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.device_cap_flags_ex = attr.device_cap_flags; resp.response_length += sizeof(resp.device_cap_flags_ex); + + if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) + goto end; + + resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; + resp.rss_caps.max_rwq_indirection_tables = + attr.rss_caps.max_rwq_indirection_tables; + resp.rss_caps.max_rwq_indirection_table_size = + attr.rss_caps.max_rwq_indirection_table_size; + + resp.response_length += sizeof(resp.rss_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) + goto end; + + resp.max_wq_type_rq = attr.max_wq_type_rq; + resp.response_length += sizeof(resp.max_wq_type_rq); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f2b776efab3a..83687646da68 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -227,9 +227,11 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); * Every PD has a local_dma_lkey which can be used as the lkey value for local * memory operations. */ -struct ib_pd *ib_alloc_pd(struct ib_device *device) +struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, + const char *caller) { struct ib_pd *pd; + int mr_access_flags = 0; pd = device->alloc_pd(device, NULL, NULL); if (IS_ERR(pd)) @@ -237,26 +239,46 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) pd->device = device; pd->uobject = NULL; - pd->local_mr = NULL; + pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); + pd->flags = flags; if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; - else { + else + mr_access_flags |= IB_ACCESS_LOCAL_WRITE; + + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { + pr_warn("%s: enabling unsafe global rkey\n", caller); + mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; + } + + if (mr_access_flags) { struct ib_mr *mr; - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + mr = pd->device->get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); - return (struct ib_pd *)mr; + return ERR_CAST(mr); } - pd->local_mr = mr; - pd->local_dma_lkey = pd->local_mr->lkey; + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + mr->need_inval = false; + + pd->__internal_mr = mr; + + if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) + pd->local_dma_lkey = pd->__internal_mr->lkey; + + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) + pd->unsafe_global_rkey = pd->__internal_mr->rkey; } + return pd; } -EXPORT_SYMBOL(ib_alloc_pd); +EXPORT_SYMBOL(__ib_alloc_pd); /** * ib_dealloc_pd - Deallocates a protection domain. @@ -270,10 +292,10 @@ void ib_dealloc_pd(struct ib_pd *pd) { int ret; - if (pd->local_mr) { - ret = ib_dereg_mr(pd->local_mr); + if (pd->__internal_mr) { + ret = pd->device->dereg_mr(pd->__internal_mr); WARN_ON(ret); - pd->local_mr = NULL; + pd->__internal_mr = NULL; } /* uverbs manipulates usecnt with proper locking, while the kabi @@ -821,7 +843,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (ret) { pr_err("failed to init MR pool ret= %d\n", ret); ib_destroy_qp(qp); - qp = ERR_PTR(ret); + return ERR_PTR(ret); } } @@ -1391,29 +1413,6 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ -struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) -{ - struct ib_mr *mr; - int err; - - err = ib_check_mr_access(mr_access_flags); - if (err) - return ERR_PTR(err); - - mr = pd->device->get_dma_mr(pd, mr_access_flags); - - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - mr->need_inval = false; - } - - return mr; -} -EXPORT_SYMBOL(ib_get_dma_mr); - int ib_dereg_mr(struct ib_mr *mr) { struct ib_pd *pd = mr->pd; @@ -1812,13 +1811,13 @@ EXPORT_SYMBOL(ib_set_vf_guid); * * Constraints: * - The first sg element is allowed to have an offset. - * - Each sg element must be aligned to page_size (or physically - * contiguous to the previous element). In case an sg element has a - * non contiguous offset, the mapping prefix will not include it. + * - Each sg element must either be aligned to page_size or virtually + * contiguous to the previous element. In case an sg element has a + * non-contiguous offset, the mapping prefix will not include it. * - The last sg element is allowed to have length less than page_size. * - If sg_nents total byte length exceeds the mr max_num_sge * page_size * then only max_num_sg entries will be mapped. - * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these + * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these * constraints holds and the page_size argument is ignored. * * Returns the number of sg elements that were mapped to the memory region. diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index c0c7cf8af3f4..e7a5ed9f6f3f 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -9,3 +9,5 @@ obj-$(CONFIG_INFINIBAND_NES) += nes/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ obj-$(CONFIG_INFINIBAND_USNIC) += usnic/ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/ +obj-$(CONFIG_INFINIBAND_HNS) += hns/ +obj-$(CONFIG_INFINIBAND_QEDR) += qedr/ diff --git a/drivers/infiniband/hw/cxgb3/iwch.c b/drivers/infiniband/hw/cxgb3/iwch.c index 8e77dc543dd1..b3e11329801d 100644 --- a/drivers/infiniband/hw/cxgb3/iwch.c +++ b/drivers/infiniband/hw/cxgb3/iwch.c @@ -36,7 +36,7 @@ #include "cxgb3_offload.h" #include "iwch_provider.h" -#include "iwch_user.h" +#include <rdma/cxgb3-abi.h> #include "iwch.h" #include "iwch_cm.h" diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index 04bbf172abde..65ee64400deb 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -2258,7 +2258,7 @@ int __init iwch_cm_init(void) { skb_queue_head_init(&rxq); - workq = create_singlethread_workqueue("iw_cxgb3"); + workq = alloc_ordered_workqueue("iw_cxgb3", WQ_MEM_RECLAIM); if (!workq) return -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 3edb80644b53..cba57bb53dba 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -58,7 +58,7 @@ #include "iwch.h" #include "iwch_provider.h" #include "iwch_cm.h" -#include "iwch_user.h" +#include <rdma/cxgb3-abi.h> #include "common.h" static struct ib_ah *iwch_ah_create(struct ib_pd *pd, @@ -1396,6 +1396,7 @@ int iwch_register_device(struct iwch_dev *dev) (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; + BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index 23f38cf2c5cd..afe8b28e0878 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_CXGB4 tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) + select CHELSIO_LIB select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T4 and T5 diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index e11cf7299945..fa40b685831b 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,4 +1,5 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3aca7f6171b4..f1510cc76d2d 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -49,6 +49,7 @@ #include <rdma/ib_addr.h> +#include <libcxgb_cm.h> #include "iw_cxgb4.h" #include "clip_tbl.h" @@ -239,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) { - struct cpl_tid_release *req; + u32 len = roundup(sizeof(struct cpl_tid_release), 16); - skb = get_skb(skb, sizeof *req, GFP_KERNEL); + skb = get_skb(skb, len, GFP_KERNEL); if (!skb) return; - req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); - INIT_TP_WR(req, hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + + cxgb_mk_tid_release(skb, len, hwtid, 0); c4iw_ofld_send(rdev, skb); return; } @@ -333,6 +332,8 @@ static void remove_ep_tid(struct c4iw_ep *ep) spin_lock_irqsave(&ep->com.dev->lock, flags); _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + if (idr_is_empty(&ep->com.dev->hwtid_idr)) + wake_up(&ep->com.dev->wait); spin_unlock_irqrestore(&ep->com.dev->lock, flags); } @@ -464,72 +465,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev) return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } -static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) -{ - int i; - - egress_dev = get_real_dev(egress_dev); - for (i = 0; i < dev->rdev.lldi.nports; i++) - if (dev->rdev.lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, - __u8 *peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!our_interface(dev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } - -out: - return dst; -} - -static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!our_interface(dev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@ -704,56 +639,32 @@ static int send_flowc(struct c4iw_ep *ep) static int send_halfclose(struct c4iw_ep *ep) { - struct cpl_close_con_req *req; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!skb)) return -ENOMEM; - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - req = (struct cpl_close_con_req *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - ep->hwtid)); + cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, + NULL, arp_failure_discard); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } static int send_abort(struct c4iw_ep *ep) { - struct cpl_abort_req *req; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!req_skb)) return -ENOMEM; - set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; - return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); -} + cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, + ep, abort_arp_failure); -static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? - sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? - round_up(TCPOLEN_TIMESTAMP, 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } static int send_connect(struct c4iw_ep *ep) @@ -768,7 +679,7 @@ static int send_connect(struct c4iw_ep *ep) u64 opt0; u32 opt2; unsigned int mtu_idx; - int wscale; + u32 wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.local_addr; @@ -815,10 +726,10 @@ static int send_connect(struct c4iw_ep *ep) } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -1445,9 +1356,9 @@ static void established_upcall(struct c4iw_ep *ep) static int update_rx_credits(struct c4iw_ep *ep, u32 credits) { - struct cpl_rx_data_ack *req; struct sk_buff *skb; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@ -1464,15 +1375,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) if (ep->rcv_win > RCV_BUFSIZ_M * 1024) credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; - req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - ep->hwtid)); - req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F | - RX_DACK_CHANGE_F | - RX_DACK_MODE_V(dack_mode)); - set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | + RX_DACK_MODE_V(dack_mode); + + cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, + credit_dack); + c4iw_ofld_send(&ep->com.dev->rdev, skb); return credits; } @@ -1827,8 +1735,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) (ep->mpa_pkt + sizeof(*mpa)); ep->ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); ep->ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) @@ -1966,7 +1878,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) struct sk_buff *skb; struct fw_ofld_connection_wr *req; unsigned int mtu_idx; - int wscale; + u32 wscale; struct sockaddr_in *sin; int win; @@ -1991,10 +1903,10 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -2048,15 +1960,6 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_CONN_EXIST); } -/* Returns whether a CPL status conveys negative advice. - */ -static int is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static char *neg_adv_str(unsigned int status) { switch (status) { @@ -2113,8 +2016,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) + if (!ep->l2t) { + dev_put(pdev); goto out; + } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, @@ -2210,16 +2115,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep) /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { - ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, ep->com.cm_id->tos); + ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { - ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, + get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; } @@ -2291,7 +2201,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (is_neg_adv(status)) { + if (cxgb_is_neg_adv(status)) { PDBG("%s Connection problems for atid %u status %u (%s)\n", __func__, atid, status, neg_adv_str(status)); ep->stats.connect_neg_adv++; @@ -2418,7 +2328,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, unsigned int mtu_idx; u64 opt0; u32 opt2; - int wscale; + u32 wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; @@ -2439,10 +2349,10 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -2514,42 +2424,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, - int *iptype, __u8 *local_ip, __u8 *peer_ip, - __be16 *local_port, __be16 *peer_port) -{ - int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, - ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - *peer_port = tcp->source; - *local_port = tcp->dest; - - return; -} - static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *child_ep = NULL, *parent_ep; @@ -2578,8 +2452,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, - local_ip, peer_ip, &local_port, &peer_port); + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, + &iptype, local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@ -2587,18 +2461,19 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, - local_port, peer_port, - tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &parent_ep->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); } if (!dst) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", @@ -2831,18 +2706,18 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); struct c4iw_ep *ep; - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@ -2935,11 +2810,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) release = 1; goto out; } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; + + cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); out: if (release) @@ -3136,7 +3009,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { if (conn_param->ord > ep->ird) { if (RELAXED_IRD_NEGOTIATION) { - ep->ord = ep->ird; + conn_param->ord = ep->ird; } else { ep->ird = conn_param->ird; ep->ord = conn_param->ord; @@ -3371,9 +3244,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", __func__, &laddr->sin_addr, ntohs(laddr->sin_port), ra, ntohs(raddr->sin_port)); - ep->dst = find_route(dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, cm_id->tos); + ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -3392,10 +3267,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) __func__, laddr6->sin6_addr.s6_addr, ntohs(laddr6->sin6_port), raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); - ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); } if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); @@ -4037,8 +3914,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); - dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, - iph->tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + iph->daddr, iph->saddr, tcph->dest, + tcph->source, iph->tos); if (!dst) { pr_err("%s - failed to find dst entry!\n", __func__); @@ -4313,7 +4191,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@ -4357,7 +4235,7 @@ int __init c4iw_cm_init(void) spin_lock_init(&timeout_lock); skb_queue_head_init(&rxq); - workq = create_singlethread_workqueue("iw_cxgb4"); + workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM); if (!workq) return -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 812ab7278b8e..867b8cf82be8 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -666,6 +666,18 @@ skip_cqe: return ret; } +static void invalidate_mr(struct c4iw_dev *rhp, u32 rkey) +{ + struct c4iw_mr *mhp; + unsigned long flags; + + spin_lock_irqsave(&rhp->lock, flags); + mhp = get_mhp(rhp, rkey >> 8); + if (mhp) + mhp->attr.state = 0; + spin_unlock_irqrestore(&rhp->lock, flags); +} + /* * Get one cq entry from c4iw and map it to openib. * @@ -721,6 +733,7 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) CQE_OPCODE(&cqe) == FW_RI_SEND_WITH_SE_INV) { wc->ex.invalidate_rkey = CQE_WRID_STAG(&cqe); wc->wc_flags |= IB_WC_WITH_INVALIDATE; + invalidate_mr(qhp->rhp, wc->ex.invalidate_rkey); } } else { switch (CQE_OPCODE(&cqe)) { @@ -746,6 +759,10 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc) break; case FW_RI_FAST_REGISTER: wc->opcode = IB_WC_REG_MR; + + /* Invalidate the MR if the fastreg failed */ + if (CQE_STATUS(&cqe) != T4_ERR_SUCCESS) + invalidate_mr(qhp->rhp, CQE_WRID_FR_STAG(&cqe)); break; default: printk(KERN_ERR MOD "Unexpected opcode %d " @@ -1016,15 +1033,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; - int ret; + int ret = 0; unsigned long flag; chp = to_c4iw_cq(ibcq); spin_lock_irqsave(&chp->lock, flag); - ret = t4_arm_cq(&chp->cq, - (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); spin_unlock_irqrestore(&chp->lock, flag); - if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - ret = 0; return ret; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 071d7332ec06..93e3d270a98a 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); idr_destroy(&ctx->dev->cqidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); idr_destroy(&ctx->dev->qpidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); idr_destroy(&ctx->dev->mmidr); + wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); @@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + init_waitqueue_head(&devp->wait); devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { @@ -1475,6 +1480,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 511, + .ciq = true, + .lro = false, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index aa47e0ae80bc..7e7f79e55006 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -58,7 +58,7 @@ #include "cxgb4.h" #include "cxgb4_uld.h" #include "l2t.h" -#include "user.h" +#include <rdma/cxgb4-abi.h> #define DRV_NAME "iw_cxgb4" #define MOD DRV_NAME ":" @@ -263,6 +263,7 @@ struct c4iw_dev { struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; + wait_queue_head_t wait; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) @@ -881,15 +882,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) return cm_id->provider_data; } -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<<wscale) < win) - wscale++; - return wscale; -} - static inline int ocqp_supported(const struct cxgb4_lld_info *infop) { #if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64) diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c index 0b91b0f4df71..80e27749420a 100644 --- a/drivers/infiniband/hw/cxgb4/mem.c +++ b/drivers/infiniband/hw/cxgb4/mem.c @@ -695,7 +695,7 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd, mhp->attr.pdid = php->pdid; mhp->attr.type = FW_RI_STAG_NSMR; mhp->attr.stag = stag; - mhp->attr.state = 1; + mhp->attr.state = 0; mmid = (stag) >> 8; mhp->ibmr.rkey = mhp->ibmr.lkey = stag; if (insert_handle(rhp, &rhp->mmidr, mhp, mmid)) { diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index df127ce6b6ec..645e606a17c5 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -563,6 +563,7 @@ int c4iw_register_device(struct c4iw_dev *dev) (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; + BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index edb1172b6f54..f57deba6717c 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -609,10 +609,42 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe, return 0; } +static void build_tpte_memreg(struct fw_ri_fr_nsmr_tpte_wr *fr, + struct ib_reg_wr *wr, struct c4iw_mr *mhp, + u8 *len16) +{ + __be64 *p = (__be64 *)fr->pbl; + + fr->r2 = cpu_to_be32(0); + fr->stag = cpu_to_be32(mhp->ibmr.rkey); + + fr->tpte.valid_to_pdid = cpu_to_be32(FW_RI_TPTE_VALID_F | + FW_RI_TPTE_STAGKEY_V((mhp->ibmr.rkey & FW_RI_TPTE_STAGKEY_M)) | + FW_RI_TPTE_STAGSTATE_V(1) | + FW_RI_TPTE_STAGTYPE_V(FW_RI_STAG_NSMR) | + FW_RI_TPTE_PDID_V(mhp->attr.pdid)); + fr->tpte.locread_to_qpid = cpu_to_be32( + FW_RI_TPTE_PERM_V(c4iw_ib_to_tpt_access(wr->access)) | + FW_RI_TPTE_ADDRTYPE_V(FW_RI_VA_BASED_TO) | + FW_RI_TPTE_PS_V(ilog2(wr->mr->page_size) - 12)); + fr->tpte.nosnoop_pbladdr = cpu_to_be32(FW_RI_TPTE_PBLADDR_V( + PBL_OFF(&mhp->rhp->rdev, mhp->attr.pbl_addr)>>3)); + fr->tpte.dca_mwbcnt_pstag = cpu_to_be32(0); + fr->tpte.len_hi = cpu_to_be32(0); + fr->tpte.len_lo = cpu_to_be32(mhp->ibmr.length); + fr->tpte.va_hi = cpu_to_be32(mhp->ibmr.iova >> 32); + fr->tpte.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova & 0xffffffff); + + p[0] = cpu_to_be64((u64)mhp->mpl[0]); + p[1] = cpu_to_be64((u64)mhp->mpl[1]); + + *len16 = DIV_ROUND_UP(sizeof(*fr), 16); +} + static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, - struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported) + struct ib_reg_wr *wr, struct c4iw_mr *mhp, u8 *len16, + bool dsgl_supported) { - struct c4iw_mr *mhp = to_c4iw_mr(wr->mr); struct fw_ri_immd *imdp; __be64 *p; int i; @@ -674,16 +706,19 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe, return 0; } -static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, - u8 *len16) +static int build_inv_stag(struct c4iw_dev *dev, union t4_wr *wqe, + struct ib_send_wr *wr, u8 *len16) { + struct c4iw_mr *mhp = get_mhp(dev, wr->ex.invalidate_rkey >> 8); + + mhp->attr.state = 0; wqe->inv.stag_inv = cpu_to_be32(wr->ex.invalidate_rkey); wqe->inv.r2 = 0; *len16 = DIV_ROUND_UP(sizeof wqe->inv, 16); return 0; } -void _free_qp(struct kref *kref) +static void _free_qp(struct kref *kref) { struct c4iw_qp *qhp; @@ -816,18 +851,32 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, if (!qhp->wq.sq.oldest_read) qhp->wq.sq.oldest_read = swsqe; break; - case IB_WR_REG_MR: - fw_opcode = FW_RI_FR_NSMR_WR; + case IB_WR_REG_MR: { + struct c4iw_mr *mhp = to_c4iw_mr(reg_wr(wr)->mr); + swsqe->opcode = FW_RI_FAST_REGISTER; - err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16, - qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + if (qhp->rhp->rdev.lldi.fr_nsmr_tpte_wr_support && + !mhp->attr.state && mhp->mpl_len <= 2) { + fw_opcode = FW_RI_FR_NSMR_TPTE_WR; + build_tpte_memreg(&wqe->fr_tpte, reg_wr(wr), + mhp, &len16); + } else { + fw_opcode = FW_RI_FR_NSMR_WR; + err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), + mhp, &len16, + qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl); + if (err) + break; + } + mhp->attr.state = 1; break; + } case IB_WR_LOCAL_INV: if (wr->send_flags & IB_SEND_FENCE) fw_flags |= FW_RI_LOCAL_FENCE_FLAG; fw_opcode = FW_RI_INV_LSTAG_WR; swsqe->opcode = FW_RI_LOCAL_INV; - err = build_inv_stag(wqe, wr, &len16); + err = build_inv_stag(qhp->rhp, wqe, wr, &len16); break; default: PDBG("%s post of type=%d TBD!\n", __func__, diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 6126bbe36095..862381aa83c8 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -95,6 +95,7 @@ union t4_wr { struct fw_ri_rdma_read_wr read; struct fw_ri_bind_mw_wr bind; struct fw_ri_fr_nsmr_wr fr; + struct fw_ri_fr_nsmr_tpte_wr fr_tpte; struct fw_ri_inv_lstag_wr inv; struct t4_status_page status; __be64 flits[T4_EQ_ENTRY_SIZE / sizeof(__be64) * T4_SQ_NUM_SLOTS]; @@ -170,7 +171,7 @@ struct t4_cqe { __be32 msn; } rcqe; struct { - u32 nada1; + u32 stag; u16 nada2; u16 cidx; } scqe; @@ -232,6 +233,7 @@ struct t4_cqe { /* used for SQ completion processing */ #define CQE_WRID_SQ_IDX(x) ((x)->u.scqe.cidx) +#define CQE_WRID_FR_STAG(x) (be32_to_cpu((x)->u.scqe.stag)) /* generic accessor macros */ #define CQE_WRID_HI(x) (be32_to_cpu((x)->u.gen.wrid_hi)) @@ -634,6 +636,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) return (CQE_GENBIT(cqe) == cq->gen); } +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) { int ret; diff --git a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h index 1e26669793c3..010c709ba3bb 100644 --- a/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h +++ b/drivers/infiniband/hw/cxgb4/t4fw_ri_api.h @@ -669,6 +669,18 @@ struct fw_ri_fr_nsmr_wr { #define FW_RI_FR_NSMR_WR_DCACPU_G(x) \ (((x) >> FW_RI_FR_NSMR_WR_DCACPU_S) & FW_RI_FR_NSMR_WR_DCACPU_M) +struct fw_ri_fr_nsmr_tpte_wr { + __u8 opcode; + __u8 flags; + __u16 wrid; + __u8 r1[3]; + __u8 len16; + __u32 r2; + __u32 stag; + struct fw_ri_tpte tpte; + __u64 pbl[2]; +}; + struct fw_ri_inv_lstag_wr { __u8 opcode; __u8 flags; diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 79575ee873f2..a26a9a0bfc41 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,7 +47,7 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> -#include <linux/cpumask.h> +#include <linux/interrupt.h> #include "hfi.h" #include "affinity.h" @@ -56,7 +56,7 @@ struct hfi1_affinity_node_list node_affinity = { .list = LIST_HEAD_INIT(node_affinity.list), - .lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock), + .lock = __MUTEX_INITIALIZER(node_affinity.lock) }; /* Name of IRQ types, indexed by enum irq_type */ @@ -160,14 +160,14 @@ void node_affinity_destroy(void) struct list_head *pos, *q; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); list_for_each_safe(pos, q, &node_affinity.list) { entry = list_entry(pos, struct hfi1_affinity_node, list); list_del(pos); kfree(entry); } - spin_unlock(&node_affinity.lock); + mutex_unlock(&node_affinity.lock); kfree(hfi1_per_node_cntr); } @@ -234,9 +234,8 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (cpumask_first(local_mask) >= nr_cpu_ids) local_mask = topology_core_cpumask(0); - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); /* * If this is the first time this NUMA node's affinity is used, @@ -247,6 +246,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) if (!entry) { dd_dev_err(dd, "Unable to allocate global affinity node\n"); + mutex_unlock(&node_affinity.lock); return -ENOMEM; } init_cpu_mask_set(&entry->def_intr); @@ -303,15 +303,113 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd) &entry->general_intr_mask); } - spin_lock(&node_affinity.lock); node_affinity_add_tail(entry); - spin_unlock(&node_affinity.lock); } - + mutex_unlock(&node_affinity.lock); return 0; } -int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +/* + * Function updates the irq affinity hint for msix after it has been changed + * by the user using the /proc/irq interface. This function only accepts + * one cpu in the mask. + */ +static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu) +{ + struct sdma_engine *sde = msix->arg; + struct hfi1_devdata *dd = sde->dd; + struct hfi1_affinity_node *entry; + struct cpu_mask_set *set; + int i, old_cpu; + + if (cpu > num_online_cpus() || cpu == sde->cpu) + return; + + mutex_lock(&node_affinity.lock); + entry = node_affinity_lookup(dd->node); + if (!entry) + goto unlock; + + old_cpu = sde->cpu; + sde->cpu = cpu; + cpumask_clear(&msix->mask); + cpumask_set_cpu(cpu, &msix->mask); + dd_dev_dbg(dd, "IRQ vector: %u, type %s engine %u -> cpu: %d\n", + msix->msix.vector, irq_type_names[msix->type], + sde->this_idx, cpu); + irq_set_affinity_hint(msix->msix.vector, &msix->mask); + + /* + * Set the new cpu in the hfi1_affinity_node and clean + * the old cpu if it is not used by any other IRQ + */ + set = &entry->def_intr; + cpumask_set_cpu(cpu, &set->mask); + cpumask_set_cpu(cpu, &set->used); + for (i = 0; i < dd->num_msix_entries; i++) { + struct hfi1_msix_entry *other_msix; + + other_msix = &dd->msix_entries[i]; + if (other_msix->type != IRQ_SDMA || other_msix == msix) + continue; + + if (cpumask_test_cpu(old_cpu, &other_msix->mask)) + goto unlock; + } + cpumask_clear_cpu(old_cpu, &set->mask); + cpumask_clear_cpu(old_cpu, &set->used); +unlock: + mutex_unlock(&node_affinity.lock); +} + +static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify, + const cpumask_t *mask) +{ + int cpu = cpumask_first(mask); + struct hfi1_msix_entry *msix = container_of(notify, + struct hfi1_msix_entry, + notify); + + /* Only one CPU configuration supported currently */ + hfi1_update_sdma_affinity(msix, cpu); +} + +static void hfi1_irq_notifier_release(struct kref *ref) +{ + /* + * This is required by affinity notifier. We don't have anything to + * free here. + */ +} + +static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + notify->irq = msix->msix.vector; + notify->notify = hfi1_irq_notifier_notify; + notify->release = hfi1_irq_notifier_release; + + if (irq_set_affinity_notifier(notify->irq, notify)) + pr_err("Failed to register sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix) +{ + struct irq_affinity_notify *notify = &msix->notify; + + if (irq_set_affinity_notifier(notify->irq, NULL)) + pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n", + notify->irq); +} + +/* + * Function sets the irq affinity for msix. + * It *must* be called with node_affinity.lock held. + */ +static int get_irq_affinity(struct hfi1_devdata *dd, + struct hfi1_msix_entry *msix) { int ret; cpumask_var_t diff; @@ -329,9 +427,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) if (!ret) return -ENOMEM; - spin_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: @@ -361,7 +457,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) * finds its CPU here. */ if (cpu == -1 && set) { - spin_lock(&node_affinity.lock); if (cpumask_equal(&set->mask, &set->used)) { /* * We've used up all the CPUs, bump up the generation @@ -373,17 +468,6 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) cpumask_andnot(diff, &set->mask, &set->used); cpu = cpumask_first(diff); cpumask_set_cpu(cpu, &set->used); - spin_unlock(&node_affinity.lock); - } - - switch (msix->type) { - case IRQ_SDMA: - sde->cpu = cpu; - break; - case IRQ_GENERAL: - case IRQ_RCVCTXT: - case IRQ_OTHER: - break; } cpumask_set_cpu(cpu, &msix->mask); @@ -392,10 +476,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) extra, cpu); irq_set_affinity_hint(msix->msix.vector, &msix->mask); + if (msix->type == IRQ_SDMA) { + sde->cpu = cpu; + hfi1_setup_sdma_notifier(msix); + } + free_cpumask_var(diff); return 0; } +int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) +{ + int ret; + + mutex_lock(&node_affinity.lock); + ret = get_irq_affinity(dd, msix); + mutex_unlock(&node_affinity.lock); + return ret; +} + void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) { @@ -403,13 +502,13 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd; struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); switch (msix->type) { case IRQ_SDMA: set = &entry->def_intr; + hfi1_cleanup_sdma_notifier(msix); break; case IRQ_GENERAL: /* Don't do accounting for general contexts */ @@ -421,21 +520,21 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd, set = &entry->rcv_intr; break; default: + mutex_unlock(&node_affinity.lock); return; } if (set) { - spin_lock(&node_affinity.lock); cpumask_andnot(&set->used, &set->used, &msix->mask); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&node_affinity.lock); } irq_set_affinity_hint(msix->msix.vector, NULL); cpumask_clear(&msix->mask); + mutex_unlock(&node_affinity.lock); } /* This should be called with node_affinity.lock held */ @@ -536,7 +635,7 @@ int hfi1_get_proc_affinity(int node) if (!ret) goto free_available_mask; - spin_lock(&affinity->lock); + mutex_lock(&affinity->lock); /* * If we've used all available HW threads, clear the mask and start * overloading. @@ -644,7 +743,8 @@ int hfi1_get_proc_affinity(int node) cpu = -1; else cpumask_set_cpu(cpu, &set->used); - spin_unlock(&affinity->lock); + + mutex_unlock(&affinity->lock); hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu); free_cpumask_var(intrs_mask); @@ -665,49 +765,53 @@ void hfi1_put_proc_affinity(int cpu) if (cpu < 0) return; - spin_lock(&affinity->lock); + + mutex_lock(&affinity->lock); cpumask_clear_cpu(cpu, &set->used); hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu); if (cpumask_empty(&set->used) && set->gen) { set->gen--; cpumask_copy(&set->used, &set->mask); } - spin_unlock(&affinity->lock); + mutex_unlock(&affinity->lock); } -/* Prevents concurrent reads and writes of the sdma_affinity attrib */ -static DEFINE_MUTEX(sdma_affinity_mutex); - int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, size_t count) { struct hfi1_affinity_node *entry; - struct cpumask mask; + cpumask_var_t mask; int ret, i; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) - return -EINVAL; + if (!entry) { + ret = -EINVAL; + goto unlock; + } - ret = cpulist_parse(buf, &mask); + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) { + ret = -ENOMEM; + goto unlock; + } + + ret = cpulist_parse(buf, mask); if (ret) - return ret; + goto out; - if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { + if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { dd_dev_warn(dd, "Invalid CPU mask\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } - mutex_lock(&sdma_affinity_mutex); /* reset the SDMA interrupt affinity details */ init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, &mask); - /* - * Reassign the affinity for each SDMA interrupt. - */ + cpumask_copy(&entry->def_intr.mask, mask); + + /* Reassign the affinity for each SDMA interrupt. */ for (i = 0; i < dd->num_msix_entries; i++) { struct hfi1_msix_entry *msix; @@ -715,13 +819,15 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (msix->type != IRQ_SDMA) continue; - ret = hfi1_get_irq_affinity(dd, msix); + ret = get_irq_affinity(dd, msix); if (ret) break; } - - mutex_unlock(&sdma_affinity_mutex); +out: + free_cpumask_var(mask); +unlock: + mutex_unlock(&node_affinity.lock); return ret ? ret : strnlen(buf, PAGE_SIZE); } @@ -729,15 +835,15 @@ int hfi1_get_sdma_affinity(struct hfi1_devdata *dd, char *buf) { struct hfi1_affinity_node *entry; - spin_lock(&node_affinity.lock); + mutex_lock(&node_affinity.lock); entry = node_affinity_lookup(dd->node); - spin_unlock(&node_affinity.lock); - if (!entry) + if (!entry) { + mutex_unlock(&node_affinity.lock); return -EINVAL; + } - mutex_lock(&sdma_affinity_mutex); cpumap_print_to_pagebuf(true, buf, &entry->def_intr.mask); - mutex_unlock(&sdma_affinity_mutex); + mutex_unlock(&node_affinity.lock); return strnlen(buf, PAGE_SIZE); } diff --git a/drivers/infiniband/hw/hfi1/affinity.h b/drivers/infiniband/hw/hfi1/affinity.h index 8879cf7a8cac..b89ea3c0ee1a 100644 --- a/drivers/infiniband/hw/hfi1/affinity.h +++ b/drivers/infiniband/hw/hfi1/affinity.h @@ -121,8 +121,7 @@ struct hfi1_affinity_node_list { int num_core_siblings; int num_online_nodes; int num_online_cpus; - /* protect affinity node list */ - spinlock_t lock; + struct mutex lock; /* protects affinity nodes */ }; int node_affinity_init(void); diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b32638d58ae8..9bf5f23544d4 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -971,7 +971,9 @@ static struct flag_table dc8051_info_err_flags[] = { FLAG_ENTRY0("Failed LNI(VerifyCap_1)", FAILED_LNI_VERIFY_CAP1), FLAG_ENTRY0("Failed LNI(VerifyCap_2)", FAILED_LNI_VERIFY_CAP2), FLAG_ENTRY0("Failed LNI(ConfigLT)", FAILED_LNI_CONFIGLT), - FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT) + FLAG_ENTRY0("Host Handshake Timeout", HOST_HANDSHAKE_TIMEOUT), + FLAG_ENTRY0("External Device Request Timeout", + EXTERNAL_DEVICE_REQ_TIMEOUT), }; /* @@ -6825,7 +6827,6 @@ void handle_link_up(struct work_struct *work) set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SPEED_POLICY, 0, OPA_LINKDOWN_REASON_SPEED_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -6998,12 +6999,10 @@ void handle_link_down(struct work_struct *work) * If there is no cable attached, turn the DC off. Otherwise, * start the link bring up. */ - if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) { + if (ppd->port_type == PORT_TYPE_QSFP && !qsfp_mod_present(ppd)) dc_shutdown(ppd->dd); - } else { - tune_serdes(ppd); + else start_link(ppd); - } } void handle_link_bounce(struct work_struct *work) @@ -7016,7 +7015,6 @@ void handle_link_bounce(struct work_struct *work) */ if (ppd->host_link_state & HLS_UP) { set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { dd_dev_info(ppd->dd, "%s: link not up (%s), nothing to do\n", @@ -7531,7 +7529,6 @@ done: set_link_down_reason(ppd, OPA_LINKDOWN_REASON_WIDTH_POLICY, 0, OPA_LINKDOWN_REASON_WIDTH_POLICY); set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } } @@ -9161,6 +9158,12 @@ set_local_link_attributes_fail: */ int start_link(struct hfi1_pportdata *ppd) { + /* + * Tune the SerDes to a ballpark setting for optimal signal and bit + * error rate. Needs to be done before starting the link. + */ + tune_serdes(ppd); + if (!ppd->link_enabled) { dd_dev_info(ppd->dd, "%s: stopping link start because link is disabled\n", @@ -9401,8 +9404,6 @@ void qsfp_event(struct work_struct *work) */ set_qsfp_int_n(ppd, 1); - tune_serdes(ppd); - start_link(ppd); } @@ -9490,6 +9491,73 @@ static void init_lcb(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00); } +/* + * Perform a test read on the QSFP. Return 0 on success, -ERRNO + * on error. + */ +static int test_qsfp_read(struct hfi1_pportdata *ppd) +{ + int ret; + u8 status; + + /* report success if not a QSFP */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; + + /* read byte 2, the status byte */ + ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1); + if (ret < 0) + return ret; + if (ret != 1) + return -EIO; + + return 0; /* success */ +} + +/* + * Values for QSFP retry. + * + * Give up after 10s (20 x 500ms). The overall timeout was empirically + * arrived at from experience on a large cluster. + */ +#define MAX_QSFP_RETRIES 20 +#define QSFP_RETRY_WAIT 500 /* msec */ + +/* + * Try a QSFP read. If it fails, schedule a retry for later. + * Called on first link activation after driver load. + */ +static void try_start_link(struct hfi1_pportdata *ppd) +{ + if (test_qsfp_read(ppd)) { + /* read failed */ + if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) { + dd_dev_err(ppd->dd, "QSFP not responding, giving up\n"); + return; + } + dd_dev_info(ppd->dd, + "QSFP not responding, waiting and retrying %d\n", + (int)ppd->qsfp_retry_count); + ppd->qsfp_retry_count++; + queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work, + msecs_to_jiffies(QSFP_RETRY_WAIT)); + return; + } + ppd->qsfp_retry_count = 0; + + start_link(ppd); +} + +/* + * Workqueue function to start the link after a delay. + */ +void handle_start_link(struct work_struct *work) +{ + struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, + start_link_work.work); + try_start_link(ppd); +} + int bringup_serdes(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; @@ -9525,14 +9593,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd) set_qsfp_int_n(ppd, 1); } - /* - * Tune the SerDes to a ballpark setting for - * optimal signal and bit error rate - * Needs to be done before starting the link - */ - tune_serdes(ppd); - - return start_link(ppd); + try_start_link(ppd); + return 0; } void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) @@ -9549,6 +9611,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) ppd->driver_link_ready = 0; ppd->link_enabled = 0; + ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */ + flush_delayed_work(&ppd->start_link_work); + cancel_delayed_work_sync(&ppd->start_link_work); + ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, @@ -9648,12 +9714,12 @@ void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) hfi1_put_tid(dd, i, PT_INVALID, 0, 0); } -struct hfi1_message_header *hfi1_get_msgheader( - struct hfi1_devdata *dd, __le32 *rhf_addr) +struct ib_header *hfi1_get_msgheader( + struct hfi1_devdata *dd, __le32 *rhf_addr) { u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr)); - return (struct hfi1_message_header *) + return (struct ib_header *) (rhf_addr - dd->rhf_offset + offset); } @@ -11489,10 +11555,10 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) !(rcvctrl & RCV_CTXT_CTRL_ENABLE_SMASK)) { /* reset the tail and hdr addresses, and sequence count */ write_kctxt_csr(dd, ctxt, RCV_HDR_ADDR, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->seq_cnt = 1; /* reset the cached receive header queue head value */ @@ -11557,9 +11623,9 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * update with a dummy tail address and then disable * receive context. */ - if (dd->rcvhdrtail_dummy_physaddr) { + if (dd->rcvhdrtail_dummy_dma) { write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); /* Enabling RcvCtxtCtrl.TailUpd is intentional. */ rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; } @@ -11570,7 +11636,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) rcvctrl |= RCV_CTXT_CTRL_INTR_AVAIL_SMASK; if (op & HFI1_RCVCTRL_INTRAVAIL_DIS) rcvctrl &= ~RCV_CTXT_CTRL_INTR_AVAIL_SMASK; - if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_phys) + if (op & HFI1_RCVCTRL_TAILUPD_ENB && rcd->rcvhdrqtailaddr_dma) rcvctrl |= RCV_CTXT_CTRL_TAIL_UPD_SMASK; if (op & HFI1_RCVCTRL_TAILUPD_DIS) { /* See comment on RcvCtxtCtrl.TailUpd above */ @@ -11642,7 +11708,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op, int ctxt) * so it doesn't contain an address that is invalid. */ write_kctxt_csr(dd, ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); } u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp) @@ -12865,7 +12931,7 @@ fail: */ static int set_up_context_variables(struct hfi1_devdata *dd) { - int num_kernel_contexts; + unsigned long num_kernel_contexts; int total_contexts; int ret; unsigned ngroups; @@ -12894,9 +12960,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { dd_dev_err(dd, - "Reducing # kernel rcv contexts to: %d, from %d\n", + "Reducing # kernel rcv contexts to: %d, from %lu\n", (int)(dd->chip_send_contexts - num_vls - 1), - (int)num_kernel_contexts); + num_kernel_contexts); num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; } /* @@ -13319,9 +13385,9 @@ static void init_rbufs(struct hfi1_devdata *dd) /* * Give up after 1ms - maximum wait time. * - * RBuf size is 148KiB. Slowest possible is PCIe Gen1 x1 at + * RBuf size is 136KiB. Slowest possible is PCIe Gen1 x1 at * 250MB/s bandwidth. Lower rate to 66% for overhead to get: - * 148 KB / (66% * 250MB/s) = 920us + * 136 KB / (66% * 250MB/s) = 844us */ if (count++ > 500) { dd_dev_err(dd, @@ -14500,6 +14566,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_cleanup; + /* call before get_platform_config(), after init_chip_resources() */ + ret = eprom_init(dd); + if (ret) + goto bail_free_rcverr; + /* Needs to be called before hfi1_firmware_init */ get_platform_config(dd); @@ -14620,10 +14691,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev, if (ret) goto bail_free_cntrs; - ret = eprom_init(dd); - if (ret) - goto bail_free_rcverr; - goto bail; bail_free_rcverr: diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index ed11107c50fe..92345259a8f4 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -82,7 +82,7 @@ */ #define CM_VAU 3 /* HFI link credit count, AKA receive buffer depth (RBUF_DEPTH) */ -#define CM_GLOBAL_CREDITS 0x940 +#define CM_GLOBAL_CREDITS 0x880 /* Number of PKey entries in the HW */ #define MAX_PKEY_VALUES 16 @@ -254,12 +254,14 @@ #define FAILED_LNI_VERIFY_CAP2 BIT(10) #define FAILED_LNI_CONFIGLT BIT(11) #define HOST_HANDSHAKE_TIMEOUT BIT(12) +#define EXTERNAL_DEVICE_REQ_TIMEOUT BIT(13) #define FAILED_LNI (FAILED_LNI_POLLING | FAILED_LNI_DEBOUNCE \ | FAILED_LNI_ESTBCOMM | FAILED_LNI_OPTEQ \ | FAILED_LNI_VERIFY_CAP1 \ | FAILED_LNI_VERIFY_CAP2 \ - | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT) + | FAILED_LNI_CONFIGLT | HOST_HANDSHAKE_TIMEOUT \ + | EXTERNAL_DEVICE_REQ_TIMEOUT) /* DC_DC8051_DBG_ERR_INFO_SET_BY_8051.HOST_MSG - host message flags */ #define HOST_REQ_DONE BIT(0) @@ -706,6 +708,7 @@ void handle_link_up(struct work_struct *work); void handle_link_down(struct work_struct *work); void handle_link_downgrade(struct work_struct *work); void handle_link_bounce(struct work_struct *work); +void handle_start_link(struct work_struct *work); void handle_sma_message(struct work_struct *work); void reset_qsfp(struct hfi1_pportdata *ppd); void qsfp_event(struct work_struct *work); @@ -1335,7 +1338,7 @@ enum { u64 get_all_cpu_total(u64 __percpu *cntr); void hfi1_start_cleanup(struct hfi1_devdata *dd); void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); -struct hfi1_message_header *hfi1_get_msgheader( +struct ib_header *hfi1_get_msgheader( struct hfi1_devdata *dd, __le32 *rhf_addr); int hfi1_init_ctxt(struct send_context *sc); void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, diff --git a/drivers/infiniband/hw/hfi1/common.h b/drivers/infiniband/hw/hfi1/common.h index fcc9c217a97a..da7be21bedb4 100644 --- a/drivers/infiniband/hw/hfi1/common.h +++ b/drivers/infiniband/hw/hfi1/common.h @@ -320,14 +320,6 @@ struct diag_pkt { /* RHF receive type error - bypass packet errors */ #define RHF_RTE_BYPASS_NO_ERR 0x0 -/* - * This structure contains the first field common to all protocols - * that employ this chip. - */ -struct hfi1_message_header { - __be16 lrh[4]; -}; - /* IB - LRH header constants */ #define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */ #define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */ diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9cc288..632ba21759ab 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -59,6 +59,40 @@ static struct dentry *hfi1_dbg_root; +/* wrappers to enforce srcu in seq file */ +static ssize_t hfi1_seq_read( + struct file *file, + char __user *buf, + size_t size, + loff_t *ppos) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + ssize_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_read(file, buf, size, ppos); + debugfs_use_file_finish(srcu_idx); + return r; +} + +static loff_t hfi1_seq_lseek( + struct file *file, + loff_t offset, + int whence) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + loff_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_lseek(file, offset, whence); + debugfs_use_file_finish(srcu_idx); + return r; +} + #define private2dd(file) (file_inode(file)->i_private) #define private2ppd(file) (file_inode(file)->i_private) @@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \ static const struct file_operations _##name##_file_ops = { \ .owner = THIS_MODULE, \ .open = _##name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ + .read = hfi1_seq_read, \ + .llseek = hfi1_seq_lseek, \ .release = seq_release \ } @@ -105,11 +139,9 @@ do { \ DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_opcode_stats_perctx *opstats; - rcu_read_lock(); if (*pos >= ARRAY_SIZE(opstats->stats)) return NULL; return pos; @@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _opcode_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _opcode_stats_seq_show(struct seq_file *s, void *v) @@ -223,28 +253,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats) DEBUGFS_FILE_OPS(ctx_stats); static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) + __acquires(RCU) { struct qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qp_iter *iter = iter_ptr; @@ -259,7 +293,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) -__releases(RCU) + __releases(RCU) { rcu_read_unlock(); } @@ -281,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats) DEBUGFS_FILE_OPS(qp_stats); static void *_sdes_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_ibdev *ibd; struct hfi1_devdata *dd; - rcu_read_lock(); ibd = (struct hfi1_ibdev *)s->private; dd = dd_from_dev(ibd); if (!dd->per_sdma || *pos >= dd->num_sdma) @@ -306,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _sdes_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _sdes_seq_show(struct seq_file *s, void *v) @@ -335,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -352,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -379,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_portcntrs(dd->pport, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -396,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, struct hfi1_pportdata *ppd; ssize_t rval; - rcu_read_lock(); ppd = private2ppd(file); avail = hfi1_read_portcntrs(ppd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -430,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, int used; int i; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; size = PAGE_SIZE; used = 0; tmp = kmalloc(size, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); used += scnprintf(tmp + used, size - used, @@ -466,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, used += scnprintf(tmp + used, size - used, "Write bits to clear\n"); ret = simple_read_from_buffer(buf, count, ppos, tmp, used); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -482,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, u64 scratch0; u64 clear; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; buff = kmalloc(count + 1, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto do_return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -523,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, do_free: kfree(buff); - do_return: - rcu_read_unlock(); return ret; } @@ -538,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf, char *tmp; int ret; - rcu_read_lock(); ppd = private2ppd(file); tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } ret = qsfp_dump(ppd, tmp, PAGE_SIZE); if (ret > 0) ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -565,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, int offset; int total_written; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -573,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -602,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -632,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, int offset; int total_read; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -640,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); if (total_read < 0) { @@ -669,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -697,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, int ret; int total_written; - rcu_read_lock(); - if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ - ret = -EINVAL; - goto _return; - } + if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */ + return -EINVAL; ppd = private2ppd(file); buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { ret = -EFAULT; goto _free; } - total_written = qsfp_write(ppd, target, *ppos, buff, count); if (total_written < 0) { ret = total_written; @@ -729,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -757,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, int ret; int total_read; - rcu_read_lock(); if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ ret = -EINVAL; goto _return; @@ -790,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); _return: - rcu_read_unlock(); return ret; } @@ -948,6 +933,43 @@ static const struct counter_info port_cntr_ops[] = { DEBUGFS_OPS("asic_flags", asic_flags_read, asic_flags_write), }; +static void *_sdma_cpu_list_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void *_sdma_cpu_list_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + ++*pos; + if (*pos >= num_online_cpus()) + return NULL; + + return pos; +} + +static void _sdma_cpu_list_seq_stop(struct seq_file *s, void *v) +{ + /* nothing allocated */ +} + +static int _sdma_cpu_list_seq_show(struct seq_file *s, void *v) +{ + struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private; + struct hfi1_devdata *dd = dd_from_dev(ibd); + loff_t *spos = v; + loff_t i = *spos; + + sdma_seqfile_dump_cpu_list(s, dd, (unsigned long)i); + return 0; +} + +DEBUGFS_SEQ_FILE_OPS(sdma_cpu_list); +DEBUGFS_SEQ_FILE_OPEN(sdma_cpu_list) +DEBUGFS_FILE_OPS(sdma_cpu_list); + void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) { char name[sizeof("port0counters") + 1]; @@ -976,6 +998,7 @@ void hfi1_dbg_ibdev_init(struct hfi1_ibdev *ibd) DEBUGFS_SEQ_FILE_CREATE(ctx_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(qp_stats, ibd->hfi1_ibdev_dbg, ibd); DEBUGFS_SEQ_FILE_CREATE(sdes, ibd->hfi1_ibdev_dbg, ibd); + DEBUGFS_SEQ_FILE_CREATE(sdma_cpu_list, ibd->hfi1_ibdev_dbg, ibd); /* dev counter files */ for (i = 0; i < ARRAY_SIZE(cntr_ops); i++) DEBUGFS_FILE_CREATE(cntr_ops[i].name, @@ -1006,7 +1029,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: ibd->hfi1_ibdev_dbg = NULL; - synchronize_rcu(); } /* @@ -1031,9 +1053,7 @@ static const char * const hfi1_statnames[] = { }; static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1051,9 +1071,7 @@ static void *_driver_stats_names_seq_next( } static void _driver_stats_names_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _driver_stats_names_seq_show(struct seq_file *s, void *v) @@ -1069,9 +1087,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names) DEBUGFS_FILE_OPS(driver_stats_names); static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1086,9 +1102,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _driver_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static u64 hfi1_sps_ints(void) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 8246dc7d0573..6563e4d38b80 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -276,7 +276,7 @@ inline int hfi1_rcvbuf_validate(u32 size, u8 type, u16 *encoded) static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, struct hfi1_packet *packet) { - struct hfi1_message_header *rhdr = packet->hdr; + struct ib_header *rhdr = packet->hdr; u32 rte = rhf_rcv_type_err(packet->rhf); int lnh = be16_to_cpu(rhdr->lrh[0]) & 3; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -288,10 +288,9 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, if (packet->rhf & RHF_TID_ERR) { /* For TIDERR and RC QPs preemptively schedule a NAK */ - struct hfi1_ib_header *hdr = (struct hfi1_ib_header *)rhdr; - struct hfi1_other_headers *ohdr = NULL; + struct ib_other_headers *ohdr = NULL; u32 tlen = rhf_pkt_len(packet->rhf); /* in bytes */ - u16 lid = be16_to_cpu(hdr->lrh[1]); + u16 lid = be16_to_cpu(rhdr->lrh[1]); u32 qp_num; u32 rcv_flags = 0; @@ -301,14 +300,14 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, /* Check for GRH */ if (lnh == HFI1_LRH_BTH) { - ohdr = &hdr->u.oth; + ohdr = &rhdr->u.oth; } else if (lnh == HFI1_LRH_GRH) { u32 vtf; - ohdr = &hdr->u.l.oth; - if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) + ohdr = &rhdr->u.l.oth; + if (rhdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) goto drop; - vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); + vtf = be32_to_cpu(rhdr->u.l.grh.version_tclass_flow); if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) goto drop; rcv_flags |= HFI1_HAS_GRH; @@ -344,7 +343,7 @@ static void rcv_hdrerr(struct hfi1_ctxtdata *rcd, struct hfi1_pportdata *ppd, case IB_QPT_RC: hfi1_rc_hdrerr( rcd, - hdr, + rhdr, rcv_flags, qp); break; @@ -452,8 +451,8 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_ib_header *hdr = pkt->hdr; - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_header *hdr = pkt->hdr; + struct ib_other_headers *ohdr = pkt->ohdr; struct ib_grh *grh = NULL; u32 rqpn = 0, bth1; u16 rlid, dlid = be16_to_cpu(hdr->lrh[1]); @@ -487,7 +486,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, return; } - sc = hdr2sc((struct hfi1_message_header *)hdr, pkt->rhf); + sc = hdr2sc(hdr, pkt->rhf); bth1 = be32_to_cpu(ohdr->bth[1]); if (do_cnp && (bth1 & HFI1_FECN_SMASK)) { @@ -599,8 +598,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) __le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head + dd->rhf_offset; struct rvt_qp *qp; - struct hfi1_ib_header *hdr; - struct hfi1_other_headers *ohdr; + struct ib_header *hdr; + struct ib_other_headers *ohdr; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; u64 rhf = rhf_to_cpu(rhf_addr); u32 etype = rhf_rcv_type(rhf), qpn, bth1; @@ -616,8 +615,8 @@ static void __prescan_rxq(struct hfi1_packet *packet) if (etype != RHF_RCV_TYPE_IB) goto next; - hdr = (struct hfi1_ib_header *) - hfi1_get_msgheader(dd, rhf_addr); + hdr = hfi1_get_msgheader(dd, rhf_addr); + lnh = be16_to_cpu(hdr->lrh[0]) & 3; if (lnh == HFI1_LRH_BTH) @@ -888,14 +887,15 @@ void set_all_slowpath(struct hfi1_devdata *dd) } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, - struct hfi1_packet packet, + struct hfi1_packet *packet, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd, - packet.rhf_addr); + struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + u8 etype = rhf_rcv_type(packet->rhf); - if (hdr2sc(hdr, packet.rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -979,7 +979,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) /* Auto activate link on non-SC15 packet receive */ if (unlikely(rcd->ppd->host_link_state == HLS_UP_ARMED) && - set_armed_to_active(rcd, packet, dd)) + set_armed_to_active(rcd, &packet, dd)) goto bail; last = process_rcv_packet(&packet, thread); } diff --git a/drivers/infiniband/hw/hfi1/eprom.c b/drivers/infiniband/hw/hfi1/eprom.c index 36b77943cbfd..e70c223801b4 100644 --- a/drivers/infiniband/hw/hfi1/eprom.c +++ b/drivers/infiniband/hw/hfi1/eprom.c @@ -49,7 +49,26 @@ #include "common.h" #include "eprom.h" +/* + * The EPROM is logically divided into three partitions: + * partition 0: the first 128K, visible from PCI ROM BAR + * partition 1: 4K config file (sector size) + * partition 2: the rest + */ +#define P0_SIZE (128 * 1024) +#define P1_SIZE (4 * 1024) +#define P1_START P0_SIZE +#define P2_START (P0_SIZE + P1_SIZE) + +/* controller page size, in bytes */ +#define EP_PAGE_SIZE 256 +#define EP_PAGE_MASK (EP_PAGE_SIZE - 1) +#define EP_PAGE_DWORDS (EP_PAGE_SIZE / sizeof(u32)) + +/* controller commands */ #define CMD_SHIFT 24 +#define CMD_NOP (0) +#define CMD_READ_DATA(addr) ((0x03 << CMD_SHIFT) | addr) #define CMD_RELEASE_POWERDOWN_NOID ((0xab << CMD_SHIFT)) /* controller interface speeds */ @@ -61,6 +80,90 @@ * Double it for safety. */ #define EPROM_TIMEOUT 80000 /* ms */ + +/* + * Read a 256 byte (64 dword) EPROM page. + * All callers have verified the offset is at a page boundary. + */ +static void read_page(struct hfi1_devdata *dd, u32 offset, u32 *result) +{ + int i; + + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_READ_DATA(offset)); + for (i = 0; i < EP_PAGE_DWORDS; i++) + result[i] = (u32)read_csr(dd, ASIC_EEP_DATA); + write_csr(dd, ASIC_EEP_ADDR_CMD, CMD_NOP); /* close open page */ +} + +/* + * Read length bytes starting at offset from the start of the EPROM. + */ +static int read_length(struct hfi1_devdata *dd, u32 start, u32 len, void *dest) +{ + u32 buffer[EP_PAGE_DWORDS]; + u32 end; + u32 start_offset; + u32 read_start; + u32 bytes; + + if (len == 0) + return 0; + + end = start + len; + + /* + * Make sure the read range is not outside of the controller read + * command address range. Note that '>' is correct below - the end + * of the range is OK if it stops at the limit, but no higher. + */ + if (end > (1 << CMD_SHIFT)) + return -EINVAL; + + /* read the first partial page */ + start_offset = start & EP_PAGE_MASK; + if (start_offset) { + /* partial starting page */ + + /* align and read the page that contains the start */ + read_start = start & ~EP_PAGE_MASK; + read_page(dd, read_start, buffer); + + /* the rest of the page is available data */ + bytes = EP_PAGE_SIZE - start_offset; + + if (len <= bytes) { + /* end is within this page */ + memcpy(dest, (u8 *)buffer + start_offset, len); + return 0; + } + + memcpy(dest, (u8 *)buffer + start_offset, bytes); + + start += bytes; + len -= bytes; + dest += bytes; + } + /* start is now page aligned */ + + /* read whole pages */ + while (len >= EP_PAGE_SIZE) { + read_page(dd, start, buffer); + memcpy(dest, buffer, EP_PAGE_SIZE); + + start += EP_PAGE_SIZE; + len -= EP_PAGE_SIZE; + dest += EP_PAGE_SIZE; + } + + /* read the last partial page */ + if (len) { + read_page(dd, start, buffer); + memcpy(dest, buffer, len); + } + + return 0; +} + /* * Initialize the EPROM handler. */ @@ -100,3 +203,85 @@ int eprom_init(struct hfi1_devdata *dd) done_asic: return ret; } + +/* magic character sequence that trails an image */ +#define IMAGE_TRAIL_MAGIC "egamiAPO" + +/* + * Read all of partition 1. The actual file is at the front. Adjust + * the returned size if a trailing image magic is found. + */ +static int read_partition_platform_config(struct hfi1_devdata *dd, void **data, + u32 *size) +{ + void *buffer; + void *p; + u32 length; + int ret; + + buffer = kmalloc(P1_SIZE, GFP_KERNEL); + if (!buffer) + return -ENOMEM; + + ret = read_length(dd, P1_START, P1_SIZE, buffer); + if (ret) { + kfree(buffer); + return ret; + } + + /* scan for image magic that may trail the actual data */ + p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE); + if (p) + length = p - buffer; + else + length = P1_SIZE; + + *data = buffer; + *size = length; + return 0; +} + +/* + * Read the platform configuration file from the EPROM. + * + * On success, an allocated buffer containing the data and its size are + * returned. It is up to the caller to free this buffer. + * + * Return value: + * 0 - success + * -ENXIO - no EPROM is available + * -EBUSY - not able to acquire access to the EPROM + * -ENOENT - no recognizable file written + * -ENOMEM - buffer could not be allocated + */ +int eprom_read_platform_config(struct hfi1_devdata *dd, void **data, u32 *size) +{ + u32 directory[EP_PAGE_DWORDS]; /* aligned buffer */ + int ret; + + if (!dd->eprom_available) + return -ENXIO; + + ret = acquire_chip_resource(dd, CR_EPROM, EPROM_TIMEOUT); + if (ret) + return -EBUSY; + + /* read the last page of P0 for the EPROM format magic */ + ret = read_length(dd, P1_START - EP_PAGE_SIZE, EP_PAGE_SIZE, directory); + if (ret) + goto done; + + /* last dword of P0 contains a magic indicator */ + if (directory[EP_PAGE_DWORDS - 1] == 0) { + /* partition format */ + ret = read_partition_platform_config(dd, data, size); + goto done; + } + + /* nothing recognized */ + ret = -ENOENT; + +done: + release_chip_resource(dd, CR_EPROM); + return ret; +} diff --git a/drivers/infiniband/hw/hfi1/eprom.h b/drivers/infiniband/hw/hfi1/eprom.h index d41f0b1afb15..e774184f1643 100644 --- a/drivers/infiniband/hw/hfi1/eprom.h +++ b/drivers/infiniband/hw/hfi1/eprom.h @@ -45,8 +45,8 @@ * */ -struct hfi1_cmd; struct hfi1_devdata; int eprom_init(struct hfi1_devdata *dd); -int handle_eprom_command(struct file *fp, const struct hfi1_cmd *cmd); +int eprom_read_platform_config(struct hfi1_devdata *dd, void **buf_ret, + u32 *size_ret); diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1ecbec192358..677efa0e8cd6 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -58,7 +58,6 @@ #include "trace.h" #include "user_sdma.h" #include "user_exp_rcv.h" -#include "eprom.h" #include "aspm.h" #include "mmu_rb.h" @@ -183,6 +182,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) if (fd) { fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; + atomic_inc(&fd->mm->mm_count); } fp->private_data = fd; @@ -222,7 +222,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ret = assign_ctxt(fp, &uinfo); if (ret < 0) return ret; - setup_ctxt(fp); + ret = setup_ctxt(fp); if (ret) return ret; ret = user_init(fp); @@ -439,9 +439,10 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) struct hfi1_filedata *fd = fp->private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; struct hfi1_devdata *dd; - unsigned long flags, pfn; + unsigned long flags; u64 token = vma->vm_pgoff << PAGE_SHIFT, memaddr = 0; + void *memvirt = NULL; u8 subctxt, mapio = 0, vmf = 0, type; ssize_t memlen = 0; int ret = 0; @@ -492,7 +493,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) * second or third page allocated for credit returns (if number * of enabled contexts > 64 and 128 respectively). */ - memaddr = dd->cr_base[uctxt->numa_id].pa + + memvirt = dd->cr_base[uctxt->numa_id].va; + memaddr = virt_to_phys(memvirt) + (((u64)uctxt->sc->hw_free - (u64)dd->cr_base[uctxt->numa_id].va) & PAGE_MASK); memlen = PAGE_SIZE; @@ -507,8 +509,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) mapio = 1; break; case RCV_HDRQ: - memaddr = uctxt->rcvhdrq_phys; memlen = uctxt->rcvhdrq_size; + memvirt = uctxt->rcvhdrq; break; case RCV_EGRBUF: { unsigned long addr; @@ -532,14 +534,21 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) vma->vm_flags &= ~VM_MAYWRITE; addr = vma->vm_start; for (i = 0 ; i < uctxt->egrbufs.numbufs; i++) { + memlen = uctxt->egrbufs.buffers[i].len; + memvirt = uctxt->egrbufs.buffers[i].addr; ret = remap_pfn_range( vma, addr, - uctxt->egrbufs.buffers[i].phys >> PAGE_SHIFT, - uctxt->egrbufs.buffers[i].len, + /* + * virt_to_pfn() does the same, but + * it's not available on x86_64 + * when CONFIG_MMU is enabled. + */ + PFN_DOWN(__pa(memvirt)), + memlen, vma->vm_page_prot); if (ret < 0) goto done; - addr += uctxt->egrbufs.buffers[i].len; + addr += memlen; } ret = 0; goto done; @@ -595,8 +604,8 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) ret = -EPERM; goto done; } - memaddr = uctxt->rcvhdrqtailaddr_phys; memlen = PAGE_SIZE; + memvirt = (void *)uctxt->rcvhdrtail_kvaddr; flags &= ~VM_MAYWRITE; break; case SUBCTXT_UREGS: @@ -649,16 +658,24 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma) "%u:%u type:%u io/vf:%d/%d, addr:0x%llx, len:%lu(%lu), flags:0x%lx\n", ctxt, subctxt, type, mapio, vmf, memaddr, memlen, vma->vm_end - vma->vm_start, vma->vm_flags); - pfn = (unsigned long)(memaddr >> PAGE_SHIFT); if (vmf) { - vma->vm_pgoff = pfn; + vma->vm_pgoff = PFN_DOWN(memaddr); vma->vm_ops = &vm_ops; ret = 0; } else if (mapio) { - ret = io_remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = io_remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); + } else if (memvirt) { + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(__pa(memvirt)), + memlen, + vma->vm_page_prot); } else { - ret = remap_pfn_range(vma, vma->vm_start, pfn, memlen, + ret = remap_pfn_range(vma, vma->vm_start, + PFN_DOWN(memaddr), + memlen, vma->vm_page_prot); } done: @@ -779,6 +796,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + mmdrop(fdata->mm); kobject_put(&dd->kobj); kfree(fdata); return 0; @@ -959,14 +977,16 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, */ uctxt->sc = sc_alloc(dd, SC_USER, uctxt->rcvhdrqentsize, uctxt->dd->node); - if (!uctxt->sc) - return -ENOMEM; - + if (!uctxt->sc) { + ret = -ENOMEM; + goto ctxdata_free; + } hfi1_cdbg(PROC, "allocated send context %u(%u)\n", uctxt->sc->sw_index, uctxt->sc->hw_context); ret = sc_enable(uctxt->sc); if (ret) - return ret; + goto ctxdata_free; + /* * Setup shared context resources if the user-level has requested * shared contexts and this is the 'master' process. @@ -980,7 +1000,7 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, * send context because it will be done during file close */ if (ret) - return ret; + goto ctxdata_free; } uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ @@ -1000,6 +1020,11 @@ static int allocate_ctxt(struct file *fp, struct hfi1_devdata *dd, fd->uctxt = uctxt; return 0; + +ctxdata_free: + dd->rcd[ctxt] = NULL; + hfi1_free_ctxtdata(dd, uctxt); + return ret; } static int init_subctxts(struct hfi1_ctxtdata *uctxt, @@ -1258,7 +1283,7 @@ static int get_base_info(struct file *fp, void __user *ubase, __u32 len) uctxt->rcvhdrq); binfo.rcvegr_bufbase = HFI1_MMAP_TOKEN(RCV_EGRBUF, uctxt->ctxt, fd->subctxt, - uctxt->egrbufs.rcvtids[0].phys); + uctxt->egrbufs.rcvtids[0].dma); binfo.sdma_comp_bufbase = HFI1_MMAP_TOKEN(SDMA_COMP, uctxt->ctxt, fd->subctxt, 0); /* diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1000e0fd96d9..7eef11b316ff 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -64,6 +64,8 @@ #include <linux/kthread.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> +#include <rdma/ib_hdrs.h> +#include <linux/rhashtable.h> #include <rdma/rdma_vt.h> #include "chip_registers.h" @@ -171,12 +173,12 @@ struct ctxt_eager_bufs { u32 threshold; /* head update threshold */ struct eager_buffer { void *addr; - dma_addr_t phys; + dma_addr_t dma; ssize_t len; } *buffers; struct { void *addr; - dma_addr_t phys; + dma_addr_t dma; } *rcvtids; }; @@ -207,8 +209,8 @@ struct hfi1_ctxtdata { /* size of each of the rcvhdrq entries */ u16 rcvhdrqentsize; /* mmap of hdrq, must fit in 44 bits */ - dma_addr_t rcvhdrq_phys; - dma_addr_t rcvhdrqtailaddr_phys; + dma_addr_t rcvhdrq_dma; + dma_addr_t rcvhdrqtailaddr_dma; struct ctxt_eager_bufs egrbufs; /* this receive context's assigned PIO ACK send context */ struct send_context *sc; @@ -350,7 +352,7 @@ struct hfi1_packet { struct hfi1_ctxtdata *rcd; __le32 *rhf_addr; struct rvt_qp *qp; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u64 rhf; u32 maxcnt; u32 rhqoff; @@ -529,6 +531,7 @@ struct hfi1_msix_entry { void *arg; char name[MAX_NAME_SIZE]; cpumask_t mask; + struct irq_affinity_notify notify; }; /* per-SL CCA information */ @@ -605,6 +608,7 @@ struct hfi1_pportdata { struct work_struct freeze_work; struct work_struct link_downgrade_work; struct work_struct link_bounce_work; + struct delayed_work start_link_work; /* host link state variables */ struct mutex hls_lock; u32 host_link_state; @@ -659,6 +663,7 @@ struct hfi1_pportdata { u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ u8 last_pstate; /* info only */ + u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ u8 overrun_threshold; @@ -1058,8 +1063,6 @@ struct hfi1_devdata { u8 psxmitwait_supported; /* cycle length of PS* counters in HW (in picoseconds) */ u16 psxmitwait_check_rate; - /* high volume overflow errors deferred to tasklet */ - struct tasklet_struct error_tasklet; /* MSI-X information */ struct hfi1_msix_entry *msix_entries; @@ -1162,7 +1165,7 @@ struct hfi1_devdata { /* receive context tail dummy address */ __le64 *rcvhdrtail_dummy_kvaddr; - dma_addr_t rcvhdrtail_dummy_physaddr; + dma_addr_t rcvhdrtail_dummy_dma; bool eprom_available; /* true if EPROM is available for this device */ bool aspm_supported; /* Does HW support ASPM */ @@ -1173,6 +1176,7 @@ struct hfi1_devdata { atomic_t aspm_disabled_cnt; struct hfi1_affinity *affinity; + struct rhashtable sdma_rht; struct kobject kobj; }; @@ -1266,15 +1270,32 @@ static inline u32 driver_lstate(struct hfi1_pportdata *ppd) void receive_interrupt_work(struct work_struct *work); /* extract service channel from header and rhf */ -static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) +static inline int hdr2sc(struct ib_header *hdr, u64 rhf) { return ((be16_to_cpu(hdr->lrh[0]) >> 12) & 0xf) | ((!!(rhf_dc_info(rhf))) << 4); } +#define HFI1_JKEY_WIDTH 16 +#define HFI1_JKEY_MASK (BIT(16) - 1) +#define HFI1_ADMIN_JKEY_RANGE 32 + +/* + * J_KEYs are split and allocated in the following groups: + * 0 - 31 - users with administrator privileges + * 32 - 63 - kernel protocols using KDETH packets + * 64 - 65535 - all other users using KDETH packets + */ static inline u16 generate_jkey(kuid_t uid) { - return from_kuid(current_user_ns(), uid) & 0xffff; + u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK; + + if (capable(CAP_SYS_ADMIN)) + jkey &= HFI1_ADMIN_JKEY_RANGE - 1; + else if (jkey < 64) + jkey |= BIT(HFI1_JKEY_WIDTH - 1); + + return jkey; } /* @@ -1584,7 +1605,7 @@ void hfi1_process_ecn_slowpath(struct rvt_qp *qp, struct hfi1_packet *pkt, static inline bool process_ecn(struct rvt_qp *qp, struct hfi1_packet *pkt, bool do_cnp) { - struct hfi1_other_headers *ohdr = pkt->ohdr; + struct ib_other_headers *ohdr = pkt->ohdr; u32 bth1; bth1 = be32_to_cpu(ohdr->bth[1]); @@ -1656,7 +1677,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, const struct pci_device_id *); void hfi1_free_devdata(struct hfi1_devdata *); -void cc_state_reclaim(struct rcu_head *rcu); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ @@ -1788,7 +1808,7 @@ extern unsigned int hfi1_max_mtu; extern unsigned int hfi1_cu; extern unsigned int user_credit_return_threshold; extern int num_user_contexts; -extern unsigned n_krcvqs; +extern unsigned long n_krcvqs; extern uint krcvqs[]; extern int krcvqsset; extern uint kdeth_qp; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a358d23ecd54..60db61536fed 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO); MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL"); /* computed based on above array */ -unsigned n_krcvqs; +unsigned long n_krcvqs; static unsigned hfi1_rcvarr_split = 25; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); @@ -336,6 +336,7 @@ struct hfi1_ctxtdata *hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, u32 ctxt, } return rcd; bail: + dd->rcd[ctxt] = NULL; kfree(rcd->egrbufs.rcvtids); kfree(rcd->egrbufs.buffers); kfree(rcd); @@ -500,6 +501,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); INIT_WORK(&ppd->sma_message_work, handle_sma_message); INIT_WORK(&ppd->link_bounce_work, handle_link_bounce); + INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link); INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work); INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); @@ -708,7 +710,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit) /* allocate dummy tail memory for all receive contexts */ dd->rcvhdrtail_dummy_kvaddr = dma_zalloc_coherent( &dd->pcidev->dev, sizeof(u64), - &dd->rcvhdrtail_dummy_physaddr, + &dd->rcvhdrtail_dummy_dma, GFP_KERNEL); if (!dd->rcvhdrtail_dummy_kvaddr) { @@ -941,12 +943,12 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (rcd->rcvhdrq) { dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size, - rcd->rcvhdrq, rcd->rcvhdrq_phys); + rcd->rcvhdrq, rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; if (rcd->rcvhdrtail_kvaddr) { dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE, (void *)rcd->rcvhdrtail_kvaddr, - rcd->rcvhdrqtailaddr_phys); + rcd->rcvhdrqtailaddr_dma); rcd->rcvhdrtail_kvaddr = NULL; } } @@ -955,11 +957,11 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) kfree(rcd->egrbufs.rcvtids); for (e = 0; e < rcd->egrbufs.alloced; e++) { - if (rcd->egrbufs.buffers[e].phys) + if (rcd->egrbufs.buffers[e].dma) dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[e].len, rcd->egrbufs.buffers[e].addr, - rcd->egrbufs.buffers[e].phys); + rcd->egrbufs.buffers[e].dma); } kfree(rcd->egrbufs.buffers); @@ -1333,7 +1335,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_unlock(&ppd->cc_state_lock); if (cc_state) - call_rcu(&cc_state->rcu, cc_state_reclaim); + kfree_rcu(cc_state, rcu); } free_credit_return(dd); @@ -1353,7 +1355,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) if (dd->rcvhdrtail_dummy_kvaddr) { dma_free_coherent(&dd->pcidev->dev, sizeof(u64), (void *)dd->rcvhdrtail_dummy_kvaddr, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); dd->rcvhdrtail_dummy_kvaddr = NULL; } @@ -1576,7 +1578,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) u64 reg; if (!rcd->rcvhdrq) { - dma_addr_t phys_hdrqtail; + dma_addr_t dma_hdrqtail; gfp_t gfp_flags; /* @@ -1589,7 +1591,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) gfp_flags = (rcd->ctxt >= dd->first_user_ctxt) ? GFP_USER : GFP_KERNEL; rcd->rcvhdrq = dma_zalloc_coherent( - &dd->pcidev->dev, amt, &rcd->rcvhdrq_phys, + &dd->pcidev->dev, amt, &rcd->rcvhdrq_dma, gfp_flags | __GFP_COMP); if (!rcd->rcvhdrq) { @@ -1601,11 +1603,11 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) if (HFI1_CAP_KGET_MASK(rcd->flags, DMA_RTAIL)) { rcd->rcvhdrtail_kvaddr = dma_zalloc_coherent( - &dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, + &dd->pcidev->dev, PAGE_SIZE, &dma_hdrqtail, gfp_flags); if (!rcd->rcvhdrtail_kvaddr) goto bail_free; - rcd->rcvhdrqtailaddr_phys = phys_hdrqtail; + rcd->rcvhdrqtailaddr_dma = dma_hdrqtail; } rcd->rcvhdrq_size = amt; @@ -1633,7 +1635,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd) * before enabling any receive context */ write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_TAIL_ADDR, - dd->rcvhdrtail_dummy_physaddr); + dd->rcvhdrtail_dummy_dma); return 0; @@ -1644,7 +1646,7 @@ bail_free: vfree(rcd->user_event_mask); rcd->user_event_mask = NULL; dma_free_coherent(&dd->pcidev->dev, amt, rcd->rcvhdrq, - rcd->rcvhdrq_phys); + rcd->rcvhdrq_dma); rcd->rcvhdrq = NULL; bail: return -ENOMEM; @@ -1705,15 +1707,15 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) rcd->egrbufs.buffers[idx].addr = dma_zalloc_coherent(&dd->pcidev->dev, rcd->egrbufs.rcvtid_size, - &rcd->egrbufs.buffers[idx].phys, + &rcd->egrbufs.buffers[idx].dma, gfp_flags); if (rcd->egrbufs.buffers[idx].addr) { rcd->egrbufs.buffers[idx].len = rcd->egrbufs.rcvtid_size; rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].addr = rcd->egrbufs.buffers[idx].addr; - rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].phys = - rcd->egrbufs.buffers[idx].phys; + rcd->egrbufs.rcvtids[rcd->egrbufs.alloced].dma = + rcd->egrbufs.buffers[idx].dma; rcd->egrbufs.alloced++; alloced_bytes += rcd->egrbufs.rcvtid_size; idx++; @@ -1754,14 +1756,14 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (i = 0, j = 0, offset = 0; j < idx; i++) { if (i >= rcd->egrbufs.count) break; - rcd->egrbufs.rcvtids[i].phys = - rcd->egrbufs.buffers[j].phys + offset; + rcd->egrbufs.rcvtids[i].dma = + rcd->egrbufs.buffers[j].dma + offset; rcd->egrbufs.rcvtids[i].addr = rcd->egrbufs.buffers[j].addr + offset; rcd->egrbufs.alloced++; - if ((rcd->egrbufs.buffers[j].phys + offset + + if ((rcd->egrbufs.buffers[j].dma + offset + new_size) == - (rcd->egrbufs.buffers[j].phys + + (rcd->egrbufs.buffers[j].dma + rcd->egrbufs.buffers[j].len)) { j++; offset = 0; @@ -1813,7 +1815,7 @@ int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd) for (idx = 0; idx < rcd->egrbufs.alloced; idx++) { hfi1_put_tid(dd, rcd->eager_base + idx, PT_EAGER, - rcd->egrbufs.rcvtids[idx].phys, order); + rcd->egrbufs.rcvtids[idx].dma, order); cond_resched(); } goto bail; @@ -1825,9 +1827,9 @@ bail_rcvegrbuf_phys: dma_free_coherent(&dd->pcidev->dev, rcd->egrbufs.buffers[idx].len, rcd->egrbufs.buffers[idx].addr, - rcd->egrbufs.buffers[idx].phys); + rcd->egrbufs.buffers[idx].dma); rcd->egrbufs.buffers[idx].addr = NULL; - rcd->egrbufs.buffers[idx].phys = 0; + rcd->egrbufs.buffers[idx].dma = 0; rcd->egrbufs.buffers[idx].len = 0; } bail: diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1263abe01999..9487c9bb8920 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1013,7 +1013,6 @@ static int set_port_states(struct hfi1_pportdata *ppd, struct opa_smp *smp, * offline. */ set_link_state(ppd, HLS_DN_OFFLINE); - tune_serdes(ppd); start_link(ppd); } else { set_link_state(ppd, link_state); @@ -1407,12 +1406,6 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys) if (key == okey) continue; /* - * Don't update pkeys[2], if an HFI port without MgmtAllowed - * by neighbor is a switch. - */ - if (i == 2 && !ppd->mgmt_allowed && ppd->neighbor_type == 1) - continue; - /* * The SM gives us the complete PKey table. We have * to ensure that we put the PKeys in the matching * slots. @@ -1819,6 +1812,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, u32 len = OPA_AM_CI_LEN(am) + 1; int ret; + if (dd->pport->port_type != PORT_TYPE_QSFP) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */ #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1) #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK) @@ -2599,7 +2597,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, u8 lq, num_vls; u8 res_lli, res_ler; u64 port_mask; - unsigned long port_num; + u8 port_num; unsigned long vl; u32 vl_select_mask; int vfi; @@ -2633,9 +2631,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); - if ((u8)port_num != port) { + if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } @@ -2837,7 +2835,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3010,7 +3008,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3247,7 +3245,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3398,7 +3396,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + kfree_rcu(old_cc_state, rcu); } static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, @@ -3553,13 +3551,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } -void cc_state_reclaim(struct rcu_head *rcu) -{ - struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu); - - kfree(cc_state); -} - static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index ac1bf4a73571..50a3a36d9363 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -551,11 +551,11 @@ static inline u32 group_size(u32 group) } /* - * Obtain the credit return addresses, kernel virtual and physical, for the + * Obtain the credit return addresses, kernel virtual and bus, for the * given sc. * * To understand this routine: - * o va and pa are arrays of struct credit_return. One for each physical + * o va and dma are arrays of struct credit_return. One for each physical * send context, per NUMA. * o Each send context always looks in its relative location in a struct * credit_return for its credit return. @@ -563,14 +563,14 @@ static inline u32 group_size(u32 group) * with the same value. Use the address of the first send context in the * group. */ -static void cr_group_addresses(struct send_context *sc, dma_addr_t *pa) +static void cr_group_addresses(struct send_context *sc, dma_addr_t *dma) { u32 gc = group_context(sc->hw_context, sc->group); u32 index = sc->hw_context & 0x7; sc->hw_free = &sc->dd->cr_base[sc->node].va[gc].cr[index]; - *pa = (unsigned long) - &((struct credit_return *)sc->dd->cr_base[sc->node].pa)[gc]; + *dma = (unsigned long) + &((struct credit_return *)sc->dd->cr_base[sc->node].dma)[gc]; } /* @@ -710,7 +710,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, { struct send_context_info *sci; struct send_context *sc = NULL; - dma_addr_t pa; + dma_addr_t dma; unsigned long flags; u64 reg; u32 thresh; @@ -763,7 +763,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, sc->sw_index = sw_index; sc->hw_context = hw_context; - cr_group_addresses(sc, &pa); + cr_group_addresses(sc, &dma); sc->credits = sci->credits; /* PIO Send Memory Address details */ @@ -805,7 +805,7 @@ struct send_context *sc_alloc(struct hfi1_devdata *dd, int type, ((u64)opval << SC(CHECK_OPCODE_VALUE_SHIFT))); /* set up credit return */ - reg = pa & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); + reg = dma & SC(CREDIT_RETURN_ADDR_ADDRESS_SMASK); write_kctxt_csr(dd, hw_context, SC(CREDIT_RETURN_ADDR), reg); /* @@ -2064,7 +2064,7 @@ int init_credit_return(struct hfi1_devdata *dd) dd->cr_base[i].va = dma_zalloc_coherent( &dd->pcidev->dev, bytes, - &dd->cr_base[i].pa, + &dd->cr_base[i].dma, GFP_KERNEL); if (!dd->cr_base[i].va) { set_dev_node(&dd->pcidev->dev, dd->node); @@ -2097,7 +2097,7 @@ void free_credit_return(struct hfi1_devdata *dd) TXE_NUM_CONTEXTS * sizeof(struct credit_return), dd->cr_base[i].va, - dd->cr_base[i].pa); + dd->cr_base[i].dma); } } kfree(dd->cr_base); diff --git a/drivers/infiniband/hw/hfi1/pio.h b/drivers/infiniband/hw/hfi1/pio.h index 464cbd27b975..e709eaf743b5 100644 --- a/drivers/infiniband/hw/hfi1/pio.h +++ b/drivers/infiniband/hw/hfi1/pio.h @@ -154,7 +154,7 @@ struct credit_return { /* NUMA indexed credit return array */ struct credit_return_base { struct credit_return *va; - dma_addr_t pa; + dma_addr_t dma; }; /* send context configuration sizes (one per type) */ diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 8c25e1b58849..aa7773643107 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -165,9 +165,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, preempt_enable(); } -/* USE_SHIFTS is faster in user-space tests on a Xeon X5570 @ 2.93GHz */ -#define USE_SHIFTS 1 -#ifdef USE_SHIFTS /* * Handle carry bytes using shifts and masks. * @@ -187,150 +184,6 @@ void pio_copy(struct hfi1_devdata *dd, struct pio_buf *pbuf, u64 pbc, #define mshift(x) (8 * (x)) /* - * Read nbytes bytes from "from" and return them in the LSB bytes - * of pbuf->carry. Other bytes are zeroed. Any previous value - * pbuf->carry is lost. - * - * NOTES: - * o do not read from from if nbytes is zero - * o from may _not_ be u64 aligned - * o nbytes must not span a QW boundary - */ -static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, - unsigned int nbytes) -{ - unsigned long off; - - if (nbytes == 0) { - pbuf->carry.val64 = 0; - } else { - /* align our pointer */ - off = (unsigned long)from & 0x7; - from = (void *)((unsigned long)from & ~0x7l); - pbuf->carry.val64 = ((*(u64 *)from) - << zshift(nbytes + off))/* zero upper bytes */ - >> zshift(nbytes); /* place at bottom */ - } - pbuf->carry_bytes = nbytes; -} - -/* - * Read nbytes bytes from "from" and put them at the next significant bytes - * of pbuf->carry. Unused bytes are zeroed. It is expected that the extra - * read does not overfill carry. - * - * NOTES: - * o from may _not_ be u64 aligned - * o nbytes may span a QW boundary - */ -static inline void read_extra_bytes(struct pio_buf *pbuf, - const void *from, unsigned int nbytes) -{ - unsigned long off = (unsigned long)from & 0x7; - unsigned int room, xbytes; - - /* align our pointer */ - from = (void *)((unsigned long)from & ~0x7l); - - /* check count first - don't read anything if count is zero */ - while (nbytes) { - /* find the number of bytes in this u64 */ - room = 8 - off; /* this u64 has room for this many bytes */ - xbytes = min(room, nbytes); - - /* - * shift down to zero lower bytes, shift up to zero upper - * bytes, shift back down to move into place - */ - pbuf->carry.val64 |= (((*(u64 *)from) - >> mshift(off)) - << zshift(xbytes)) - >> zshift(xbytes + pbuf->carry_bytes); - off = 0; - pbuf->carry_bytes += xbytes; - nbytes -= xbytes; - from += sizeof(u64); - } -} - -/* - * Zero extra bytes from the end of pbuf->carry. - * - * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - unsigned int remaining; - - if (zbytes == 0) /* nothing to do */ - return; - - remaining = pbuf->carry_bytes - zbytes; /* remaining bytes */ - - /* NOTE: zshift only guaranteed to work if remaining != 0 */ - if (remaining) - pbuf->carry.val64 = (pbuf->carry.val64 << zshift(remaining)) - >> zshift(remaining); - else - pbuf->carry.val64 = 0; - pbuf->carry_bytes = remaining; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the LSB bytes of - * pbuf->carry with the upper bytes zeroed.. - * - * NOTES: - * o result must keep unused bytes zeroed - * o src must be u64 aligned - */ -static inline void merge_write8( - struct pio_buf *pbuf, - void __iomem *dest, - const void *src) -{ - u64 new, temp; - - new = *(u64 *)src; - temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); - writeq(temp, dest); - pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); -} - -/* - * Write a quad word using all bytes of carry. - */ -static inline void carry8_write8(union mix carry, void __iomem *dest) -{ - writeq(carry.val64, dest); -} - -/* - * Write a quad word using all the valid bytes of carry. If carry - * has zero valid bytes, nothing is written. - * Returns 0 on nothing written, non-zero on quad word written. - */ -static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) -{ - if (pbuf->carry_bytes) { - /* unused bytes are always kept zeroed, so just write */ - writeq(pbuf->carry.val64, dest); - return 1; - } - - return 0; -} - -#else /* USE_SHIFTS */ -/* - * Handle carry bytes using byte copies. - * - * NOTE: the value the unused portion of carry is left uninitialized. - */ - -/* * Jump copy - no-loop copy for < 8 bytes. */ static inline void jcopy(u8 *dest, const u8 *src, u32 n) @@ -338,18 +191,25 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) switch (n) { case 7: *dest++ = *src++; + /* fall through */ case 6: *dest++ = *src++; + /* fall through */ case 5: *dest++ = *src++; + /* fall through */ case 4: *dest++ = *src++; + /* fall through */ case 3: *dest++ = *src++; + /* fall through */ case 2: *dest++ = *src++; + /* fall through */ case 1: *dest++ = *src++; + /* fall through */ } } @@ -365,6 +225,7 @@ static inline void jcopy(u8 *dest, const u8 *src, u32 n) static inline void read_low_bytes(struct pio_buf *pbuf, const void *from, unsigned int nbytes) { + pbuf->carry.val64 = 0; jcopy(&pbuf->carry.val8[0], from, nbytes); pbuf->carry_bytes = nbytes; } @@ -385,40 +246,31 @@ static inline void read_extra_bytes(struct pio_buf *pbuf, } /* - * Zero extra bytes from the end of pbuf->carry. - * - * We do not care about the value of unused bytes in carry, so just - * reduce the byte count. + * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. + * Put the unused part of the next 8 bytes of src into the LSB bytes of + * pbuf->carry with the upper bytes zeroed.. * * NOTES: - * o zbytes <= old_bytes - */ -static inline void zero_extra_bytes(struct pio_buf *pbuf, unsigned int zbytes) -{ - pbuf->carry_bytes -= zbytes; -} - -/* - * Write a quad word using parts of pbuf->carry and the next 8 bytes of src. - * Put the unused part of the next 8 bytes of src into the low bytes of - * pbuf->carry. + * o result must keep unused bytes zeroed + * o src must be u64 aligned */ static inline void merge_write8( struct pio_buf *pbuf, - void *dest, + void __iomem *dest, const void *src) { - u32 remainder = 8 - pbuf->carry_bytes; + u64 new, temp; - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], src, remainder); - writeq(pbuf->carry.val64, dest); - jcopy(&pbuf->carry.val8[0], src + remainder, pbuf->carry_bytes); + new = *(u64 *)src; + temp = pbuf->carry.val64 | (new << mshift(pbuf->carry_bytes)); + writeq(temp, dest); + pbuf->carry.val64 = new >> zshift(pbuf->carry_bytes); } /* * Write a quad word using all bytes of carry. */ -static inline void carry8_write8(union mix carry, void *dest) +static inline void carry8_write8(union mix carry, void __iomem *dest) { writeq(carry.val64, dest); } @@ -428,20 +280,16 @@ static inline void carry8_write8(union mix carry, void *dest) * has zero valid bytes, nothing is written. * Returns 0 on nothing written, non-zero on quad word written. */ -static inline int carry_write8(struct pio_buf *pbuf, void *dest) +static inline int carry_write8(struct pio_buf *pbuf, void __iomem *dest) { if (pbuf->carry_bytes) { - u64 zero = 0; - - jcopy(&pbuf->carry.val8[pbuf->carry_bytes], (u8 *)&zero, - 8 - pbuf->carry_bytes); + /* unused bytes are always kept zeroed, so just write */ writeq(pbuf->carry.val64, dest); return 1; } return 0; } -#endif /* USE_SHIFTS */ /* * Segmented PIO Copy - start @@ -550,8 +398,8 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) { void __iomem *dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); void __iomem *dend; /* 8-byte data end */ - unsigned long qw_to_write = (pbuf->carry_bytes + nbytes) >> 3; - unsigned long bytes_left = (pbuf->carry_bytes + nbytes) & 0x7; + unsigned long qw_to_write = nbytes >> 3; + unsigned long bytes_left = nbytes & 0x7; /* calculate 8-byte data end */ dend = dest + (qw_to_write * sizeof(u64)); @@ -621,16 +469,46 @@ static void mid_copy_mix(struct pio_buf *pbuf, const void *from, size_t nbytes) dest += sizeof(u64); } - /* adjust carry */ - if (pbuf->carry_bytes < bytes_left) { - /* need to read more */ - read_extra_bytes(pbuf, from, bytes_left - pbuf->carry_bytes); + pbuf->qw_written += qw_to_write; + + /* handle carry and left-over bytes */ + if (pbuf->carry_bytes + bytes_left >= 8) { + unsigned long nread; + + /* there is enough to fill another qw - fill carry */ + nread = 8 - pbuf->carry_bytes; + read_extra_bytes(pbuf, from, nread); + + /* + * One more write - but need to make sure dest is correct. + * Check for wrap and the possibility the write + * should be in SOP space. + * + * The two checks immediately below cannot both be true, hence + * the else. If we have wrapped, we cannot still be within the + * first block. Conversely, if we are still in the first block, + * we cannot have wrapped. We do the wrap check first as that + * is more likely. + */ + /* adjust if we have wrapped */ + if (dest >= pbuf->end) + dest -= pbuf->size; + /* jump to the SOP range if within the first block */ + else if (pbuf->qw_written < PIO_BLOCK_QWS) + dest += SOP_DISTANCE; + + /* flush out full carry */ + carry8_write8(pbuf->carry, dest); + pbuf->qw_written++; + + /* now adjust and read the rest of the bytes into carry */ + bytes_left -= nread; + from += nread; /* from is now not aligned */ + read_low_bytes(pbuf, from, bytes_left); } else { - /* remove invalid bytes */ - zero_extra_bytes(pbuf, pbuf->carry_bytes - bytes_left); + /* not enough to fill another qw, append the rest to carry */ + read_extra_bytes(pbuf, from, bytes_left); } - - pbuf->qw_written += qw_to_write; } /* @@ -771,6 +649,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_extra_bytes(pbuf, from, to_fill); from += to_fill; nbytes -= to_fill; + /* may not be enough valid bytes left to align */ + if (extra > nbytes) + extra = nbytes; /* ...now write carry */ dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); @@ -798,6 +679,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_low_bytes(pbuf, from, extra); from += extra; nbytes -= extra; + /* + * If no bytes are left, return early - we are done. + * NOTE: This short-circuit is *required* because + * "extra" may have been reduced in size and "from" + * is not aligned, as required when leaving this + * if block. + */ + if (nbytes == 0) + return; } /* at this point, from is QW aligned */ diff --git a/drivers/infiniband/hw/hfi1/platform.c b/drivers/infiniband/hw/hfi1/platform.c index 965c8aef0c60..202433178864 100644 --- a/drivers/infiniband/hw/hfi1/platform.c +++ b/drivers/infiniband/hw/hfi1/platform.c @@ -47,29 +47,39 @@ #include "hfi.h" #include "efivar.h" +#include "eprom.h" void get_platform_config(struct hfi1_devdata *dd) { int ret = 0; unsigned long size = 0; u8 *temp_platform_config = NULL; + u32 esize; + + ret = eprom_read_platform_config(dd, (void **)&temp_platform_config, + &esize); + if (!ret) { + /* success */ + size = esize; + goto success; + } + /* fail, try EFI variable */ ret = read_hfi1_efi_var(dd, "configuration", &size, (void **)&temp_platform_config); - if (ret) { - dd_dev_info(dd, - "%s: Failed to get platform config from UEFI, falling back to request firmware\n", - __func__); - /* fall back to request firmware */ - platform_config_load = 1; - goto bail; - } + if (!ret) + goto success; + + dd_dev_info(dd, + "%s: Failed to get platform config from UEFI, falling back to request firmware\n", + __func__); + /* fall back to request firmware */ + platform_config_load = 1; + return; +success: dd->platform_config.data = temp_platform_config; dd->platform_config.size = size; - -bail: - /* exit */; } void free_platform_config(struct hfi1_devdata *dd) diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index a5aa3517e7d5..9fc75e7e8781 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -202,8 +202,7 @@ static void flush_iowait(struct rvt_qp *qp) write_seqlock_irqsave(&dev->iowait_lock, flags); if (!list_empty(&priv->s_iowait.list)) { list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock_irqrestore(&dev->iowait_lock, flags); } @@ -450,13 +449,14 @@ static void qp_pio_drain(struct rvt_qp *qp) */ void hfi1_schedule_send(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); if (hfi1_send_ok(qp)) _hfi1_schedule_send(qp); } /** - * hfi1_get_credit - flush the send work queue of a QP - * @qp: the qp who's send work queue to flush + * hfi1_get_credit - handle credit in aeth + * @qp: the qp * @aeth: the Acknowledge Extended Transport Header * * The QP s_lock should be held. @@ -465,6 +465,7 @@ void hfi1_get_credit(struct rvt_qp *qp, u32 aeth) { u32 credit = (aeth >> HFI1_AETH_CREDIT_SHIFT) & HFI1_AETH_CREDIT_MASK; + lockdep_assert_held(&qp->s_lock); /* * If the credit is invalid, we can send * as many packets as we like. Otherwise, we have to @@ -503,8 +504,7 @@ void hfi1_qp_wakeup(struct rvt_qp *qp, u32 flag) } spin_unlock_irqrestore(&qp->s_lock, flags); /* Notify hfi1_destroy_qp() if it is waiting. */ - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } static int iowait_sleep( @@ -544,7 +544,7 @@ static int iowait_sleep( qp->s_flags |= RVT_S_WAIT_DMA_DESC; list_add_tail(&priv->s_iowait.list, &sde->dmawait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_DMA_DESC); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) iter->dev = dev; iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; - if (qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } @@ -812,6 +808,13 @@ void *qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, kfree(priv); return ERR_PTR(-ENOMEM); } + iowait_init( + &priv->s_iowait, + 1, + _hfi1_do_send, + iowait_sleep, + iowait_wakeup, + iowait_sdma_drained); setup_timer(&priv->s_rnr_timer, hfi1_rc_rnr_retry, (unsigned long)qp); qp->s_timer.function = hfi1_rc_timeout; return priv; @@ -852,6 +855,7 @@ unsigned free_all_qps(struct rvt_dev_info *rdi) void flush_qp_waiters(struct rvt_qp *qp) { + lockdep_assert_held(&qp->s_lock); flush_iowait(qp); hfi1_stop_rc_timers(qp); } @@ -877,13 +881,6 @@ void notify_qp_reset(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; - iowait_init( - &priv->s_iowait, - 1, - _hfi1_do_send, - iowait_sleep, - iowait_wakeup, - iowait_sdma_drained); priv->r_adefered = 0; clear_ahg(qp); } @@ -967,8 +964,7 @@ void notify_error_qp(struct rvt_qp *qp) if (!list_empty(&priv->s_iowait.list) && !(qp->s_flags & RVT_S_BUSY)) { qp->s_flags &= ~RVT_S_ANY_WAIT_IO; list_del_init(&priv->s_iowait.list); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } write_sequnlock(&dev->iowait_lock); diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index a207717ade2a..1869f639c3ae 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -161,7 +161,7 @@ static struct hfi1_i2c_bus *init_i2c_bus(struct hfi1_devdata *dd, bus->algo.getsda = hfi1_getsda; bus->algo.getscl = hfi1_getscl; bus->algo.udelay = 5; - bus->algo.timeout = usecs_to_jiffies(50); + bus->algo.timeout = usecs_to_jiffies(100000); bus->algo.data = bus; bus->adapter.owner = THIS_MODULE; @@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, u8 *data) { struct hfi1_pportdata *ppd; - u32 excess_len = 0; - int ret = 0; + u32 excess_len = len; + int ret = 0, offset = 0; if (port_num > dd->num_pports || port_num < 1) { dd_dev_info(dd, "%s: Invalid port number %d\n", @@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, } memcpy(data, &ppd->qsfp_info.cache[addr], len); + + if (addr <= QSFP_MONITOR_VAL_END && + (addr + len) >= QSFP_MONITOR_VAL_START) { + /* Overlap with the dynamic channel monitor range */ + if (addr < QSFP_MONITOR_VAL_START) { + if (addr + len <= QSFP_MONITOR_VAL_END) + len = addr + len - QSFP_MONITOR_VAL_START; + else + len = QSFP_MONITOR_RANGE; + offset = QSFP_MONITOR_VAL_START - addr; + addr = QSFP_MONITOR_VAL_START; + } else if (addr == QSFP_MONITOR_VAL_START) { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_RANGE; + } else { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_VAL_END - addr + 1; + } + /* Refresh the values of the dynamic monitors from the cable */ + ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len); + if (ret != len) { + ret = -EAGAIN; + goto set_zeroes; + } + } + return 0; set_zeroes: diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index 69275ebd9597..36cf52359848 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -74,6 +74,9 @@ /* Defined fields that Intel requires of qualified cables */ /* Byte 0 is Identifier, not checked */ /* Byte 1 is reserved "status MSB" */ +#define QSFP_MONITOR_VAL_START 22 +#define QSFP_MONITOR_VAL_END 81 +#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1) #define QSFP_TX_CTRL_BYTE_OFFS 86 #define QSFP_PWR_CTRL_BYTE_OFFS 93 #define QSFP_CDR_CTRL_BYTE_OFFS 98 diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c index 5da190e6011b..8bc5013f39a1 100644 --- a/drivers/infiniband/hw/hfi1/rc.c +++ b/drivers/infiniband/hw/hfi1/rc.c @@ -55,7 +55,7 @@ #include "trace.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_RC_##x +#define OP(x) RC_OP(x) /** * hfi1_add_retry_timer - add/start a retry timer @@ -68,6 +68,7 @@ static inline void hfi1_add_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ qp->s_timer.expires = jiffies + qp->timeout_jiffies + @@ -86,6 +87,7 @@ void hfi1_add_rnr_timer(struct rvt_qp *qp, u32 to) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_WAIT_RNR; qp->s_timer.expires = jiffies + usecs_to_jiffies(to); add_timer(&priv->s_rnr_timer); @@ -103,6 +105,7 @@ static inline void hfi1_mod_retry_timer(struct rvt_qp *qp) struct ib_qp *ibqp = &qp->ibqp; struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device); + lockdep_assert_held(&qp->s_lock); qp->s_flags |= RVT_S_TIMER; /* 4.096 usec. * (1 << qp->timeout) */ mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies + @@ -120,6 +123,7 @@ static inline int hfi1_stop_retry_timer(struct rvt_qp *qp) { int rval = 0; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry */ if (qp->s_flags & RVT_S_TIMER) { qp->s_flags &= ~RVT_S_TIMER; @@ -138,6 +142,7 @@ void hfi1_stop_rc_timers(struct rvt_qp *qp) { struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from all timers */ if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) { qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR); @@ -158,6 +163,7 @@ static inline int hfi1_stop_rnr_timer(struct rvt_qp *qp) int rval = 0; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Remove QP from rnr timer */ if (qp->s_flags & RVT_S_WAIT_RNR) { qp->s_flags &= ~RVT_S_WAIT_RNR; @@ -178,18 +184,6 @@ void hfi1_del_timers_sync(struct rvt_qp *qp) del_timer_sync(&priv->s_rnr_timer); } -/* only opcode mask for adaptive pio */ -const u32 rc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_READ_REQUEST & 0x1f)) | - BIT(OP(ACKNOWLEDGE & 0x1f)) | - BIT(OP(ATOMIC_ACKNOWLEDGE & 0x1f)) | - BIT(OP(COMPARE_SWAP & 0x1f)) | - BIT(OP(FETCH_ADD & 0x1f)); - static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, u32 psn, u32 pmtu) { @@ -216,7 +210,7 @@ static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe, * Note the QP s_lock must be held. */ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, struct hfi1_pkt_state *ps) { struct rvt_ack_entry *e; @@ -228,6 +222,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, u32 pmtu = qp->pmtu; struct hfi1_qp_priv *priv = qp->priv; + lockdep_assert_held(&qp->s_lock); /* Don't send an ACK if we aren't supposed to. */ if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) goto bail; @@ -299,10 +294,7 @@ static int make_rc_ack(struct hfi1_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = hfi1_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = mask_psn(e->psn); e->sent = 1; @@ -390,7 +382,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; struct hfi1_ibdev *dev = to_idev(qp->ibqp.device); - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; /* header size in 32-bit words LRH+BTH = (8+12)/4. */ @@ -403,6 +395,7 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) int middle = 0; int delta; + lockdep_assert_held(&qp->s_lock); ps->s_txreq = get_txreq(ps->dev, qp); if (IS_ERR(ps->s_txreq)) goto bail_no_tx; @@ -566,8 +559,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) qp->s_flags |= RVT_S_WAIT_SSN_CREDIT; goto bail; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -608,8 +602,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) qp->s_lsn++; } - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(len); @@ -640,20 +635,18 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) } if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_compare(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -779,8 +772,9 @@ int hfi1_make_rc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) * See restart_rc(). */ len = (delta_psn(qp->s_psn, wqe->psn)) * pmtu; - ohdr->u.rc.reth.vaddr = - cpu_to_be64(wqe->rdma_wr.remote_addr + len); + put_ib_reth_vaddr( + wqe->rdma_wr.remote_addr + len, + &ohdr->u.rc.reth); ohdr->u.rc.reth.rkey = cpu_to_be32(wqe->rdma_wr.rkey); ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len); @@ -841,7 +835,7 @@ bail_no_tx: * * This is called from hfi1_rc_rcv() and handle_receive_interrupt(). * Note that RDMA reads and atomics are handled in the - * send side QP state and tasklet. + * send side QP state and send engine. */ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, int is_fecn) @@ -856,8 +850,8 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, u32 vl, plen; struct send_context *sc; struct pio_buf *pbuf; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; unsigned long flags; /* Don't send ACK or NAK if a RDMA read or atomic is pending. */ @@ -917,7 +911,7 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, if (!pbuf) { /* * We have no room to send at the moment. Pass - * responsibility for sending the ACK to the send tasklet + * responsibility for sending the ACK to the send engine * so that when enough buffer space becomes available, * the ACK is sent ahead of other outgoing packets. */ @@ -932,16 +926,19 @@ void hfi1_send_rc_ack(struct hfi1_ctxtdata *rcd, struct rvt_qp *qp, return; queue_ack: - this_cpu_inc(*ibp->rvp.rc_qacks); spin_lock_irqsave(&qp->s_lock, flags); + if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) + goto unlock; + this_cpu_inc(*ibp->rvp.rc_qacks); qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING; qp->s_nak_state = qp->r_nak_state; qp->s_ack_psn = qp->r_ack_psn; if (is_fecn) qp->s_flags |= RVT_S_ECN; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ hfi1_schedule_send(qp); +unlock: spin_unlock_irqrestore(&qp->s_lock, flags); } @@ -960,6 +957,7 @@ static void reset_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n); u32 opcode; + lockdep_assert_held(&qp->s_lock); qp->s_cur = n; /* @@ -1027,7 +1025,7 @@ done: qp->s_psn = psn; /* * Set RVT_S_WAIT_PSN as rc_complete() may start the timer - * asynchronously before the send tasklet can get scheduled. + * asynchronously before the send engine can get scheduled. * Doing it in hfi1_make_rc_req() is too late. */ if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) && @@ -1045,6 +1043,8 @@ static void restart_rc(struct rvt_qp *qp, u32 psn, int wait) struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked); struct hfi1_ibport *ibp; + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->s_retry == 0) { if (qp->s_mig_state == IB_MIG_ARMED) { hfi1_migrate_qp(qp); @@ -1121,6 +1121,7 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) struct rvt_swqe *wqe; u32 n = qp->s_last; + lockdep_assert_held(&qp->s_lock); /* Find the work request corresponding to the given PSN. */ for (;;) { wqe = rvt_get_swqe_ptr(qp, n); @@ -1141,15 +1142,16 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr) +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; u32 opcode; u32 psn; + lockdep_assert_held(&qp->s_lock); if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND)) return; @@ -1241,6 +1243,7 @@ static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp, struct ib_wc wc; unsigned i; + lockdep_assert_held(&qp->s_lock); /* * Don't decrement refcount and don't generate a * completion if the SWQE is being resent until the send @@ -1340,6 +1343,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, int diff; unsigned long to; + lockdep_assert_held(&qp->s_lock); /* * Note that NAKs implicitly ACK outstanding SEND and RDMA write * requests and implicitly NAK RDMA read and atomic requests issued @@ -1389,7 +1393,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1555,6 +1559,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, { struct rvt_swqe *wqe; + lockdep_assert_held(&qp->s_lock); /* Remove QP from retry timer */ hfi1_stop_rc_timers(qp); @@ -1573,7 +1578,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1595,7 +1600,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct hfi1_ibport *ibp, u32 psn, * Called at interrupt level. */ static void rc_rcv_resp(struct hfi1_ibport *ibp, - struct hfi1_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, u32 psn, u32 hdrsize, u32 pmtu, struct hfi1_ctxtdata *rcd) @@ -1649,14 +1654,10 @@ static void rc_rcv_resp(struct hfi1_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64)be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else { + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; - } if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) goto ack_done; @@ -1782,7 +1783,7 @@ static inline void rc_defered_ack(struct hfi1_ctxtdata *rcd, { if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1796,8 +1797,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) return; list_del_init(&qp->rspwait); qp->r_flags &= ~RVT_R_RSP_NAK; - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } /** @@ -1815,7 +1815,7 @@ static inline void rc_cancel_ack(struct rvt_qp *qp) * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, +static noinline int rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, u32 psn, int diff, struct hfi1_ctxtdata *rcd) { @@ -1923,7 +1923,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, } if (len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey, @@ -1946,7 +1946,7 @@ static noinline int rc_rcv_error(struct hfi1_other_headers *ohdr, void *data, case OP(FETCH_ADD): { /* * If we didn't find the atomic request in the ack queue - * or the send tasklet is already backed up to send an + * or the send engine is already backed up to send an * earlier entry, we can ignore this request. */ if (!e || e->opcode != (u8)opcode || old_req) @@ -2123,13 +2123,13 @@ void process_becn(struct hfi1_pportdata *ppd, u8 sl, u16 rlid, u32 lqpn, void hfi1_rc_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; @@ -2143,6 +2143,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet) int copy_last = 0; u32 rkey; + lockdep_assert_held(&qp->r_lock); bth0 = be32_to_cpu(ohdr->bth[0]); if (hfi1_ruc_check_hdr(ibp, hdr, rcv_flags & HFI1_HAS_GRH, qp, bth0)) return; @@ -2342,7 +2343,7 @@ send_last: qp->r_sge.sg_list = NULL; if (qp->r_len != 0) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2397,7 +2398,7 @@ send_last: len = be32_to_cpu(reth->length); if (len) { u32 rkey = be32_to_cpu(reth->rkey); - u64 vaddr = be64_to_cpu(reth->vaddr); + u64 vaddr = get_ib_reth_vaddr(reth); int ok; /* Check rkey & NAK */ @@ -2432,7 +2433,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2469,8 +2470,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64)be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2481,11 +2481,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *)qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64)atomic64_add_return(sdata, maddr) - sdata : (u64)cmpxchg((u64 *)qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2499,7 +2499,7 @@ send_last: qp->r_nak_state = 0; qp->r_head_ack_queue = next; - /* Schedule the send tasklet. */ + /* Schedule the send engine. */ qp->s_flags |= RVT_S_RESP_PENDING; hfi1_schedule_send(qp); @@ -2575,12 +2575,12 @@ send_ack: void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp) { int has_grh = rcv_flags & HFI1_HAS_GRH; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct hfi1_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num); int diff; u32 opcode; diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c index 48d5094f98e2..a1576aea4756 100644 --- a/drivers/infiniband/hw/hfi1/ruc.c +++ b/drivers/infiniband/hw/hfi1/ruc.c @@ -262,7 +262,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the hfi1_migrate_qp() call. */ -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -352,7 +352,7 @@ err: * * This is called from hfi1_do_send() to * forward a WQE addressed to the same HFI. - * Note that although we are single threaded due to the tasklet, we still + * Note that although we are single threaded due to the send engine, we still * have to protect against post_send(). We don't have to worry about * receive interrupts since this is a connected protocol and all packets * will pass through here. @@ -765,7 +765,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn) } } -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps) { @@ -846,7 +846,7 @@ void _hfi1_do_send(struct work_struct *work) * @work: contains a pointer to the QP * * Process entries in the send work queue until credit or queue is - * exhausted. Only allow one CPU to send a packet per QP (tasklet). + * exhausted. Only allow one CPU to send a packet per QP. * Otherwise, two threads could send packets out of order. */ void hfi1_do_send(struct rvt_qp *qp) @@ -909,7 +909,7 @@ void hfi1_do_send(struct rvt_qp *qp) spin_unlock_irqrestore(&qp->s_lock, ps.flags); /* * If the packet cannot be sent now, return and - * the send tasklet will be woken up later. + * the send engine will be woken up later. */ if (hfi1_verbs_send(qp, &ps)) return; diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c index f9befc05b349..fd39bcaa062d 100644 --- a/drivers/infiniband/hw/hfi1/sdma.c +++ b/drivers/infiniband/hw/hfi1/sdma.c @@ -726,6 +726,34 @@ u16 sdma_get_descq_cnt(void) } /** + * sdma_engine_get_vl() - return vl for a given sdma engine + * @sde: sdma engine + * + * This function returns the vl mapped to a given engine, or an error if + * the mapping can't be found. The mapping fields are protected by RCU. + */ +int sdma_engine_get_vl(struct sdma_engine *sde) +{ + struct hfi1_devdata *dd = sde->dd; + struct sdma_vl_map *m; + u8 vl; + + if (sde->this_idx >= TXE_NUM_SDMA_ENGINES) + return -EINVAL; + + rcu_read_lock(); + m = rcu_dereference(dd->sdma_map); + if (unlikely(!m)) { + rcu_read_unlock(); + return -EINVAL; + } + vl = m->engine_to_vl[sde->this_idx]; + rcu_read_unlock(); + + return vl; +} + +/** * sdma_select_engine_vl() - select sdma engine * @dd: devdata * @selector: a spreading factor @@ -788,6 +816,326 @@ struct sdma_engine *sdma_select_engine_sc( return sdma_select_engine_vl(dd, selector, vl); } +struct sdma_rht_map_elem { + u32 mask; + u8 ctr; + struct sdma_engine *sde[0]; +}; + +struct sdma_rht_node { + unsigned long cpu_id; + struct sdma_rht_map_elem *map[HFI1_MAX_VLS_SUPPORTED]; + struct rhash_head node; +}; + +#define NR_CPUS_HINT 192 + +static const struct rhashtable_params sdma_rht_params = { + .nelem_hint = NR_CPUS_HINT, + .head_offset = offsetof(struct sdma_rht_node, node), + .key_offset = offsetof(struct sdma_rht_node, cpu_id), + .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id), + .max_size = NR_CPUS, + .min_size = 8, + .automatic_shrinking = true, +}; + +/* + * sdma_select_user_engine() - select sdma engine based on user setup + * @dd: devdata + * @selector: a spreading factor + * @vl: this vl + * + * This function returns an sdma engine for a user sdma request. + * User defined sdma engine affinity setting is honored when applicable, + * otherwise system default sdma engine mapping is used. To ensure correct + * ordering, the mapping from <selector, vl> to sde must remain unchanged. + */ +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl) +{ + struct sdma_rht_node *rht_node; + struct sdma_engine *sde = NULL; + const struct cpumask *current_mask = tsk_cpus_allowed(current); + unsigned long cpu_id; + + /* + * To ensure that always the same sdma engine(s) will be + * selected make sure the process is pinned to this CPU only. + */ + if (cpumask_weight(current_mask) != 1) + goto out; + + cpu_id = smp_processor_id(); + rcu_read_lock(); + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu_id, + sdma_rht_params); + + if (rht_node && rht_node->map[vl]) { + struct sdma_rht_map_elem *map = rht_node->map[vl]; + + sde = map->sde[selector & map->mask]; + } + rcu_read_unlock(); + + if (sde) + return sde; + +out: + return sdma_select_engine_vl(dd, selector, vl); +} + +static void sdma_populate_sde_map(struct sdma_rht_map_elem *map) +{ + int i; + + for (i = 0; i < roundup_pow_of_two(map->ctr ? : 1) - map->ctr; i++) + map->sde[map->ctr + i] = map->sde[i]; +} + +static void sdma_cleanup_sde_map(struct sdma_rht_map_elem *map, + struct sdma_engine *sde) +{ + unsigned int i, pow; + + /* only need to check the first ctr entries for a match */ + for (i = 0; i < map->ctr; i++) { + if (map->sde[i] == sde) { + memmove(&map->sde[i], &map->sde[i + 1], + (map->ctr - i - 1) * sizeof(map->sde[0])); + map->ctr--; + pow = roundup_pow_of_two(map->ctr ? : 1); + map->mask = pow - 1; + sdma_populate_sde_map(map); + break; + } + } +} + +/* + * Prevents concurrent reads and writes of the sdma engine cpu_mask + */ +static DEFINE_MUTEX(process_to_sde_mutex); + +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count) +{ + struct hfi1_devdata *dd = sde->dd; + cpumask_var_t mask, new_mask; + unsigned long cpu; + int ret, vl, sz; + + vl = sdma_engine_get_vl(sde); + if (unlikely(vl < 0)) + return -EINVAL; + + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL); + if (!ret) { + free_cpumask_var(mask); + return -ENOMEM; + } + ret = cpulist_parse(buf, mask); + if (ret) + goto out_free; + + if (!cpumask_subset(mask, cpu_online_mask)) { + dd_dev_warn(sde->dd, "Invalid CPU mask\n"); + ret = -EINVAL; + goto out_free; + } + + sz = sizeof(struct sdma_rht_map_elem) + + (TXE_NUM_SDMA_ENGINES * sizeof(struct sdma_engine *)); + + mutex_lock(&process_to_sde_mutex); + + for_each_cpu(cpu, mask) { + struct sdma_rht_node *rht_node; + + /* Check if we have this already mapped */ + if (cpumask_test_cpu(cpu, &sde->cpu_mask)) { + cpumask_set_cpu(cpu, new_mask); + continue; + } + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (!rht_node) { + rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); + if (!rht_node) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + if (!rht_node->map[vl]) { + kfree(rht_node); + ret = -ENOMEM; + goto out; + } + rht_node->cpu_id = cpu; + rht_node->map[vl]->mask = 0; + rht_node->map[vl]->ctr = 1; + rht_node->map[vl]->sde[0] = sde; + + ret = rhashtable_insert_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + if (ret) { + kfree(rht_node->map[vl]); + kfree(rht_node); + dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n", + cpu); + goto out; + } + + } else { + int ctr, pow; + + /* Add new user mappings */ + if (!rht_node->map[vl]) + rht_node->map[vl] = kzalloc(sz, GFP_KERNEL); + + if (!rht_node->map[vl]) { + ret = -ENOMEM; + goto out; + } + + rht_node->map[vl]->ctr++; + ctr = rht_node->map[vl]->ctr; + rht_node->map[vl]->sde[ctr - 1] = sde; + pow = roundup_pow_of_two(ctr); + rht_node->map[vl]->mask = pow - 1; + + /* Populate the sde map table */ + sdma_populate_sde_map(rht_node->map[vl]); + } + cpumask_set_cpu(cpu, new_mask); + } + + /* Clean up old mappings */ + for_each_cpu(cpu, cpu_online_mask) { + struct sdma_rht_node *rht_node; + + /* Don't cleanup sdes that are set in the new mask */ + if (cpumask_test_cpu(cpu, mask)) + continue; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpu, + sdma_rht_params); + if (rht_node) { + bool empty = true; + int i; + + /* Remove mappings for old sde */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + if (rht_node->map[i]) + sdma_cleanup_sde_map(rht_node->map[i], + sde); + + /* Free empty hash table entries */ + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i]) + continue; + + if (rht_node->map[i]->ctr) { + empty = false; + break; + } + } + + if (empty) { + ret = rhashtable_remove_fast(&dd->sdma_rht, + &rht_node->node, + sdma_rht_params); + WARN_ON(ret); + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); + } + } + } + + cpumask_copy(&sde->cpu_mask, new_mask); +out: + mutex_unlock(&process_to_sde_mutex); +out_free: + free_cpumask_var(mask); + free_cpumask_var(new_mask); + return ret ? : strnlen(buf, PAGE_SIZE); +} + +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + mutex_lock(&process_to_sde_mutex); + if (cpumask_empty(&sde->cpu_mask)) + snprintf(buf, PAGE_SIZE, "%s\n", "empty"); + else + cpumap_print_to_pagebuf(true, buf, &sde->cpu_mask); + mutex_unlock(&process_to_sde_mutex); + return strnlen(buf, PAGE_SIZE); +} + +static void sdma_rht_free(void *ptr, void *arg) +{ + struct sdma_rht_node *rht_node = ptr; + int i; + + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) + kfree(rht_node->map[i]); + + kfree(rht_node); +} + +/** + * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings + * @s: seq file + * @dd: hfi1_devdata + * @cpuid: cpu id + * + * This routine dumps the process to sde mappings per cpu + */ +void sdma_seqfile_dump_cpu_list(struct seq_file *s, + struct hfi1_devdata *dd, + unsigned long cpuid) +{ + struct sdma_rht_node *rht_node; + int i, j; + + rht_node = rhashtable_lookup_fast(&dd->sdma_rht, &cpuid, + sdma_rht_params); + if (!rht_node) + return; + + seq_printf(s, "cpu%3lu: ", cpuid); + for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { + if (!rht_node->map[i] || !rht_node->map[i]->ctr) + continue; + + seq_printf(s, " vl%d: [", i); + + for (j = 0; j < rht_node->map[i]->ctr; j++) { + if (!rht_node->map[i]->sde[j]) + continue; + + if (j > 0) + seq_puts(s, ","); + + seq_printf(s, " sdma%2d", + rht_node->map[i]->sde[j]->this_idx); + } + seq_puts(s, " ]"); + } + + seq_puts(s, "\n"); +} + /* * Free the indicated map struct */ @@ -1161,6 +1509,10 @@ int sdma_init(struct hfi1_devdata *dd, u8 port) dd->num_sdma = num_engines; if (sdma_map_init(dd, port, ppd->vls_operational, NULL)) goto bail; + + if (rhashtable_init(&dd->sdma_rht, &sdma_rht_params)) + goto bail; + dd_dev_info(dd, "SDMA num_sdma: %u\n", dd->num_sdma); return 0; @@ -1252,6 +1604,7 @@ void sdma_exit(struct hfi1_devdata *dd) sdma_finalput(&sde->state); } sdma_clean(dd, dd->num_sdma); + rhashtable_free_and_destroy(&dd->sdma_rht, sdma_rht_free, NULL); } /* @@ -2086,6 +2439,11 @@ nodesc: * @sde: sdma engine to use * @wait: wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit + * @count: pointer to a u32 which, after return will contain the total number of + * sdma_txreqs removed from the tx_list. This will include sdma_txreqs + * whose SDMA descriptors are submitted to the ring and the sdma_txreqs + * which are added to SDMA engine flush list if the SDMA engine state is + * not running. * * The call submits the list into the ring. * @@ -2100,18 +2458,18 @@ nodesc: * side locking. * * Return: - * > 0 - Success (value is number of sdma_txreq's submitted), + * 0 - Success, * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL) * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state */ int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list) + struct list_head *tx_list, u32 *count_out) { struct sdma_txreq *tx, *tx_next; int ret = 0; unsigned long flags; u16 tail = INVALID_TAIL; - int count = 0; + u32 submit_count = 0, flush_count = 0, total_count; spin_lock_irqsave(&sde->tail_lock, flags); retry: @@ -2127,33 +2485,34 @@ retry: } list_del_init(&tx->list); tail = submit_tx(sde, tx); - count++; + submit_count++; if (tail != INVALID_TAIL && - (count & SDMA_TAIL_UPDATE_THRESH) == 0) { + (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) { sdma_update_tail(sde, tail); tail = INVALID_TAIL; } } update_tail: + total_count = submit_count + flush_count; if (wait) - iowait_sdma_add(wait, count); + iowait_sdma_add(wait, total_count); if (tail != INVALID_TAIL) sdma_update_tail(sde, tail); spin_unlock_irqrestore(&sde->tail_lock, flags); - return ret == 0 ? count : ret; + *count_out = total_count; + return ret; unlock_noconn: spin_lock(&sde->flushlist_lock); list_for_each_entry_safe(tx, tx_next, tx_list, list) { tx->wait = wait; list_del_init(&tx->list); - if (wait) - iowait_sdma_inc(wait); tx->next_descq_idx = 0; #ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER tx->sn = sde->tail_sn++; trace_hfi1_sdma_in_sn(sde, tx->sn); #endif list_add_tail(&tx->list, &sde->flushlist); + flush_count++; if (wait) { wait->tx_count++; wait->count += tx->num_desc; diff --git a/drivers/infiniband/hw/hfi1/sdma.h b/drivers/infiniband/hw/hfi1/sdma.h index 8f50c99fe711..56257ea3598f 100644 --- a/drivers/infiniband/hw/hfi1/sdma.h +++ b/drivers/infiniband/hw/hfi1/sdma.h @@ -413,6 +413,8 @@ struct sdma_engine { spinlock_t flushlist_lock; /* private: */ struct list_head flushlist; + struct cpumask cpu_mask; + struct kobject kobj; }; int sdma_init(struct hfi1_devdata *dd, u8 port); @@ -847,7 +849,8 @@ int sdma_send_txreq(struct sdma_engine *sde, struct sdma_txreq *tx); int sdma_send_txlist(struct sdma_engine *sde, struct iowait *wait, - struct list_head *tx_list); + struct list_head *tx_list, + u32 *count); int sdma_ahg_alloc(struct sdma_engine *sde); void sdma_ahg_free(struct sdma_engine *sde, int ahg_index); @@ -1058,7 +1061,15 @@ struct sdma_engine *sdma_select_engine_vl( u32 selector, u8 vl); +struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd, + u32 selector, u8 vl); +ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf); +ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf, + size_t count); +int sdma_engine_get_vl(struct sdma_engine *sde); void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *); +void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd, + unsigned long cpuid); #ifdef CONFIG_SDMA_VERBOSITY void sdma_dumpstate(struct sdma_engine *); diff --git a/drivers/infiniband/hw/hfi1/sysfs.c b/drivers/infiniband/hw/hfi1/sysfs.c index 74c84c655f7e..edba22461a9c 100644 --- a/drivers/infiniband/hw/hfi1/sysfs.c +++ b/drivers/infiniband/hw/hfi1/sysfs.c @@ -766,13 +766,95 @@ bail: return ret; } +struct sde_attribute { + struct attribute attr; + ssize_t (*show)(struct sdma_engine *sde, char *buf); + ssize_t (*store)(struct sdma_engine *sde, const char *buf, size_t cnt); +}; + +static ssize_t sde_show(struct kobject *kobj, struct attribute *attr, char *buf) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!sde_attr->show) + return -EINVAL; + + return sde_attr->show(sde, buf); +} + +static ssize_t sde_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct sde_attribute *sde_attr = + container_of(attr, struct sde_attribute, attr); + struct sdma_engine *sde = + container_of(kobj, struct sdma_engine, kobj); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!sde_attr->store) + return -EINVAL; + + return sde_attr->store(sde, buf, count); +} + +static const struct sysfs_ops sde_sysfs_ops = { + .show = sde_show, + .store = sde_store, +}; + +static struct kobj_type sde_ktype = { + .sysfs_ops = &sde_sysfs_ops, +}; + +#define SDE_ATTR(_name, _mode, _show, _store) \ + struct sde_attribute sde_attr_##_name = \ + __ATTR(_name, _mode, _show, _store) + +static ssize_t sde_show_cpu_to_sde_map(struct sdma_engine *sde, char *buf) +{ + return sdma_get_cpu_to_sde_map(sde, buf); +} + +static ssize_t sde_store_cpu_to_sde_map(struct sdma_engine *sde, + const char *buf, size_t count) +{ + return sdma_set_cpu_to_sde_map(sde, buf, count); +} + +static ssize_t sde_show_vl(struct sdma_engine *sde, char *buf) +{ + int vl; + + vl = sdma_engine_get_vl(sde); + if (vl < 0) + return vl; + + return snprintf(buf, PAGE_SIZE, "%d\n", vl); +} + +static SDE_ATTR(cpu_list, S_IWUSR | S_IRUGO, + sde_show_cpu_to_sde_map, + sde_store_cpu_to_sde_map); +static SDE_ATTR(vl, S_IRUGO, sde_show_vl, NULL); + +static struct sde_attribute *sde_attribs[] = { + &sde_attr_cpu_list, + &sde_attr_vl +}; + /* * Register and create our files in /sys/class/infiniband. */ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) { struct ib_device *dev = &dd->verbs_dev.rdi.ibdev; - int i, ret; + struct device *class_dev = &dev->dev; + int i, j, ret; for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) { ret = device_create_file(&dev->dev, hfi1_attributes[i]); @@ -780,10 +862,29 @@ int hfi1_verbs_register_sysfs(struct hfi1_devdata *dd) goto bail; } + for (i = 0; i < dd->num_sdma; i++) { + ret = kobject_init_and_add(&dd->per_sdma[i].kobj, + &sde_ktype, &class_dev->kobj, + "sdma%d", i); + if (ret) + goto bail; + + for (j = 0; j < ARRAY_SIZE(sde_attribs); j++) { + ret = sysfs_create_file(&dd->per_sdma[i].kobj, + &sde_attribs[j]->attr); + if (ret) + goto bail; + } + } + return 0; bail: for (i = 0; i < ARRAY_SIZE(hfi1_attributes); ++i) device_remove_file(&dev->dev, hfi1_attributes[i]); + + for (i = 0; i < dd->num_sdma; i++) + kobject_del(&dd->per_sdma[i].kobj); + return ret; } diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c index 4cfb13771897..01f525cd985a 100644 --- a/drivers/infiniband/hw/hfi1/trace.c +++ b/drivers/infiniband/hw/hfi1/trace.c @@ -47,9 +47,9 @@ #define CREATE_TRACE_POINTS #include "trace.h" -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) +u8 ibhdr_exhdr_len(struct ib_header *hdr) { - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; u8 opcode; u8 lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); @@ -67,16 +67,11 @@ u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr) #define AETH_PRN "aeth syn 0x%.2x %s msn 0x%.8x" #define DETH_PRN "deth qkey 0x%.8x sqpn 0x%.6x" #define IETH_PRN "ieth rkey 0x%.8x" -#define ATOMICACKETH_PRN "origdata %lld" -#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %lld cdata %lld" +#define ATOMICACKETH_PRN "origdata %llx" +#define ATOMICETH_PRN "vaddr 0x%llx rkey 0x%.8x sdata %llx cdata %llx" #define OP(transport, op) IB_OPCODE_## transport ## _ ## op -static u64 ib_u64_get(__be32 *p) -{ - return ((u64)be32_to_cpu(p[0]) << 32) | be32_to_cpu(p[1]); -} - static const char *parse_syndrome(u8 syndrome) { switch (syndrome >> 5) { @@ -113,8 +108,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): case OP(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE): trace_seq_printf(p, RETH_PRN " " IMM_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length), be32_to_cpu(eh->rc.imm_data)); @@ -126,8 +120,7 @@ const char *parse_everbs_hdrs( case OP(RC, RDMA_WRITE_ONLY): case OP(UC, RDMA_WRITE_ONLY): trace_seq_printf(p, RETH_PRN, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->rc.reth.vaddr), + get_ib_reth_vaddr(&eh->rc.reth), be32_to_cpu(eh->rc.reth.rkey), be32_to_cpu(eh->rc.reth.length)); break; @@ -145,20 +138,16 @@ const char *parse_everbs_hdrs( be32_to_cpu(eh->at.aeth) >> 24, parse_syndrome(be32_to_cpu(eh->at.aeth) >> 24), be32_to_cpu(eh->at.aeth) & HFI1_MSN_MASK, - (unsigned long long) - ib_u64_get(eh->at.atomic_ack_eth)); + ib_u64_get(&eh->at.atomic_ack_eth)); break; /* atomiceth */ case OP(RC, COMPARE_SWAP): case OP(RC, FETCH_ADD): trace_seq_printf(p, ATOMICETH_PRN, - (unsigned long long)ib_u64_get( - eh->atomic_eth.vaddr), + get_ib_ateth_vaddr(&eh->atomic_eth), eh->atomic_eth.rkey, - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.swap_data), - (unsigned long long)ib_u64_get( - (__be32 *)&eh->atomic_eth.compare_data)); + get_ib_ateth_swap(&eh->atomic_eth), + get_ib_ateth_compare(&eh->atomic_eth)); break; /* deth */ case OP(UD, SEND_ONLY): diff --git a/drivers/infiniband/hw/hfi1/trace_ctxts.h b/drivers/infiniband/hw/hfi1/trace_ctxts.h index 31654bbac1cf..26ae789e47cf 100644 --- a/drivers/infiniband/hw/hfi1/trace_ctxts.h +++ b/drivers/infiniband/hw/hfi1/trace_ctxts.h @@ -67,9 +67,9 @@ TRACE_EVENT(hfi1_uctxtdata, __field(u64, hw_free) __field(void __iomem *, piobase) __field(u16, rcvhdrq_cnt) - __field(u64, rcvhdrq_phys) + __field(u64, rcvhdrq_dma) __field(u32, eager_cnt) - __field(u64, rcvegr_phys) + __field(u64, rcvegr_dma) ), TP_fast_assign(DD_DEV_ASSIGN(dd); __entry->ctxt = uctxt->ctxt; @@ -77,10 +77,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free = le64_to_cpu(*uctxt->sc->hw_free); __entry->piobase = uctxt->sc->base_addr; __entry->rcvhdrq_cnt = uctxt->rcvhdrq_cnt; - __entry->rcvhdrq_phys = uctxt->rcvhdrq_phys; + __entry->rcvhdrq_dma = uctxt->rcvhdrq_dma; __entry->eager_cnt = uctxt->egrbufs.alloced; - __entry->rcvegr_phys = - uctxt->egrbufs.rcvtids[0].phys; + __entry->rcvegr_dma = uctxt->egrbufs.rcvtids[0].dma; ), TP_printk("[%s] ctxt %u " UCTXT_FMT, __get_str(dev), @@ -89,9 +88,9 @@ TRACE_EVENT(hfi1_uctxtdata, __entry->hw_free, __entry->piobase, __entry->rcvhdrq_cnt, - __entry->rcvhdrq_phys, + __entry->rcvhdrq_dma, __entry->eager_cnt, - __entry->rcvegr_phys + __entry->rcvegr_dma ) ); diff --git a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h index c3e41aed0034..382fcda3a5f6 100644 --- a/drivers/infiniband/hw/hfi1/trace_ibhdrs.h +++ b/drivers/infiniband/hw/hfi1/trace_ibhdrs.h @@ -55,7 +55,7 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM hfi1_ibhdrs -u8 ibhdr_exhdr_len(struct hfi1_ib_header *hdr); +u8 ibhdr_exhdr_len(struct ib_header *hdr); const char *parse_everbs_hdrs(struct trace_seq *p, u8 opcode, void *ehdrs); #define __parse_ib_ehdrs(op, ehdrs) parse_everbs_hdrs(p, op, ehdrs) @@ -74,7 +74,7 @@ __print_symbolic(lrh, \ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, TP_PROTO(struct hfi1_devdata *dd, - struct hfi1_ib_header *hdr), + struct ib_header *hdr), TP_ARGS(dd, hdr), TP_STRUCT__entry( DD_DEV_ENTRY(dd) @@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, __dynamic_array(u8, ehdrs, ibhdr_exhdr_len(hdr)) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; DD_DEV_ASSIGN(dd); /* LRH */ @@ -185,19 +185,19 @@ DECLARE_EVENT_CLASS(hfi1_ibhdr_template, ); DEFINE_EVENT(hfi1_ibhdr_template, input_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, pio_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, ack_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); DEFINE_EVENT(hfi1_ibhdr_template, sdma_output_ibhdr, - TP_PROTO(struct hfi1_devdata *dd, struct hfi1_ib_header *hdr), + TP_PROTO(struct hfi1_devdata *dd, struct ib_header *hdr), TP_ARGS(dd, hdr)); #endif /* __HFI1_TRACE_IBHDRS_H */ diff --git a/drivers/infiniband/hw/hfi1/trace_rx.h b/drivers/infiniband/hw/hfi1/trace_rx.h index 9ba1f615ec95..11e02b228922 100644 --- a/drivers/infiniband/hw/hfi1/trace_rx.h +++ b/drivers/infiniband/hw/hfi1/trace_rx.h @@ -260,7 +260,7 @@ TRACE_EVENT(hfi1_mmu_invalidate, TRACE_EVENT(snoop_capture, TP_PROTO(struct hfi1_devdata *dd, int hdr_len, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, int data_len, void *data), TP_ARGS(dd, hdr_len, hdr, data_len, data), @@ -279,7 +279,7 @@ TRACE_EVENT(snoop_capture, __dynamic_array(u8, raw_pkt, data_len) ), TP_fast_assign( - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; __entry->lnh = (u8)(be16_to_cpu(hdr->lrh[0]) & 3); if (__entry->lnh == HFI1_LRH_BTH) diff --git a/drivers/infiniband/hw/hfi1/uc.c b/drivers/infiniband/hw/hfi1/uc.c index a726d96d185f..5e6d1bac4914 100644 --- a/drivers/infiniband/hw/hfi1/uc.c +++ b/drivers/infiniband/hw/hfi1/uc.c @@ -50,14 +50,7 @@ #include "qp.h" /* cut down ridiculously long IB macro names */ -#define OP(x) IB_OPCODE_UC_##x - -/* only opcode mask for adaptive pio */ -const u32 uc_only_opcode = - BIT(OP(SEND_ONLY) & 0x1f) | - BIT(OP(SEND_ONLY_WITH_IMMEDIATE & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY & 0x1f)) | - BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE & 0x1f)); +#define OP(x) UC_OP(x) /** * hfi1_make_uc_req - construct a request packet (SEND, RDMA write) @@ -70,7 +63,7 @@ const u32 uc_only_opcode = int hfi1_make_uc_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords = 5; u32 bth0 = 0; @@ -304,12 +297,12 @@ bail_no_tx: void hfi1_uc_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; u32 bth0, opcode; u32 hdrsize = packet->hlen; u32 psn; diff --git a/drivers/infiniband/hw/hfi1/ud.c b/drivers/infiniband/hw/hfi1/ud.c index f01e8e1d62d3..97ae24b6314c 100644 --- a/drivers/infiniband/hw/hfi1/ud.c +++ b/drivers/infiniband/hw/hfi1/ud.c @@ -271,7 +271,7 @@ drop: int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct hfi1_pportdata *ppd; struct hfi1_ibport *ibp; @@ -510,8 +510,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, u32 bth0, plen, vl, hwords = 5; u16 lrh0; u8 sl = ibp->sc_to_sl[sc5]; - struct hfi1_ib_header hdr; - struct hfi1_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; struct pio_buf *pbuf; struct send_context *ctxt = qp_to_send_context(qp, sc5); struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); @@ -559,8 +559,8 @@ void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn, /* * opa_smp_check() - Do the regular pkey checking, and the additional - * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26 - * ("SMA Packet Checks"). + * checks for SMPs specified in OPAv1 rev 1.0, 9/19/2016 update, section + * 9.10.25 ("SMA Packet Checks"). * * Note that: * - Checks are done using the pkey directly from the packet's BTH, @@ -603,23 +603,28 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, /* * SMPs fall into one of four (disjoint) categories: - * SMA request, SMA response, trap, or trap repress. - * Our response depends, in part, on which type of - * SMP we're processing. + * SMA request, SMA response, SMA trap, or SMA trap repress. + * Our response depends, in part, on which type of SMP we're + * processing. * - * If this is not an SMA request, or trap repress: - * - accept MAD if the port is running an SM - * - pkey == FULL_MGMT_P_KEY => - * reply with unsupported method (i.e., just mark - * the smp's status field here, and let it be - * processed normally) - * - pkey != LIM_MGMT_P_KEY => - * increment port recv constraint errors, drop MAD - * If this is an SMA request or trap repress: + * If this is an SMA response, skip the check here. + * + * If this is an SMA request or SMA trap repress: * - pkey != FULL_MGMT_P_KEY => * increment port recv constraint errors, drop MAD + * + * Otherwise: + * - accept if the port is running an SM + * - drop MAD if it's an SMA trap + * - pkey == FULL_MGMT_P_KEY => + * reply with unsupported method + * - pkey != FULL_MGMT_P_KEY => + * increment port recv constraint errors, drop MAD */ switch (smp->method) { + case IB_MGMT_METHOD_GET_RESP: + case IB_MGMT_METHOD_REPORT_RESP: + break; case IB_MGMT_METHOD_GET: case IB_MGMT_METHOD_SET: case IB_MGMT_METHOD_REPORT: @@ -629,23 +634,17 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, return 1; } break; - case IB_MGMT_METHOD_SEND: - case IB_MGMT_METHOD_TRAP: - case IB_MGMT_METHOD_GET_RESP: - case IB_MGMT_METHOD_REPORT_RESP: + default: if (ibp->rvp.port_cap_flags & IB_PORT_SM) return 0; + if (smp->method == IB_MGMT_METHOD_TRAP) + return 1; if (pkey == FULL_MGMT_P_KEY) { smp->status |= IB_SMP_UNSUP_METHOD; return 0; } - if (pkey != LIM_MGMT_P_KEY) { - ingress_pkey_table_fail(ppd, pkey, slid); - return 1; - } - break; - default: - break; + ingress_pkey_table_fail(ppd, pkey, slid); + return 1; } return 0; } @@ -665,7 +664,7 @@ static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5, */ void hfi1_ud_rcv(struct hfi1_packet *packet) { - struct hfi1_other_headers *ohdr = packet->ohdr; + struct ib_other_headers *ohdr = packet->ohdr; int opcode; u32 hdrsize = packet->hlen; struct ib_wc wc; @@ -675,13 +674,13 @@ void hfi1_ud_rcv(struct hfi1_packet *packet) int mgmt_pkey_idx = -1; struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 rcv_flags = packet->rcv_flags; void *data = packet->ebuf; u32 tlen = packet->tlen; struct rvt_qp *qp = packet->qp; bool has_grh = rcv_flags & HFI1_HAS_GRH; - u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + u8 sc5 = hdr2sc(hdr, packet->rhf); u32 bth1; u8 sl_from_sc, sl; u16 slid; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0ecf27903dc2..a761f804111e 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_SHIFT 24 #define KDETH_HCRC_LOWER_MASK 0xff +#define AHG_KDETH_INTR_SHIFT 12 + #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -546,7 +548,7 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, u8 opcode, sc, vl; int req_queued = 0; u16 dlid; - u8 selector; + u32 selector; if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) { hfi1_cdbg( @@ -751,12 +753,9 @@ int hfi1_user_sdma_process_request(struct file *fp, struct iovec *iovec, dlid = be16_to_cpu(req->hdr.lrh[1]); selector = dlid_to_selector(dlid); + selector += uctxt->ctxt + fd->subctxt; + req->sde = sdma_select_user_engine(dd, selector, vl); - /* Have to select the engine */ - req->sde = sdma_select_engine_vl(dd, - (u32)(uctxt->ctxt + fd->subctxt + - selector), - vl); if (!req->sde || !sdma_running(req->sde)) { ret = -ECOMM; goto free_req; @@ -892,7 +891,7 @@ static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len) static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) { - int ret = 0; + int ret = 0, count; unsigned npkts = 0; struct user_sdma_txreq *tx = NULL; struct hfi1_user_sdma_pkt_q *pq = NULL; @@ -1088,23 +1087,18 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts) npkts++; } dosend: - ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps); - if (list_empty(&req->txps)) { - req->seqsubmitted = req->seqnum; - if (req->seqnum == req->info.npkts) { - set_bit(SDMA_REQ_SEND_DONE, &req->flags); - /* - * The txreq has already been submitted to the HW queue - * so we can free the AHG entry now. Corruption will not - * happen due to the sequential manner in which - * descriptors are processed. - */ - if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) - sdma_ahg_free(req->sde, req->ahg_idx); - } - } else if (ret > 0) { - req->seqsubmitted += ret; - ret = 0; + ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count); + req->seqsubmitted += count; + if (req->seqsubmitted == req->info.npkts) { + set_bit(SDMA_REQ_SEND_DONE, &req->flags); + /* + * The txreq has already been submitted to the HW queue + * so we can free the AHG entry now. Corruption will not + * happen due to the sequential manner in which + * descriptors are processed. + */ + if (test_bit(SDMA_REQ_HAVE_AHG, &req->flags)) + sdma_ahg_free(req->sde, req->ahg_idx); } return ret; @@ -1480,7 +1474,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* Clear KDETH.SH on last packet */ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) >> 16); + INTR) << + AHG_KDETH_INTR_SHIFT); val &= cpu_to_le16(~(1U << 13)); AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } else { diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2b359540901d..4b7a16ceb362 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -76,7 +76,7 @@ static unsigned int hfi1_max_ahs = 0xFFFF; module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO); MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); -unsigned int hfi1_max_cqes = 0x2FFFF; +unsigned int hfi1_max_cqes = 0x2FFFFF; module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO); MODULE_PARM_DESC(max_cqes, "Maximum number of completion queue entries to support"); @@ -89,7 +89,7 @@ unsigned int hfi1_max_qp_wrs = 0x3FFF; module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO); MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); -unsigned int hfi1_max_qps = 16384; +unsigned int hfi1_max_qps = 32768; module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO); MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); @@ -335,7 +335,7 @@ const u8 hdr_len_by_opcode[256] = { [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4, [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4, [IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4, - [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4, + [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8, [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, @@ -403,6 +403,28 @@ static const opcode_handler opcode_handler_tbl[256] = { [IB_OPCODE_CNP] = &hfi1_cnp_rcv }; +#define OPMASK 0x1f + +static const u32 pio_opmask[BIT(3)] = { + /* RC */ + [IB_OPCODE_RC >> 5] = + BIT(RC_OP(SEND_ONLY) & OPMASK) | + BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) | + BIT(RC_OP(ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) | + BIT(RC_OP(COMPARE_SWAP) & OPMASK) | + BIT(RC_OP(FETCH_ADD) & OPMASK), + /* UC */ + [IB_OPCODE_UC >> 5] = + BIT(UC_OP(SEND_ONLY) & OPMASK) | + BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) | + BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK), +}; + /* * System image GUID. */ @@ -567,7 +589,7 @@ static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) void hfi1_ib_rcv(struct hfi1_packet *packet) { struct hfi1_ctxtdata *rcd = packet->rcd; - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; u32 tlen = packet->tlen; struct hfi1_pportdata *ppd = rcd->ppd; struct hfi1_ibport *ibp = &ppd->ibport_data; @@ -719,7 +741,7 @@ static void verbs_sdma_complete( if (tx->wqe) { hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); } else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct hfi1_ib_header *hdr; + struct ib_header *hdr; hdr = &tx->phdr.hdr; hfi1_rc_send_complete(qp, hdr); @@ -748,7 +770,7 @@ static int wait_kmem(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_KMEM; list_add_tail(&priv->s_iowait.list, &dev->memwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } write_sequnlock(&dev->iowait_lock); qp->s_flags &= ~RVT_S_BUSY; @@ -959,7 +981,7 @@ static int pio_wait(struct rvt_qp *qp, was_empty = list_empty(&sc->piowait); list_add_tail(&priv->s_iowait.list, &sc->piowait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); /* counting: only call wantpiobuf_intr if first user */ if (was_empty) hfi1_sc_wantpiobuf_intr(sc, 1); @@ -1200,7 +1222,7 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_ib_header *h = &tx->phdr.hdr; + struct ib_header *h = &tx->phdr.hdr; if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) return dd->process_pio_send; @@ -1210,22 +1232,18 @@ static inline send_routine get_send_routine(struct rvt_qp *qp, case IB_QPT_GSI: case IB_QPT_UD: break; - case IB_QPT_RC: - if (piothreshold && - qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & rc_only_opcode) && - iowait_sdma_pending(&priv->s_iowait) == 0 && - !sdma_txreq_built(&tx->txreq)) - return dd->process_pio_send; - break; case IB_QPT_UC: + case IB_QPT_RC: { + u8 op = get_opcode(h); + if (piothreshold && qp->s_cur_size <= min(piothreshold, qp->pmtu) && - (BIT(get_opcode(h) & 0x1f) & uc_only_opcode) && + (BIT(op & OPMASK) & pio_opmask[op >> 5]) && iowait_sdma_pending(&priv->s_iowait) == 0 && !sdma_txreq_built(&tx->txreq)) return dd->process_pio_send; break; + } default: break; } @@ -1244,8 +1262,8 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) { struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); struct hfi1_qp_priv *priv = qp->priv; - struct hfi1_other_headers *ohdr; - struct hfi1_ib_header *hdr; + struct ib_other_headers *ohdr; + struct ib_header *hdr; send_routine sr; int ret; u8 lnh; @@ -1423,7 +1441,8 @@ static int modify_device(struct ib_device *device, } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; @@ -1754,7 +1773,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) { struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data; struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); - struct hfi1_ib_header *hdr = packet->hdr; + struct ib_header *hdr = packet->hdr; struct rvt_qp *qp = packet->qp; u32 lqpn, rqpn = 0; u16 rlid = 0; @@ -1781,7 +1800,7 @@ void hfi1_cnp_rcv(struct hfi1_packet *packet) return; } - sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf); + sc5 = hdr2sc(hdr, packet->rhf); sl = ibp->sc_to_sl[sc5]; lqpn = qp->ibqp.qp_num; diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h index d1b101c54828..1c3815d89eb7 100644 --- a/drivers/infiniband/hw/hfi1/verbs.h +++ b/drivers/infiniband/hw/hfi1/verbs.h @@ -60,6 +60,7 @@ #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_mad.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_qp.h> #include <rdma/rdmavt_cq.h> @@ -80,16 +81,6 @@ struct hfi1_packet; */ #define HFI1_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -104,80 +95,16 @@ struct hfi1_packet; #define HFI1_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK BIT(31) -#define IB_BTH_SOLICITED BIT(23) -#define IB_BTH_MIG_REQ BIT(22) - -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) +#define RC_OP(x) IB_OPCODE_RC_##x +#define UC_OP(x) IB_OPCODE_UC_##x + /* flags passed by hfi1_ib_rcv() */ enum { HFI1_HAS_GRH = (1 << 0), }; -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -union ib_ehdrs { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; -} __packed; - -struct hfi1_other_headers { - __be32 bth[3]; - union ib_ehdrs u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct hfi1_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct hfi1_other_headers oth; - } l; - struct hfi1_other_headers oth; - } u; -} __packed; - struct hfi1_ahg_info { u32 ahgdesc[2]; u16 tx_flags; @@ -187,7 +114,7 @@ struct hfi1_ahg_info { struct hfi1_sdma_header { __le64 pbc; - struct hfi1_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -386,7 +313,7 @@ void hfi1_rc_rcv(struct hfi1_packet *packet); void hfi1_rc_hdrerr( struct hfi1_ctxtdata *rcd, - struct hfi1_ib_header *hdr, + struct ib_header *hdr, u32 rcv_flags, struct rvt_qp *qp); @@ -400,7 +327,7 @@ void hfi1_rc_timeout(unsigned long arg); void hfi1_del_timers_sync(struct rvt_qp *qp); void hfi1_stop_rc_timers(struct rvt_qp *qp); -void hfi1_rc_send_complete(struct rvt_qp *qp, struct hfi1_ib_header *hdr); +void hfi1_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void hfi1_rc_error(struct rvt_qp *qp, enum ib_wc_status err); @@ -423,7 +350,7 @@ int hfi1_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe); extern const u32 rc_only_opcode; extern const u32 uc_only_opcode; -static inline u8 get_opcode(struct hfi1_ib_header *h) +static inline u8 get_opcode(struct ib_header *h) { u16 lnh = be16_to_cpu(h->lrh[0]) & 3; @@ -433,13 +360,13 @@ static inline u8 get_opcode(struct hfi1_ib_header *h) return be32_to_cpu(h->u.l.oth.bth[0]) >> 24; } -int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct hfi1_ib_header *hdr, +int hfi1_ruc_check_hdr(struct hfi1_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 hfi1_make_grh(struct hfi1_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void hfi1_make_ruc_header(struct rvt_qp *qp, struct hfi1_other_headers *ohdr, +void hfi1_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2, int middle, struct hfi1_pkt_state *ps); diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c index d8fb056526f8..094ab829ec42 100644 --- a/drivers/infiniband/hw/hfi1/verbs_txreq.c +++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c @@ -109,7 +109,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev, qp->s_flags |= RVT_S_WAIT_TX; list_add_tail(&priv->s_iowait.list, &dev->txwait); trace_hfi1_qpsleep(qp, RVT_S_WAIT_TX); - atomic_inc(&qp->refcount); + rvt_get_qp(qp); } qp->s_flags &= ~RVT_S_BUSY; } diff --git a/drivers/infiniband/hw/hns/Kconfig b/drivers/infiniband/hw/hns/Kconfig new file mode 100644 index 000000000000..e1a6e055cd60 --- /dev/null +++ b/drivers/infiniband/hw/hns/Kconfig @@ -0,0 +1,10 @@ +config INFINIBAND_HNS + tristate "HNS RoCE Driver" + depends on NET_VENDOR_HISILICON + depends on ARM64 && HNS && HNS_DSAF && HNS_ENET + ---help--- + This is a RoCE/RDMA driver for the Hisilicon RoCE engine. The engine + is used in Hisilicon Hi1610 and more further ICT SoC. + + To compile this driver as a module, choose M here: the module + will be called hns-roce. diff --git a/drivers/infiniband/hw/hns/Makefile b/drivers/infiniband/hw/hns/Makefile new file mode 100644 index 000000000000..7e8ebd24dcae --- /dev/null +++ b/drivers/infiniband/hw/hns/Makefile @@ -0,0 +1,8 @@ +# +# Makefile for the Hisilicon RoCE drivers. +# + +obj-$(CONFIG_INFINIBAND_HNS) += hns-roce.o +hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_eq.o hns_roce_pd.o \ + hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \ + hns_roce_cq.o hns_roce_alloc.o hns_roce_hw_v1.o diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c new file mode 100644 index 000000000000..24f79ee39fdf --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> +#include "hns_roce_device.h" + +#define HNS_ROCE_PORT_NUM_SHIFT 24 +#define HNS_ROCE_VLAN_SL_BIT_MASK 7 +#define HNS_ROCE_VLAN_SL_SHIFT 13 + +struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); + struct device *dev = &hr_dev->pdev->dev; + struct ib_gid_attr gid_attr; + struct hns_roce_ah *ah; + u16 vlan_tag = 0xffff; + struct in6_addr in6; + union ib_gid sgid; + int ret; + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + /* Get mac address */ + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(ah_attr->grh.dgid.raw)); + if (rdma_is_multicast_addr(&in6)) + rdma_get_mcast_mac(&in6, ah->av.mac); + else + memcpy(ah->av.mac, ah_attr->dmac, sizeof(ah_attr->dmac)); + + /* Get source gid */ + ret = ib_get_cached_gid(ibpd->device, ah_attr->port_num, + ah_attr->grh.sgid_index, &sgid, &gid_attr); + if (ret) { + dev_err(dev, "get sgid failed! ret = %d\n", ret); + kfree(ah); + return ERR_PTR(ret); + } + + if (gid_attr.ndev) { + if (is_vlan_dev(gid_attr.ndev)) + vlan_tag = vlan_dev_vlan_id(gid_attr.ndev); + dev_put(gid_attr.ndev); + } + + if (vlan_tag < 0x1000) + vlan_tag |= (ah_attr->sl & HNS_ROCE_VLAN_SL_BIT_MASK) << + HNS_ROCE_VLAN_SL_SHIFT; + + ah->av.port_pd = cpu_to_be32(to_hr_pd(ibpd)->pdn | (ah_attr->port_num << + HNS_ROCE_PORT_NUM_SHIFT)); + ah->av.gid_index = ah_attr->grh.sgid_index; + ah->av.vlan = cpu_to_le16(vlan_tag); + dev_dbg(dev, "gid_index = 0x%x,vlan = 0x%x\n", ah->av.gid_index, + ah->av.vlan); + + if (ah_attr->static_rate) + ah->av.stat_rate = IB_RATE_10_GBPS; + + memcpy(ah->av.dgid, ah_attr->grh.dgid.raw, HNS_ROCE_GID_SIZE); + ah->av.sl_tclass_flowlabel = cpu_to_le32(ah_attr->sl << + HNS_ROCE_SL_SHIFT); + + return &ah->ibah; +} + +int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr) +{ + struct hns_roce_ah *ah = to_hr_ah(ibah); + + memset(ah_attr, 0, sizeof(*ah_attr)); + + ah_attr->sl = le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_SL_SHIFT; + ah_attr->port_num = le32_to_cpu(ah->av.port_pd) >> + HNS_ROCE_PORT_NUM_SHIFT; + ah_attr->static_rate = ah->av.stat_rate; + ah_attr->ah_flags = IB_AH_GRH; + ah_attr->grh.traffic_class = le32_to_cpu(ah->av.sl_tclass_flowlabel) >> + HNS_ROCE_TCLASS_SHIFT; + ah_attr->grh.flow_label = le32_to_cpu(ah->av.sl_tclass_flowlabel) & + HNS_ROCE_FLOW_LABLE_MASK; + ah_attr->grh.hop_limit = ah->av.hop_limit; + ah_attr->grh.sgid_index = ah->av.gid_index; + memcpy(ah_attr->grh.dgid.raw, ah->av.dgid, HNS_ROCE_GID_SIZE); + + return 0; +} + +int hns_roce_destroy_ah(struct ib_ah *ah) +{ + kfree(to_hr_ah(ah)); + + return 0; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c new file mode 100644 index 000000000000..863a17a2de40 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -0,0 +1,257 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include "hns_roce_device.h" + +int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj) +{ + int ret = 0; + + spin_lock(&bitmap->lock); + *obj = find_next_zero_bit(bitmap->table, bitmap->max, bitmap->last); + if (*obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + *obj = find_first_zero_bit(bitmap->table, bitmap->max); + } + + if (*obj < bitmap->max) { + set_bit(*obj, bitmap->table); + bitmap->last = (*obj + 1); + if (bitmap->last == bitmap->max) + bitmap->last = 0; + *obj |= bitmap->top; + } else { + ret = -1; + } + + spin_unlock(&bitmap->lock); + + return ret; +} + +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj) +{ + hns_roce_bitmap_free_range(bitmap, obj, 1); +} + +int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, + int align, unsigned long *obj) +{ + int ret = 0; + int i; + + if (likely(cnt == 1 && align == 1)) + return hns_roce_bitmap_alloc(bitmap, obj); + + spin_lock(&bitmap->lock); + + *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, + bitmap->last, cnt, align - 1); + if (*obj >= bitmap->max) { + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + *obj = bitmap_find_next_zero_area(bitmap->table, bitmap->max, 0, + cnt, align - 1); + } + + if (*obj < bitmap->max) { + for (i = 0; i < cnt; i++) + set_bit(*obj + i, bitmap->table); + + if (*obj == bitmap->last) { + bitmap->last = (*obj + cnt); + if (bitmap->last >= bitmap->max) + bitmap->last = 0; + } + *obj |= bitmap->top; + } else { + ret = -1; + } + + spin_unlock(&bitmap->lock); + + return ret; +} + +void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, + unsigned long obj, int cnt) +{ + int i; + + obj &= bitmap->max + bitmap->reserved_top - 1; + + spin_lock(&bitmap->lock); + for (i = 0; i < cnt; i++) + clear_bit(obj + i, bitmap->table); + + bitmap->last = min(bitmap->last, obj); + bitmap->top = (bitmap->top + bitmap->max + bitmap->reserved_top) + & bitmap->mask; + spin_unlock(&bitmap->lock); +} + +int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 reserved_top) +{ + u32 i; + + if (num != roundup_pow_of_two(num)) + return -EINVAL; + + bitmap->last = 0; + bitmap->top = 0; + bitmap->max = num - reserved_top; + bitmap->mask = mask; + bitmap->reserved_top = reserved_top; + spin_lock_init(&bitmap->lock); + bitmap->table = kcalloc(BITS_TO_LONGS(bitmap->max), sizeof(long), + GFP_KERNEL); + if (!bitmap->table) + return -ENOMEM; + + for (i = 0; i < reserved_bot; ++i) + set_bit(i, bitmap->table); + + return 0; +} + +void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) +{ + kfree(bitmap->table); +} + +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, + struct hns_roce_buf *buf) +{ + int i; + struct device *dev = &hr_dev->pdev->dev; + u32 bits_per_long = BITS_PER_LONG; + + if (buf->nbufs == 1) { + dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); + } else { + if (bits_per_long == 64) + vunmap(buf->direct.buf); + + for (i = 0; i < buf->nbufs; ++i) + if (buf->page_list[i].buf) + dma_free_coherent(&hr_dev->pdev->dev, PAGE_SIZE, + buf->page_list[i].buf, + buf->page_list[i].map); + kfree(buf->page_list); + } +} + +int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, + struct hns_roce_buf *buf) +{ + int i = 0; + dma_addr_t t; + struct page **pages; + struct device *dev = &hr_dev->pdev->dev; + u32 bits_per_long = BITS_PER_LONG; + + /* SQ/RQ buf lease than one page, SQ + RQ = 8K */ + if (size <= max_direct) { + buf->nbufs = 1; + /* Npages calculated by page_size */ + buf->npages = 1 << get_order(size); + buf->page_shift = PAGE_SHIFT; + /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ + buf->direct.buf = dma_alloc_coherent(dev, size, &t, GFP_KERNEL); + if (!buf->direct.buf) + return -ENOMEM; + + buf->direct.map = t; + + while (t & ((1 << buf->page_shift) - 1)) { + --buf->page_shift; + buf->npages *= 2; + } + + memset(buf->direct.buf, 0, size); + } else { + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE; + buf->npages = buf->nbufs; + buf->page_shift = PAGE_SHIFT; + buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), + GFP_KERNEL); + + if (!buf->page_list) + return -ENOMEM; + + for (i = 0; i < buf->nbufs; ++i) { + buf->page_list[i].buf = dma_alloc_coherent(dev, + PAGE_SIZE, &t, + GFP_KERNEL); + + if (!buf->page_list[i].buf) + goto err_free; + + buf->page_list[i].map = t; + memset(buf->page_list[i].buf, 0, PAGE_SIZE); + } + if (bits_per_long == 64) { + pages = kmalloc_array(buf->nbufs, sizeof(*pages), + GFP_KERNEL); + if (!pages) + goto err_free; + + for (i = 0; i < buf->nbufs; ++i) + pages[i] = virt_to_page(buf->page_list[i].buf); + + buf->direct.buf = vmap(pages, buf->nbufs, VM_MAP, + PAGE_KERNEL); + kfree(pages); + if (!buf->direct.buf) + goto err_free; + } + } + + return 0; + +err_free: + hns_roce_buf_free(hr_dev, size, buf); + return -ENOMEM; +} + +void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) +{ + hns_roce_cleanup_qp_table(hr_dev); + hns_roce_cleanup_cq_table(hr_dev); + hns_roce_cleanup_mr_table(hr_dev); + hns_roce_cleanup_pd_table(hr_dev); + hns_roce_cleanup_uar_table(hr_dev); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.c b/drivers/infiniband/hw/hns/hns_roce_cmd.c new file mode 100644 index 000000000000..2a0b6c05da5f --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.c @@ -0,0 +1,368 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/dmapool.h> +#include <linux/platform_device.h> +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" + +#define CMD_POLL_TOKEN 0xffff +#define CMD_MAX_NUM 32 +#define STATUS_MASK 0xff +#define CMD_TOKEN_MASK 0x1f +#define GO_BIT_TIMEOUT_MSECS 10000 + +enum { + HCR_TOKEN_OFFSET = 0x14, + HCR_STATUS_OFFSET = 0x18, + HCR_GO_BIT = 15, +}; + +static int cmd_pending(struct hns_roce_dev *hr_dev) +{ + u32 status = readl(hr_dev->cmd.hcr + HCR_TOKEN_OFFSET); + + return (!!(status & (1 << HCR_GO_BIT))); +} + +/* this function should be serialized with "hcr_mutex" */ +static int __hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, + u64 in_param, u64 out_param, + u32 in_modifier, u8 op_modifier, u16 op, + u16 token, int event) +{ + struct hns_roce_cmdq *cmd = &hr_dev->cmd; + struct device *dev = &hr_dev->pdev->dev; + u32 __iomem *hcr = (u32 *)cmd->hcr; + int ret = -EAGAIN; + unsigned long end; + u32 val = 0; + + end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies; + while (cmd_pending(hr_dev)) { + if (time_after(jiffies, end)) { + dev_dbg(dev, "jiffies=%d end=%d\n", (int)jiffies, + (int)end); + goto out; + } + cond_resched(); + } + + roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S, + op); + roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M, + ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier); + roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event); + roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1); + roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M, + ROCEE_MB6_ROCEE_MB_TOKEN_S, token); + + __raw_writeq(cpu_to_le64(in_param), hcr + 0); + __raw_writeq(cpu_to_le64(out_param), hcr + 2); + __raw_writel(cpu_to_le32(in_modifier), hcr + 4); + /* Memory barrier */ + wmb(); + + __raw_writel(cpu_to_le32(val), hcr + 5); + + mmiowb(); + ret = 0; + +out: + return ret; +} + +static int hns_roce_cmd_mbox_post_hw(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, u32 in_modifier, + u8 op_modifier, u16 op, u16 token, + int event) +{ + struct hns_roce_cmdq *cmd = &hr_dev->cmd; + int ret = -EAGAIN; + + mutex_lock(&cmd->hcr_mutex); + ret = __hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, token, + event); + mutex_unlock(&cmd->hcr_mutex); + + return ret; +} + +/* this should be called with "poll_sem" */ +static int __hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, + unsigned long timeout) +{ + struct device *dev = &hr_dev->pdev->dev; + u8 __iomem *hcr = hr_dev->cmd.hcr; + unsigned long end = 0; + u32 status = 0; + int ret; + + ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + CMD_POLL_TOKEN, 0); + if (ret) { + dev_err(dev, "[cmd_poll]hns_roce_cmd_mbox_post_hw failed\n"); + goto out; + } + + end = msecs_to_jiffies(timeout) + jiffies; + while (cmd_pending(hr_dev) && time_before(jiffies, end)) + cond_resched(); + + if (cmd_pending(hr_dev)) { + dev_err(dev, "[cmd_poll]hw run cmd TIMEDOUT!\n"); + ret = -ETIMEDOUT; + goto out; + } + + status = le32_to_cpu((__force __be32) + __raw_readl(hcr + HCR_STATUS_OFFSET)); + if ((status & STATUS_MASK) != 0x1) { + dev_err(dev, "mailbox status 0x%x!\n", status); + ret = -EBUSY; + goto out; + } + +out: + return ret; +} + +static int hns_roce_cmd_mbox_poll(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, unsigned long timeout) +{ + int ret; + + down(&hr_dev->cmd.poll_sem); + ret = __hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, in_modifier, + op_modifier, op, timeout); + up(&hr_dev->cmd.poll_sem); + + return ret; +} + +void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, + u64 out_param) +{ + struct hns_roce_cmd_context + *context = &hr_dev->cmd.context[token & hr_dev->cmd.token_mask]; + + if (token != context->token) + return; + + context->result = (status == HNS_ROCE_CMD_SUCCESS) ? 0 : (-EIO); + context->out_param = out_param; + complete(&context->done); +} + +/* this should be called with "use_events" */ +static int __hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, + unsigned long timeout) +{ + struct hns_roce_cmdq *cmd = &hr_dev->cmd; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cmd_context *context; + int ret = 0; + + spin_lock(&cmd->context_lock); + WARN_ON(cmd->free_head < 0); + context = &cmd->context[cmd->free_head]; + context->token += cmd->token_mask + 1; + cmd->free_head = context->next; + spin_unlock(&cmd->context_lock); + + init_completion(&context->done); + + ret = hns_roce_cmd_mbox_post_hw(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + context->token, 1); + if (ret) + goto out; + + /* + * It is timeout when wait_for_completion_timeout return 0 + * The return value is the time limit set in advance + * how many seconds showing + */ + if (!wait_for_completion_timeout(&context->done, + msecs_to_jiffies(timeout))) { + dev_err(dev, "[cmd]wait_for_completion_timeout timeout\n"); + ret = -EBUSY; + goto out; + } + + ret = context->result; + if (ret) { + dev_err(dev, "[cmd]event mod cmd process error!err=%d\n", ret); + goto out; + } + +out: + spin_lock(&cmd->context_lock); + context->next = cmd->free_head; + cmd->free_head = context - cmd->context; + spin_unlock(&cmd->context_lock); + + return ret; +} + +static int hns_roce_cmd_mbox_wait(struct hns_roce_dev *hr_dev, u64 in_param, + u64 out_param, unsigned long in_modifier, + u8 op_modifier, u16 op, unsigned long timeout) +{ + int ret = 0; + + down(&hr_dev->cmd.event_sem); + ret = __hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, timeout); + up(&hr_dev->cmd.event_sem); + + return ret; +} + +int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, + unsigned long in_modifier, u8 op_modifier, u16 op, + unsigned long timeout) +{ + if (hr_dev->cmd.use_events) + return hns_roce_cmd_mbox_wait(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + timeout); + else + return hns_roce_cmd_mbox_poll(hr_dev, in_param, out_param, + in_modifier, op_modifier, op, + timeout); +} + +int hns_roce_cmd_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + + mutex_init(&hr_dev->cmd.hcr_mutex); + sema_init(&hr_dev->cmd.poll_sem, 1); + hr_dev->cmd.use_events = 0; + hr_dev->cmd.toggle = 1; + hr_dev->cmd.max_cmds = CMD_MAX_NUM; + hr_dev->cmd.hcr = hr_dev->reg_base + ROCEE_MB1_REG; + hr_dev->cmd.pool = dma_pool_create("hns_roce_cmd", dev, + HNS_ROCE_MAILBOX_SIZE, + HNS_ROCE_MAILBOX_SIZE, 0); + if (!hr_dev->cmd.pool) + return -ENOMEM; + + return 0; +} + +void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev) +{ + dma_pool_destroy(hr_dev->cmd.pool); +} + +int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; + int i; + + hr_cmd->context = kmalloc(hr_cmd->max_cmds * + sizeof(struct hns_roce_cmd_context), + GFP_KERNEL); + if (!hr_cmd->context) + return -ENOMEM; + + for (i = 0; i < hr_cmd->max_cmds; ++i) { + hr_cmd->context[i].token = i; + hr_cmd->context[i].next = i + 1; + } + + hr_cmd->context[hr_cmd->max_cmds - 1].next = -1; + hr_cmd->free_head = 0; + + sema_init(&hr_cmd->event_sem, hr_cmd->max_cmds); + spin_lock_init(&hr_cmd->context_lock); + + hr_cmd->token_mask = CMD_TOKEN_MASK; + hr_cmd->use_events = 1; + + down(&hr_cmd->poll_sem); + + return 0; +} + +void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; + int i; + + hr_cmd->use_events = 0; + + for (i = 0; i < hr_cmd->max_cmds; ++i) + down(&hr_cmd->event_sem); + + kfree(hr_cmd->context); + up(&hr_cmd->poll_sem); +} + +struct hns_roce_cmd_mailbox + *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmd_mailbox *mailbox; + + mailbox = kmalloc(sizeof(*mailbox), GFP_KERNEL); + if (!mailbox) + return ERR_PTR(-ENOMEM); + + mailbox->buf = dma_pool_alloc(hr_dev->cmd.pool, GFP_KERNEL, + &mailbox->dma); + if (!mailbox->buf) { + kfree(mailbox); + return ERR_PTR(-ENOMEM); + } + + return mailbox; +} + +void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox) +{ + if (!mailbox) + return; + + dma_pool_free(hr_dev->cmd.pool, mailbox->buf, mailbox->dma); + kfree(mailbox); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_cmd.h b/drivers/infiniband/hw/hns/hns_roce_cmd.h new file mode 100644 index 000000000000..e3997d312c55 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_cmd.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_CMD_H +#define _HNS_ROCE_CMD_H + +#define HNS_ROCE_MAILBOX_SIZE 4096 + +enum { + /* TPT commands */ + HNS_ROCE_CMD_SW2HW_MPT = 0xd, + HNS_ROCE_CMD_HW2SW_MPT = 0xf, + + /* CQ commands */ + HNS_ROCE_CMD_SW2HW_CQ = 0x16, + HNS_ROCE_CMD_HW2SW_CQ = 0x17, + + /* QP/EE commands */ + HNS_ROCE_CMD_RST2INIT_QP = 0x19, + HNS_ROCE_CMD_INIT2RTR_QP = 0x1a, + HNS_ROCE_CMD_RTR2RTS_QP = 0x1b, + HNS_ROCE_CMD_RTS2RTS_QP = 0x1c, + HNS_ROCE_CMD_2ERR_QP = 0x1e, + HNS_ROCE_CMD_RTS2SQD_QP = 0x1f, + HNS_ROCE_CMD_SQD2SQD_QP = 0x38, + HNS_ROCE_CMD_SQD2RTS_QP = 0x20, + HNS_ROCE_CMD_2RST_QP = 0x21, + HNS_ROCE_CMD_QUERY_QP = 0x22, +}; + +enum { + HNS_ROCE_CMD_TIME_CLASS_A = 10000, + HNS_ROCE_CMD_TIME_CLASS_B = 10000, + HNS_ROCE_CMD_TIME_CLASS_C = 10000, +}; + +struct hns_roce_cmd_mailbox { + void *buf; + dma_addr_t dma; +}; + +int hns_roce_cmd_mbox(struct hns_roce_dev *hr_dev, u64 in_param, u64 out_param, + unsigned long in_modifier, u8 op_modifier, u16 op, + unsigned long timeout); + +struct hns_roce_cmd_mailbox + *hns_roce_alloc_cmd_mailbox(struct hns_roce_dev *hr_dev); +void hns_roce_free_cmd_mailbox(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox); + +#endif /* _HNS_ROCE_CMD_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h new file mode 100644 index 000000000000..297016103aa7 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_COMMON_H +#define _HNS_ROCE_COMMON_H + +#ifndef assert +#define assert(cond) +#endif + +#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) +#define roce_read(dev, reg) readl((dev)->reg_base + (reg)) +#define roce_raw_write(value, addr) \ + __raw_writel((__force u32)cpu_to_le32(value), (addr)) + +#define roce_get_field(origin, mask, shift) \ + (((origin) & (mask)) >> (shift)) + +#define roce_get_bit(origin, shift) \ + roce_get_field((origin), (1ul << (shift)), (shift)) + +#define roce_set_field(origin, mask, shift, val) \ + do { \ + (origin) &= (~(mask)); \ + (origin) |= (((u32)(val) << (shift)) & (mask)); \ + } while (0) + +#define roce_set_bit(origin, shift, val) \ + roce_set_field((origin), (1ul << (shift)), (shift), (val)) + +#define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 +#define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4 + +#define ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S 5 + +#define ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S 6 + +#define ROCEE_GLB_CFG_ROCEE_PORT_ST_S 10 +#define ROCEE_GLB_CFG_ROCEE_PORT_ST_M \ + (((1UL << 6) - 1) << ROCEE_GLB_CFG_ROCEE_PORT_ST_S) + +#define ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S 16 + +#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S 0 +#define ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M \ + (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S) + +#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S 24 +#define ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M \ + (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S) + +#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S 0 +#define ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M \ + (((1UL << 24) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S) + +#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S 24 +#define ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M \ + (((1UL << 4) - 1) << ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S) + +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S 0 +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M \ + (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S) + +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S 16 +#define ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M \ + (((1UL << 16) - 1) << ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S) + +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S 0 +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M \ + (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S) + +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S 16 +#define ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M \ + (((1UL << 16) - 1) << ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S) + +#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_S 0 +#define ROCEE_RAQ_WL_ROCEE_RAQ_WL_M \ + (((1UL << 8) - 1) << ROCEE_RAQ_WL_ROCEE_RAQ_WL_S) + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S 0 +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M \ + (((1UL << 15) - 1) << \ + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S) + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S 16 +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M \ + (((1UL << 4) - 1) << \ + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S) + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S 20 + +#define ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE 21 + +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S 0 +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S) + +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S 5 +#define ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S) + +#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S 0 +#define ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S) + +#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S 5 +#define ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S) + +#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S 0 +#define ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M \ + (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S) + +#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S 8 +#define ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S 0 +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M \ + (((1UL << 19) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_S 19 + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S 20 +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M \ + (((1UL << 2) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S 22 +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M \ + (((1UL << 5) - 1) << ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S) + +#define ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S 31 + +#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S 0 +#define ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M \ + (((1UL << 3) - 1) << ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S) + +#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S 0 +#define ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M \ + (((1UL << 15) - 1) << ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S) + +#define ROCEE_MB6_ROCEE_MB_CMD_S 0 +#define ROCEE_MB6_ROCEE_MB_CMD_M \ + (((1UL << 8) - 1) << ROCEE_MB6_ROCEE_MB_CMD_S) + +#define ROCEE_MB6_ROCEE_MB_CMD_MDF_S 8 +#define ROCEE_MB6_ROCEE_MB_CMD_MDF_M \ + (((1UL << 4) - 1) << ROCEE_MB6_ROCEE_MB_CMD_MDF_S) + +#define ROCEE_MB6_ROCEE_MB_EVENT_S 14 + +#define ROCEE_MB6_ROCEE_MB_HW_RUN_S 15 + +#define ROCEE_MB6_ROCEE_MB_TOKEN_S 16 +#define ROCEE_MB6_ROCEE_MB_TOKEN_M \ + (((1UL << 16) - 1) << ROCEE_MB6_ROCEE_MB_TOKEN_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S 0 +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M \ + (((1UL << 24) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S 24 +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M \ + (((1UL << 4) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S 28 +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M \ + (((1UL << 3) - 1) << ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S) + +#define ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S 31 + +#define ROCEE_SMAC_H_ROCEE_SMAC_H_S 0 +#define ROCEE_SMAC_H_ROCEE_SMAC_H_M \ + (((1UL << 16) - 1) << ROCEE_SMAC_H_ROCEE_SMAC_H_S) + +#define ROCEE_SMAC_H_ROCEE_PORT_MTU_S 16 +#define ROCEE_SMAC_H_ROCEE_PORT_MTU_M \ + (((1UL << 4) - 1) << ROCEE_SMAC_H_ROCEE_PORT_MTU_S) + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S 0 +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M \ + (((1UL << 2) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S) + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S 8 +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M \ + (((1UL << 4) - 1) << ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S) + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S 17 + +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S 0 +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M \ + (((1UL << 5) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S) + +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S 16 +#define ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M \ + (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S) + +#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S 0 +#define ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M \ + (((1UL << 16) - 1) << ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S) + +#define ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S 16 +#define ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S 1 +#define ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S 0 + +#define ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S 0 +#define ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S 1 + +#define ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S 0 + +#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S 0 +#define ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M \ + (((1UL << 28) - 1) << ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) + +#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S 0 +#define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ + (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + +#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 +#define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ + (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) + +/*************ROCEE_REG DEFINITION****************/ +#define ROCEE_VENDOR_ID_REG 0x0 +#define ROCEE_VENDOR_PART_ID_REG 0x4 + +#define ROCEE_HW_VERSION_REG 0x8 + +#define ROCEE_SYS_IMAGE_GUID_L_REG 0xC +#define ROCEE_SYS_IMAGE_GUID_H_REG 0x10 + +#define ROCEE_PORT_GID_L_0_REG 0x50 +#define ROCEE_PORT_GID_ML_0_REG 0x54 +#define ROCEE_PORT_GID_MH_0_REG 0x58 +#define ROCEE_PORT_GID_H_0_REG 0x5C + +#define ROCEE_BT_CMD_H_REG 0x204 + +#define ROCEE_SMAC_L_0_REG 0x240 +#define ROCEE_SMAC_H_0_REG 0x244 + +#define ROCEE_QP1C_CFG3_0_REG 0x27C + +#define ROCEE_CAEP_AEQE_CONS_IDX_REG 0x3AC +#define ROCEE_CAEP_CEQC_CONS_IDX_0_REG 0x3BC + +#define ROCEE_ECC_UCERR_ALM1_REG 0xB38 +#define ROCEE_ECC_UCERR_ALM2_REG 0xB3C +#define ROCEE_ECC_CERR_ALM1_REG 0xB44 +#define ROCEE_ECC_CERR_ALM2_REG 0xB48 + +#define ROCEE_ACK_DELAY_REG 0x14 +#define ROCEE_GLB_CFG_REG 0x18 + +#define ROCEE_DMAE_USER_CFG1_REG 0x40 +#define ROCEE_DMAE_USER_CFG2_REG 0x44 + +#define ROCEE_DB_SQ_WL_REG 0x154 +#define ROCEE_DB_OTHERS_WL_REG 0x158 +#define ROCEE_RAQ_WL_REG 0x15C +#define ROCEE_WRMS_POL_TIME_INTERVAL_REG 0x160 +#define ROCEE_EXT_DB_SQ_REG 0x164 +#define ROCEE_EXT_DB_SQ_H_REG 0x168 +#define ROCEE_EXT_DB_OTH_REG 0x16C + +#define ROCEE_EXT_DB_OTH_H_REG 0x170 +#define ROCEE_EXT_DB_SQ_WL_EMPTY_REG 0x174 +#define ROCEE_EXT_DB_SQ_WL_REG 0x178 +#define ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG 0x17C +#define ROCEE_EXT_DB_OTHERS_WL_REG 0x180 +#define ROCEE_EXT_RAQ_REG 0x184 +#define ROCEE_EXT_RAQ_H_REG 0x188 + +#define ROCEE_CAEP_CE_INTERVAL_CFG_REG 0x190 +#define ROCEE_CAEP_CE_BURST_NUM_CFG_REG 0x194 +#define ROCEE_BT_CMD_L_REG 0x200 + +#define ROCEE_MB1_REG 0x210 +#define ROCEE_DB_SQ_L_0_REG 0x230 +#define ROCEE_DB_OTHERS_L_0_REG 0x238 +#define ROCEE_QP1C_CFG0_0_REG 0x270 + +#define ROCEE_CAEP_AEQC_AEQE_SHIFT_REG 0x3A0 +#define ROCEE_CAEP_CEQC_SHIFT_0_REG 0x3B0 +#define ROCEE_CAEP_CE_IRQ_MASK_0_REG 0x3C0 +#define ROCEE_CAEP_CEQ_ALM_OVF_0_REG 0x3C4 +#define ROCEE_CAEP_AE_MASK_REG 0x6C8 +#define ROCEE_CAEP_AE_ST_REG 0x6CC + +#define ROCEE_SDB_ISSUE_PTR_REG 0x758 +#define ROCEE_SDB_SEND_PTR_REG 0x75C +#define ROCEE_SDB_INV_CNT_REG 0x9A4 +#define ROCEE_ECC_UCERR_ALM0_REG 0xB34 +#define ROCEE_ECC_CERR_ALM0_REG 0xB40 + +#endif /* _HNS_ROCE_COMMON_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c new file mode 100644 index 000000000000..097365932b09 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include <rdma/ib_umem.h> +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" +#include "hns_roce_user.h" +#include "hns_roce_common.h" + +static void hns_roce_ib_cq_comp(struct hns_roce_cq *hr_cq) +{ + struct ib_cq *ibcq = &hr_cq->ib_cq; + + ibcq->comp_handler(ibcq, ibcq->cq_context); +} + +static void hns_roce_ib_cq_event(struct hns_roce_cq *hr_cq, + enum hns_roce_event event_type) +{ + struct hns_roce_dev *hr_dev; + struct ib_event event; + struct ib_cq *ibcq; + + ibcq = &hr_cq->ib_cq; + hr_dev = to_hr_dev(ibcq->device); + + if (event_type != HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID && + event_type != HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR && + event_type != HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW) { + dev_err(&hr_dev->pdev->dev, + "hns_roce_ib: Unexpected event type 0x%x on CQ %06lx\n", + event_type, hr_cq->cqn); + return; + } + + if (ibcq->event_handler) { + event.device = ibcq->device; + event.event = IB_EVENT_CQ_ERR; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } +} + +static int hns_roce_sw2hw_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) +{ + return hns_roce_cmd_mbox(dev, mailbox->dma, 0, cq_num, 0, + HNS_ROCE_CMD_SW2HW_CQ, HNS_ROCE_CMD_TIME_CLASS_A); +} + +static int hns_roce_cq_alloc(struct hns_roce_dev *hr_dev, int nent, + struct hns_roce_mtt *hr_mtt, + struct hns_roce_uar *hr_uar, + struct hns_roce_cq *hr_cq, int vector) +{ + struct hns_roce_cmd_mailbox *mailbox = NULL; + struct hns_roce_cq_table *cq_table = NULL; + struct device *dev = &hr_dev->pdev->dev; + dma_addr_t dma_handle; + u64 *mtts = NULL; + int ret = 0; + + cq_table = &hr_dev->cq_table; + + /* Get the physical address of cq buf */ + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + hr_mtt->first_seg, &dma_handle); + if (!mtts) { + dev_err(dev, "CQ alloc.Failed to find cq buf addr.\n"); + return -EINVAL; + } + + if (vector >= hr_dev->caps.num_comp_vectors) { + dev_err(dev, "CQ alloc.Invalid vector.\n"); + return -EINVAL; + } + hr_cq->vector = vector; + + ret = hns_roce_bitmap_alloc(&cq_table->bitmap, &hr_cq->cqn); + if (ret == -1) { + dev_err(dev, "CQ alloc.Failed to alloc index.\n"); + return -ENOMEM; + } + + /* Get CQC memory HEM(Hardware Entry Memory) table */ + ret = hns_roce_table_get(hr_dev, &cq_table->table, hr_cq->cqn); + if (ret) { + dev_err(dev, "CQ alloc.Failed to get context mem.\n"); + goto err_out; + } + + /* The cq insert radix tree */ + spin_lock_irq(&cq_table->lock); + /* Radix_tree: The associated pointer and long integer key value like */ + ret = radix_tree_insert(&cq_table->tree, hr_cq->cqn, hr_cq); + spin_unlock_irq(&cq_table->lock); + if (ret) { + dev_err(dev, "CQ alloc.Failed to radix_tree_insert.\n"); + goto err_put; + } + + /* Allocate mailbox memory */ + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto err_radix; + } + + hr_dev->hw->write_cqc(hr_dev, hr_cq, mailbox->buf, mtts, dma_handle, + nent, vector); + + /* Send mailbox to hw */ + ret = hns_roce_sw2hw_cq(hr_dev, mailbox, hr_cq->cqn); + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + if (ret) { + dev_err(dev, "CQ alloc.Failed to cmd mailbox.\n"); + goto err_radix; + } + + hr_cq->cons_index = 0; + hr_cq->uar = hr_uar; + + atomic_set(&hr_cq->refcount, 1); + init_completion(&hr_cq->free); + + return 0; + +err_radix: + spin_lock_irq(&cq_table->lock); + radix_tree_delete(&cq_table->tree, hr_cq->cqn); + spin_unlock_irq(&cq_table->lock); + +err_put: + hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); + +err_out: + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); + return ret; +} + +static int hns_roce_hw2sw_cq(struct hns_roce_dev *dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long cq_num) +{ + return hns_roce_cmd_mbox(dev, 0, mailbox ? mailbox->dma : 0, cq_num, + mailbox ? 0 : 1, HNS_ROCE_CMD_HW2SW_CQ, + HNS_ROCE_CMD_TIME_CLASS_A); +} + +static void hns_roce_free_cq(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct device *dev = &hr_dev->pdev->dev; + int ret; + + ret = hns_roce_hw2sw_cq(hr_dev, NULL, hr_cq->cqn); + if (ret) + dev_err(dev, "HW2SW_CQ failed (%d) for CQN %06lx\n", ret, + hr_cq->cqn); + + /* Waiting interrupt process procedure carried out */ + synchronize_irq(hr_dev->eq_table.eq[hr_cq->vector].irq); + + /* wait for all interrupt processed */ + if (atomic_dec_and_test(&hr_cq->refcount)) + complete(&hr_cq->free); + wait_for_completion(&hr_cq->free); + + spin_lock_irq(&cq_table->lock); + radix_tree_delete(&cq_table->tree, hr_cq->cqn); + spin_unlock_irq(&cq_table->lock); + + hns_roce_table_put(hr_dev, &cq_table->table, hr_cq->cqn); + hns_roce_bitmap_free(&cq_table->bitmap, hr_cq->cqn); +} + +static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, + struct ib_ucontext *context, + struct hns_roce_cq_buf *buf, + struct ib_umem **umem, u64 buf_addr, int cqe) +{ + int ret; + + *umem = ib_umem_get(context, buf_addr, cqe * hr_dev->caps.cq_entry_sz, + IB_ACCESS_LOCAL_WRITE, 1); + if (IS_ERR(*umem)) + return PTR_ERR(*umem); + + ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(*umem), + ilog2((unsigned int)(*umem)->page_size), + &buf->hr_mtt); + if (ret) + goto err_buf; + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &buf->hr_mtt, *umem); + if (ret) + goto err_mtt; + + return 0; + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + +err_buf: + ib_umem_release(*umem); + return ret; +} + +static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_cq_buf *buf, u32 nent) +{ + int ret; + + ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, + PAGE_SIZE * 2, &buf->hr_buf); + if (ret) + goto out; + + ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, + buf->hr_buf.page_shift, &buf->hr_mtt); + if (ret) + goto err_buf; + + ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf); + if (ret) + goto err_mtt; + + return 0; + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt); + +err_buf: + hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz, + &buf->hr_buf); +out: + return ret; +} + +static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, + struct hns_roce_cq_buf *buf, int cqe) +{ + hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz, + &buf->hr_buf); +} + +struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_ib_create_cq ucmd; + struct hns_roce_cq *hr_cq = NULL; + struct hns_roce_uar *uar = NULL; + int vector = attr->comp_vector; + int cq_entries = attr->cqe; + int ret = 0; + + if (cq_entries < 1 || cq_entries > hr_dev->caps.max_cqes) { + dev_err(dev, "Creat CQ failed. entries=%d, max=%d\n", + cq_entries, hr_dev->caps.max_cqes); + return ERR_PTR(-EINVAL); + } + + hr_cq = kmalloc(sizeof(*hr_cq), GFP_KERNEL); + if (!hr_cq) + return ERR_PTR(-ENOMEM); + + /* In v1 engine, parameter verification */ + if (cq_entries < HNS_ROCE_MIN_CQE_NUM) + cq_entries = HNS_ROCE_MIN_CQE_NUM; + + cq_entries = roundup_pow_of_two((unsigned int)cq_entries); + hr_cq->ib_cq.cqe = cq_entries - 1; + spin_lock_init(&hr_cq->lock); + + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + dev_err(dev, "Failed to copy_from_udata.\n"); + ret = -EFAULT; + goto err_cq; + } + + /* Get user space address, write it into mtt table */ + ret = hns_roce_ib_get_cq_umem(hr_dev, context, &hr_cq->hr_buf, + &hr_cq->umem, ucmd.buf_addr, + cq_entries); + if (ret) { + dev_err(dev, "Failed to get_cq_umem.\n"); + goto err_cq; + } + + /* Get user space parameters */ + uar = &to_hr_ucontext(context)->uar; + } else { + /* Init mmt table and write buff address to mtt table */ + ret = hns_roce_ib_alloc_cq_buf(hr_dev, &hr_cq->hr_buf, + cq_entries); + if (ret) { + dev_err(dev, "Failed to alloc_cq_buf.\n"); + goto err_cq; + } + + uar = &hr_dev->priv_uar; + hr_cq->cq_db_l = hr_dev->reg_base + ROCEE_DB_OTHERS_L_0_REG + + 0x1000 * uar->index; + } + + /* Allocate cq index, fill cq_context */ + ret = hns_roce_cq_alloc(hr_dev, cq_entries, &hr_cq->hr_buf.hr_mtt, uar, + hr_cq, vector); + if (ret) { + dev_err(dev, "Creat CQ .Failed to cq_alloc.\n"); + goto err_mtt; + } + + /* Get created cq handler and carry out event */ + hr_cq->comp = hns_roce_ib_cq_comp; + hr_cq->event = hns_roce_ib_cq_event; + hr_cq->cq_depth = cq_entries; + + if (context) { + if (ib_copy_to_udata(udata, &hr_cq->cqn, sizeof(u64))) { + ret = -EFAULT; + goto err_cqc; + } + } + + return &hr_cq->ib_cq; + +err_cqc: + hns_roce_free_cq(hr_dev, hr_cq); + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + if (context) + ib_umem_release(hr_cq->umem); + else + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, + hr_cq->ib_cq.cqe); + +err_cq: + kfree(hr_cq); + return ERR_PTR(ret); +} + +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); + struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); + + hns_roce_free_cq(hr_dev, hr_cq); + hns_roce_mtt_cleanup(hr_dev, &hr_cq->hr_buf.hr_mtt); + + if (ib_cq->uobject) + ib_umem_release(hr_cq->umem); + else + /* Free the buff of stored cq */ + hns_roce_ib_free_cq_buf(hr_dev, &hr_cq->hr_buf, ib_cq->cqe); + + kfree(hr_cq); + + return 0; +} + +void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cq *cq; + + cq = radix_tree_lookup(&hr_dev->cq_table.tree, + cqn & (hr_dev->caps.num_cqs - 1)); + if (!cq) { + dev_warn(dev, "Completion event for bogus CQ 0x%08x\n", cqn); + return; + } + + cq->comp(cq); +} + +void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cq *cq; + + cq = radix_tree_lookup(&cq_table->tree, + cqn & (hr_dev->caps.num_cqs - 1)); + if (cq) + atomic_inc(&cq->refcount); + + if (!cq) { + dev_warn(dev, "Async event for bogus CQ %08x\n", cqn); + return; + } + + cq->event(cq, (enum hns_roce_event)event_type); + + if (atomic_dec_and_test(&cq->refcount)) + complete(&cq->free); +} + +int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + spin_lock_init(&cq_table->lock); + INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC); + + return hns_roce_bitmap_init(&cq_table->bitmap, hr_dev->caps.num_cqs, + hr_dev->caps.num_cqs - 1, + hr_dev->caps.reserved_cqs, 0); +} + +void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->cq_table.bitmap); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h new file mode 100644 index 000000000000..341731553a60 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -0,0 +1,728 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_DEVICE_H +#define _HNS_ROCE_DEVICE_H + +#include <rdma/ib_verbs.h> + +#define DRV_NAME "hns_roce" + +#define MAC_ADDR_OCTET_NUM 6 +#define HNS_ROCE_MAX_MSG_LEN 0x80000000 + +#define HNS_ROCE_ALOGN_UP(a, b) ((((a) + (b) - 1) / (b)) * (b)) + +#define HNS_ROCE_IB_MIN_SQ_STRIDE 6 + +#define HNS_ROCE_BA_SIZE (32 * 4096) + +/* Hardware specification only for v1 engine */ +#define HNS_ROCE_MIN_CQE_NUM 0x40 +#define HNS_ROCE_MIN_WQE_NUM 0x20 + +/* Hardware specification only for v1 engine */ +#define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 +#define HNS_ROCE_MAX_MTPT_PBL_NUM 0x100000 + +#define HNS_ROCE_MAX_IRQ_NUM 34 + +#define HNS_ROCE_COMP_VEC_NUM 32 + +#define HNS_ROCE_AEQE_VEC_NUM 1 +#define HNS_ROCE_AEQE_OF_VEC_NUM 1 + +/* 4G/4K = 1M */ +#define HNS_ROCE_SL_SHIFT 28 +#define HNS_ROCE_TCLASS_SHIFT 20 +#define HNS_ROCE_FLOW_LABLE_MASK 0xfffff + +#define HNS_ROCE_MAX_PORTS 6 +#define HNS_ROCE_MAX_GID_NUM 16 +#define HNS_ROCE_GID_SIZE 16 + +#define MR_TYPE_MR 0x00 +#define MR_TYPE_DMA 0x03 + +#define PKEY_ID 0xffff +#define GUID_LEN 8 +#define NODE_DESC_SIZE 64 +#define DB_REG_OFFSET 0x1000 + +#define SERV_TYPE_RC 0 +#define SERV_TYPE_RD 1 +#define SERV_TYPE_UC 2 +#define SERV_TYPE_UD 3 + +#define PAGES_SHIFT_8 8 +#define PAGES_SHIFT_16 16 +#define PAGES_SHIFT_24 24 +#define PAGES_SHIFT_32 32 + +enum hns_roce_qp_state { + HNS_ROCE_QP_STATE_RST, + HNS_ROCE_QP_STATE_INIT, + HNS_ROCE_QP_STATE_RTR, + HNS_ROCE_QP_STATE_RTS, + HNS_ROCE_QP_STATE_SQD, + HNS_ROCE_QP_STATE_ERR, + HNS_ROCE_QP_NUM_STATE, +}; + +enum hns_roce_event { + HNS_ROCE_EVENT_TYPE_PATH_MIG = 0x01, + HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED = 0x02, + HNS_ROCE_EVENT_TYPE_COMM_EST = 0x03, + HNS_ROCE_EVENT_TYPE_SQ_DRAINED = 0x04, + HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR = 0x05, + HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR = 0x06, + HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR = 0x07, + HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH = 0x08, + HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH = 0x09, + HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR = 0x0a, + HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR = 0x0b, + HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW = 0x0c, + HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID = 0x0d, + HNS_ROCE_EVENT_TYPE_PORT_CHANGE = 0x0f, + /* 0x10 and 0x11 is unused in currently application case */ + HNS_ROCE_EVENT_TYPE_DB_OVERFLOW = 0x12, + HNS_ROCE_EVENT_TYPE_MB = 0x13, + HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW = 0x14, +}; + +/* Local Work Queue Catastrophic Error,SUBTYPE 0x5 */ +enum { + HNS_ROCE_LWQCE_QPC_ERROR = 1, + HNS_ROCE_LWQCE_MTU_ERROR = 2, + HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR = 3, + HNS_ROCE_LWQCE_WQE_ADDR_ERROR = 4, + HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR = 5, + HNS_ROCE_LWQCE_SL_ERROR = 6, + HNS_ROCE_LWQCE_PORT_ERROR = 7, +}; + +/* Local Access Violation Work Queue Error,SUBTYPE 0x7 */ +enum { + HNS_ROCE_LAVWQE_R_KEY_VIOLATION = 1, + HNS_ROCE_LAVWQE_LENGTH_ERROR = 2, + HNS_ROCE_LAVWQE_VA_ERROR = 3, + HNS_ROCE_LAVWQE_PD_ERROR = 4, + HNS_ROCE_LAVWQE_RW_ACC_ERROR = 5, + HNS_ROCE_LAVWQE_KEY_STATE_ERROR = 6, + HNS_ROCE_LAVWQE_MR_OPERATION_ERROR = 7, +}; + +/* DOORBELL overflow subtype */ +enum { + HNS_ROCE_DB_SUBTYPE_SDB_OVF = 1, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF = 2, + HNS_ROCE_DB_SUBTYPE_ODB_OVF = 3, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF = 4, + HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP = 5, + HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP = 6, +}; + +enum { + /* RQ&SRQ related operations */ + HNS_ROCE_OPCODE_SEND_DATA_RECEIVE = 0x06, + HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE = 0x07, +}; + +#define HNS_ROCE_CMD_SUCCESS 1 + +#define HNS_ROCE_PORT_DOWN 0 +#define HNS_ROCE_PORT_UP 1 + +#define HNS_ROCE_MTT_ENTRY_PER_SEG 8 + +#define PAGE_ADDR_SHIFT 12 + +struct hns_roce_uar { + u64 pfn; + unsigned long index; +}; + +struct hns_roce_ucontext { + struct ib_ucontext ibucontext; + struct hns_roce_uar uar; +}; + +struct hns_roce_pd { + struct ib_pd ibpd; + unsigned long pdn; +}; + +struct hns_roce_bitmap { + /* Bitmap Traversal last a bit which is 1 */ + unsigned long last; + unsigned long top; + unsigned long max; + unsigned long reserved_top; + unsigned long mask; + spinlock_t lock; + unsigned long *table; +}; + +/* Order bitmap length -- bit num compute formula: 1 << (max_order - order) */ +/* Order = 0: bitmap is biggest, order = max bitmap is least (only a bit) */ +/* Every bit repesent to a partner free/used status in bitmap */ +/* +* Initial, bits of other bitmap are all 0 except that a bit of max_order is 1 +* Bit = 1 represent to idle and available; bit = 0: not available +*/ +struct hns_roce_buddy { + /* Members point to every order level bitmap */ + unsigned long **bits; + /* Represent to avail bits of the order level bitmap */ + u32 *num_free; + int max_order; + spinlock_t lock; +}; + +/* For Hardware Entry Memory */ +struct hns_roce_hem_table { + /* HEM type: 0 = qpc, 1 = mtt, 2 = cqc, 3 = srq, 4 = other */ + u32 type; + /* HEM array elment num */ + unsigned long num_hem; + /* HEM entry record obj total num */ + unsigned long num_obj; + /*Single obj size */ + unsigned long obj_size; + int lowmem; + struct mutex mutex; + struct hns_roce_hem **hem; +}; + +struct hns_roce_mtt { + unsigned long first_seg; + int order; + int page_shift; +}; + +/* Only support 4K page size for mr register */ +#define MR_SIZE_4K 0 + +struct hns_roce_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + u64 iova; /* MR's virtual orignal addr */ + u64 size; /* Address range of MR */ + u32 key; /* Key of MR */ + u32 pd; /* PD num of MR */ + u32 access;/* Access permission of MR */ + int enabled; /* MR's active status */ + int type; /* MR's register type */ + u64 *pbl_buf;/* MR's PBL space */ + dma_addr_t pbl_dma_addr; /* MR's PBL space PA */ +}; + +struct hns_roce_mr_table { + struct hns_roce_bitmap mtpt_bitmap; + struct hns_roce_buddy mtt_buddy; + struct hns_roce_hem_table mtt_table; + struct hns_roce_hem_table mtpt_table; +}; + +struct hns_roce_wq { + u64 *wrid; /* Work request ID */ + spinlock_t lock; + int wqe_cnt; /* WQE num */ + u32 max_post; + int max_gs; + int offset; + int wqe_shift;/* WQE size */ + u32 head; + u32 tail; + void __iomem *db_reg_l; +}; + +struct hns_roce_buf_list { + void *buf; + dma_addr_t map; +}; + +struct hns_roce_buf { + struct hns_roce_buf_list direct; + struct hns_roce_buf_list *page_list; + int nbufs; + u32 npages; + int page_shift; +}; + +struct hns_roce_cq_buf { + struct hns_roce_buf hr_buf; + struct hns_roce_mtt hr_mtt; +}; + +struct hns_roce_cq { + struct ib_cq ib_cq; + struct hns_roce_cq_buf hr_buf; + spinlock_t lock; + struct ib_umem *umem; + void (*comp)(struct hns_roce_cq *); + void (*event)(struct hns_roce_cq *, enum hns_roce_event); + + struct hns_roce_uar *uar; + u32 cq_depth; + u32 cons_index; + void __iomem *cq_db_l; + void __iomem *tptr_addr; + unsigned long cqn; + u32 vector; + atomic_t refcount; + struct completion free; +}; + +struct hns_roce_srq { + struct ib_srq ibsrq; + int srqn; +}; + +struct hns_roce_uar_table { + struct hns_roce_bitmap bitmap; +}; + +struct hns_roce_qp_table { + struct hns_roce_bitmap bitmap; + spinlock_t lock; + struct hns_roce_hem_table qp_table; + struct hns_roce_hem_table irrl_table; +}; + +struct hns_roce_cq_table { + struct hns_roce_bitmap bitmap; + spinlock_t lock; + struct radix_tree_root tree; + struct hns_roce_hem_table table; +}; + +struct hns_roce_raq_table { + struct hns_roce_buf_list *e_raq_buf; +}; + +struct hns_roce_av { + __le32 port_pd; + u8 gid_index; + u8 stat_rate; + u8 hop_limit; + __le32 sl_tclass_flowlabel; + u8 dgid[HNS_ROCE_GID_SIZE]; + u8 mac[6]; + __le16 vlan; +}; + +struct hns_roce_ah { + struct ib_ah ibah; + struct hns_roce_av av; +}; + +struct hns_roce_cmd_context { + struct completion done; + int result; + int next; + u64 out_param; + u16 token; +}; + +struct hns_roce_cmdq { + struct dma_pool *pool; + u8 __iomem *hcr; + struct mutex hcr_mutex; + struct semaphore poll_sem; + /* + * Event mode: cmd register mutex protection, + * ensure to not exceed max_cmds and user use limit region + */ + struct semaphore event_sem; + int max_cmds; + spinlock_t context_lock; + int free_head; + struct hns_roce_cmd_context *context; + /* + * Result of get integer part + * which max_comds compute according a power of 2 + */ + u16 token_mask; + /* + * Process whether use event mode, init default non-zero + * After the event queue of cmd event ready, + * can switch into event mode + * close device, switch into poll mode(non event mode) + */ + u8 use_events; + u8 toggle; +}; + +struct hns_roce_dev; + +struct hns_roce_qp { + struct ib_qp ibqp; + struct hns_roce_buf hr_buf; + struct hns_roce_wq rq; + __le64 doorbell_qpn; + __le32 sq_signal_bits; + u32 sq_next_wqe; + int sq_max_wqes_per_wr; + int sq_spare_wqes; + struct hns_roce_wq sq; + + struct ib_umem *umem; + struct hns_roce_mtt mtt; + u32 buff_size; + struct mutex mutex; + u8 port; + u8 phy_port; + u8 sl; + u8 resp_depth; + u8 state; + u32 access_flags; + u32 pkey_index; + void (*event)(struct hns_roce_qp *, + enum hns_roce_event); + unsigned long qpn; + + atomic_t refcount; + struct completion free; +}; + +struct hns_roce_sqp { + struct hns_roce_qp hr_qp; +}; + +struct hns_roce_ib_iboe { + spinlock_t lock; + struct net_device *netdevs[HNS_ROCE_MAX_PORTS]; + struct notifier_block nb; + struct notifier_block nb_inet; + /* 16 GID is shared by 6 port in v1 engine. */ + union ib_gid gid_table[HNS_ROCE_MAX_GID_NUM]; + u8 phy_port[HNS_ROCE_MAX_PORTS]; +}; + +struct hns_roce_eq { + struct hns_roce_dev *hr_dev; + void __iomem *doorbell; + + int type_flag;/* Aeq:1 ceq:0 */ + int eqn; + u32 entries; + int log_entries; + int eqe_size; + int irq; + int log_page_size; + int cons_index; + struct hns_roce_buf_list *buf_list; +}; + +struct hns_roce_eq_table { + struct hns_roce_eq *eq; + void __iomem **eqc_base; +}; + +struct hns_roce_caps { + u8 num_ports; + int gid_table_len[HNS_ROCE_MAX_PORTS]; + int pkey_table_len[HNS_ROCE_MAX_PORTS]; + int local_ca_ack_delay; + int num_uars; + u32 phy_num_uars; + u32 max_sq_sg; /* 2 */ + u32 max_sq_inline; /* 32 */ + u32 max_rq_sg; /* 2 */ + int num_qps; /* 256k */ + u32 max_wqes; /* 16k */ + u32 max_sq_desc_sz; /* 64 */ + u32 max_rq_desc_sz; /* 64 */ + int max_qp_init_rdma; + int max_qp_dest_rdma; + int num_cqs; + int max_cqes; + int reserved_cqs; + int num_aeq_vectors; /* 1 */ + int num_comp_vectors; /* 32 ceq */ + int num_other_vectors; + int num_mtpts; + u32 num_mtt_segs; + int reserved_mrws; + int reserved_uars; + int num_pds; + int reserved_pds; + u32 mtt_entry_sz; + u32 cq_entry_sz; + u32 page_size_cap; + u32 reserved_lkey; + int mtpt_entry_sz; + int qpc_entry_sz; + int irrl_entry_sz; + int cqc_entry_sz; + int aeqe_depth; + int ceqe_depth[HNS_ROCE_COMP_VEC_NUM]; + enum ib_mtu max_mtu; +}; + +struct hns_roce_hw { + int (*reset)(struct hns_roce_dev *hr_dev, bool enable); + void (*hw_profile)(struct hns_roce_dev *hr_dev); + int (*hw_init)(struct hns_roce_dev *hr_dev); + void (*hw_exit)(struct hns_roce_dev *hr_dev); + void (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index, + union ib_gid *gid); + void (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr); + void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port, + enum ib_mtu mtu); + int (*write_mtpt)(void *mb_buf, struct hns_roce_mr *mr, + unsigned long mtpt_idx); + void (*write_cqc)(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, + dma_addr_t dma_handle, int nent, u32 vector); + int (*clear_hem)(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj); + int (*query_qp)(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr); + int (*modify_qp)(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state); + int (*destroy_qp)(struct ib_qp *ibqp); + int (*post_send)(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); + int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + int (*req_notify_cq)(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); + int (*poll_cq)(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); + void *priv; +}; + +struct hns_roce_dev { + struct ib_device ib_dev; + struct platform_device *pdev; + struct hns_roce_uar priv_uar; + const char *irq_names[HNS_ROCE_MAX_IRQ_NUM]; + spinlock_t sm_lock; + spinlock_t bt_cmd_lock; + struct hns_roce_ib_iboe iboe; + + int irq[HNS_ROCE_MAX_IRQ_NUM]; + u8 __iomem *reg_base; + struct hns_roce_caps caps; + struct radix_tree_root qp_table_tree; + + unsigned char dev_addr[HNS_ROCE_MAX_PORTS][MAC_ADDR_OCTET_NUM]; + u64 sys_image_guid; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_rev; + void __iomem *priv_addr; + + struct hns_roce_cmdq cmd; + struct hns_roce_bitmap pd_bitmap; + struct hns_roce_uar_table uar_table; + struct hns_roce_mr_table mr_table; + struct hns_roce_cq_table cq_table; + struct hns_roce_qp_table qp_table; + struct hns_roce_eq_table eq_table; + + int cmd_mod; + int loop_idc; + struct hns_roce_hw *hw; +}; + +static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) +{ + return container_of(ib_dev, struct hns_roce_dev, ib_dev); +} + +static inline struct hns_roce_ucontext + *to_hr_ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct hns_roce_ucontext, ibucontext); +} + +static inline struct hns_roce_pd *to_hr_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct hns_roce_pd, ibpd); +} + +static inline struct hns_roce_ah *to_hr_ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct hns_roce_ah, ibah); +} + +static inline struct hns_roce_mr *to_hr_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct hns_roce_mr, ibmr); +} + +static inline struct hns_roce_qp *to_hr_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct hns_roce_qp, ibqp); +} + +static inline struct hns_roce_cq *to_hr_cq(struct ib_cq *ib_cq) +{ + return container_of(ib_cq, struct hns_roce_cq, ib_cq); +} + +static inline struct hns_roce_srq *to_hr_srq(struct ib_srq *ibsrq) +{ + return container_of(ibsrq, struct hns_roce_srq, ibsrq); +} + +static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp) +{ + return container_of(hr_qp, struct hns_roce_sqp, hr_qp); +} + +static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest) +{ + __raw_writeq(*(u64 *) val, dest); +} + +static inline struct hns_roce_qp + *__hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, u32 qpn) +{ + return radix_tree_lookup(&hr_dev->qp_table_tree, + qpn & (hr_dev->caps.num_qps - 1)); +} + +static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) +{ + u32 bits_per_long_val = BITS_PER_LONG; + + if (bits_per_long_val == 64 || buf->nbufs == 1) + return (char *)(buf->direct.buf) + offset; + else + return (char *)(buf->page_list[offset >> PAGE_SHIFT].buf) + + (offset & (PAGE_SIZE - 1)); +} + +int hns_roce_init_uar_table(struct hns_roce_dev *dev); +int hns_roce_uar_alloc(struct hns_roce_dev *dev, struct hns_roce_uar *uar); +void hns_roce_uar_free(struct hns_roce_dev *dev, struct hns_roce_uar *uar); +void hns_roce_cleanup_uar_table(struct hns_roce_dev *dev); + +int hns_roce_cmd_init(struct hns_roce_dev *hr_dev); +void hns_roce_cmd_cleanup(struct hns_roce_dev *hr_dev); +void hns_roce_cmd_event(struct hns_roce_dev *hr_dev, u16 token, u8 status, + u64 out_param); +int hns_roce_cmd_use_events(struct hns_roce_dev *hr_dev); +void hns_roce_cmd_use_polling(struct hns_roce_dev *hr_dev); + +int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, + struct hns_roce_mtt *mtt); +void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt); +int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct hns_roce_buf *buf); + +int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_cq_table(struct hns_roce_dev *hr_dev); +int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); + +void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); +void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); + +int hns_roce_bitmap_alloc(struct hns_roce_bitmap *bitmap, unsigned long *obj); +void hns_roce_bitmap_free(struct hns_roce_bitmap *bitmap, unsigned long obj); +int hns_roce_bitmap_init(struct hns_roce_bitmap *bitmap, u32 num, u32 mask, + u32 reserved_bot, u32 resetrved_top); +void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap); +void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev); +int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, + int align, unsigned long *obj); +void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, + unsigned long obj, int cnt); + +struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); +int hns_roce_destroy_ah(struct ib_ah *ah); + +struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, + struct ib_ucontext *context, + struct ib_udata *udata); +int hns_roce_dealloc_pd(struct ib_pd *pd); + +struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc); +struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata); +int hns_roce_dereg_mr(struct ib_mr *ibmr); + +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, + struct hns_roce_buf *buf); +int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, + struct hns_roce_buf *buf); + +int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct ib_umem *umem); + +struct ib_qp *hns_roce_create_qp(struct ib_pd *ib_pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata); +int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n); +void *get_send_wqe(struct hns_roce_qp *hr_qp, int n); +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, + struct ib_cq *ib_cq); +enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state); +void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, + struct hns_roce_cq *recv_cq); +void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, + struct hns_roce_cq *recv_cq); +void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); +void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); +void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, + int cnt); +__be32 send_ieth(struct ib_send_wr *wr); +int to_hr_qp_type(int qp_type); + +struct ib_cq *hns_roce_ib_create_cq(struct ib_device *ib_dev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *context, + struct ib_udata *udata); + +int hns_roce_ib_destroy_cq(struct ib_cq *ib_cq); + +void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); +void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); +void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); + +extern struct hns_roce_hw hns_roce_hw_v1; + +#endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.c b/drivers/infiniband/hw/hns/hns_roce_eq.c new file mode 100644 index 000000000000..21e21b03cfb5 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_eq.c @@ -0,0 +1,758 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_eq.h" + +static void eq_set_cons_index(struct hns_roce_eq *eq, int req_not) +{ + roce_raw_write((eq->cons_index & CONS_INDEX_MASK) | + (req_not << eq->log_entries), eq->doorbell); + /* Memory barrier */ + mb(); +} + +static struct hns_roce_aeqe *get_aeqe(struct hns_roce_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->entries - 1)) * + HNS_ROCE_AEQ_ENTRY_SIZE; + + return (struct hns_roce_aeqe *)((u8 *) + (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + + off % HNS_ROCE_BA_SIZE); +} + +static struct hns_roce_aeqe *next_aeqe_sw(struct hns_roce_eq *eq) +{ + struct hns_roce_aeqe *aeqe = get_aeqe(eq, eq->cons_index); + + return (roce_get_bit(aeqe->asyn, HNS_ROCE_AEQE_U32_4_OWNER_S) ^ + !!(eq->cons_index & eq->entries)) ? aeqe : NULL; +} + +static void hns_roce_wq_catas_err_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe, int qpn) +{ + struct device *dev = &hr_dev->pdev->dev; + + dev_warn(dev, "Local Work Queue Catastrophic Error.\n"); + switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { + case HNS_ROCE_LWQCE_QPC_ERROR: + dev_warn(dev, "QP %d, QPC error.\n", qpn); + break; + case HNS_ROCE_LWQCE_MTU_ERROR: + dev_warn(dev, "QP %d, MTU error.\n", qpn); + break; + case HNS_ROCE_LWQCE_WQE_BA_ADDR_ERROR: + dev_warn(dev, "QP %d, WQE BA addr error.\n", qpn); + break; + case HNS_ROCE_LWQCE_WQE_ADDR_ERROR: + dev_warn(dev, "QP %d, WQE addr error.\n", qpn); + break; + case HNS_ROCE_LWQCE_SQ_WQE_SHIFT_ERROR: + dev_warn(dev, "QP %d, WQE shift error\n", qpn); + break; + case HNS_ROCE_LWQCE_SL_ERROR: + dev_warn(dev, "QP %d, SL error.\n", qpn); + break; + case HNS_ROCE_LWQCE_PORT_ERROR: + dev_warn(dev, "QP %d, port error.\n", qpn); + break; + default: + break; + } +} + +static void hns_roce_local_wq_access_err_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe, + int qpn) +{ + struct device *dev = &hr_dev->pdev->dev; + + dev_warn(dev, "Local Access Violation Work Queue Error.\n"); + switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { + case HNS_ROCE_LAVWQE_R_KEY_VIOLATION: + dev_warn(dev, "QP %d, R_key violation.\n", qpn); + break; + case HNS_ROCE_LAVWQE_LENGTH_ERROR: + dev_warn(dev, "QP %d, length error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_VA_ERROR: + dev_warn(dev, "QP %d, VA error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_PD_ERROR: + dev_err(dev, "QP %d, PD error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_RW_ACC_ERROR: + dev_warn(dev, "QP %d, rw acc error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_KEY_STATE_ERROR: + dev_warn(dev, "QP %d, key state error.\n", qpn); + break; + case HNS_ROCE_LAVWQE_MR_OPERATION_ERROR: + dev_warn(dev, "QP %d, MR operation error.\n", qpn); + break; + default: + break; + } +} + +static void hns_roce_qp_err_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe, + int event_type) +{ + struct device *dev = &hr_dev->pdev->dev; + int phy_port; + int qpn; + + qpn = roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S); + phy_port = roce_get_field(aeqe->event.qp_event.qp, + HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M, + HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S); + if (qpn <= 1) + qpn = HNS_ROCE_MAX_PORTS * qpn + phy_port; + + switch (event_type) { + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + dev_warn(dev, "Invalid Req Local Work Queue Error.\n" + "QP %d, phy_port %d.\n", qpn, phy_port); + break; + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + hns_roce_wq_catas_err_handle(hr_dev, aeqe, qpn); + break; + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + hns_roce_local_wq_access_err_handle(hr_dev, aeqe, qpn); + break; + default: + break; + } + + hns_roce_qp_event(hr_dev, qpn, event_type); +} + +static void hns_roce_cq_err_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe, + int event_type) +{ + struct device *dev = &hr_dev->pdev->dev; + u32 cqn; + + cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M, + HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S)); + + switch (event_type) { + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: + dev_warn(dev, "CQ 0x%x access err.\n", cqn); + break; + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: + dev_warn(dev, "CQ 0x%x overflow\n", cqn); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID: + dev_warn(dev, "CQ 0x%x ID invalid.\n", cqn); + break; + default: + break; + } + + hns_roce_cq_event(hr_dev, cqn, event_type); +} + +static void hns_roce_db_overflow_handle(struct hns_roce_dev *hr_dev, + struct hns_roce_aeqe *aeqe) +{ + struct device *dev = &hr_dev->pdev->dev; + + switch (roce_get_field(aeqe->asyn, HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S)) { + case HNS_ROCE_DB_SUBTYPE_SDB_OVF: + dev_warn(dev, "SDB overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_SDB_ALM_OVF: + dev_warn(dev, "SDB almost overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_SDB_ALM_EMP: + dev_warn(dev, "SDB almost empty.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_ODB_OVF: + dev_warn(dev, "ODB overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_ODB_ALM_OVF: + dev_warn(dev, "ODB almost overflow.\n"); + break; + case HNS_ROCE_DB_SUBTYPE_ODB_ALM_EMP: + dev_warn(dev, "SDB almost empty.\n"); + break; + default: + break; + } +} + +static int hns_roce_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_aeqe *aeqe; + int aeqes_found = 0; + int event_type; + + while ((aeqe = next_aeqe_sw(eq))) { + dev_dbg(dev, "aeqe = %p, aeqe->asyn.event_type = 0x%lx\n", aeqe, + roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S)); + /* Memory barrier */ + rmb(); + + event_type = roce_get_field(aeqe->asyn, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M, + HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S); + switch (event_type) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + dev_warn(dev, "PATH MIG not supported\n"); + break; + case HNS_ROCE_EVENT_TYPE_COMM_EST: + dev_warn(dev, "COMMUNICATION established\n"); + break; + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: + dev_warn(dev, "SQ DRAINED not supported\n"); + break; + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + dev_warn(dev, "PATH MIG failed\n"); + break; + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + hns_roce_qp_err_handle(hr_dev, aeqe, event_type); + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: + dev_warn(dev, "SRQ not support!\n"); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: + case HNS_ROCE_EVENT_TYPE_CQ_ID_INVALID: + hns_roce_cq_err_handle(hr_dev, aeqe, event_type); + break; + case HNS_ROCE_EVENT_TYPE_PORT_CHANGE: + dev_warn(dev, "port change.\n"); + break; + case HNS_ROCE_EVENT_TYPE_MB: + hns_roce_cmd_event(hr_dev, + le16_to_cpu(aeqe->event.cmd.token), + aeqe->event.cmd.status, + le64_to_cpu(aeqe->event.cmd.out_param + )); + break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: + hns_roce_db_overflow_handle(hr_dev, aeqe); + break; + case HNS_ROCE_EVENT_TYPE_CEQ_OVERFLOW: + dev_warn(dev, "CEQ 0x%lx overflow.\n", + roce_get_field(aeqe->event.ce_event.ceqe, + HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M, + HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S)); + break; + default: + dev_warn(dev, "Unhandled event %d on EQ %d at index %u\n", + event_type, eq->eqn, eq->cons_index); + break; + }; + + eq->cons_index++; + aeqes_found = 1; + + if (eq->cons_index > 2 * hr_dev->caps.aeqe_depth - 1) { + dev_warn(dev, "cons_index overflow, set back to zero\n" + ); + eq->cons_index = 0; + } + } + + eq_set_cons_index(eq, 0); + + return aeqes_found; +} + +static struct hns_roce_ceqe *get_ceqe(struct hns_roce_eq *eq, u32 entry) +{ + unsigned long off = (entry & (eq->entries - 1)) * + HNS_ROCE_CEQ_ENTRY_SIZE; + + return (struct hns_roce_ceqe *)((u8 *) + (eq->buf_list[off / HNS_ROCE_BA_SIZE].buf) + + off % HNS_ROCE_BA_SIZE); +} + +static struct hns_roce_ceqe *next_ceqe_sw(struct hns_roce_eq *eq) +{ + struct hns_roce_ceqe *ceqe = get_ceqe(eq, eq->cons_index); + + return (!!(roce_get_bit(ceqe->ceqe.comp, + HNS_ROCE_CEQE_CEQE_COMP_OWNER_S))) ^ + (!!(eq->cons_index & eq->entries)) ? ceqe : NULL; +} + +static int hns_roce_ceq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + struct hns_roce_ceqe *ceqe; + int ceqes_found = 0; + u32 cqn; + + while ((ceqe = next_ceqe_sw(eq))) { + /* Memory barrier */ + rmb(); + cqn = roce_get_field(ceqe->ceqe.comp, + HNS_ROCE_CEQE_CEQE_COMP_CQN_M, + HNS_ROCE_CEQE_CEQE_COMP_CQN_S); + hns_roce_cq_completion(hr_dev, cqn); + + ++eq->cons_index; + ceqes_found = 1; + + if (eq->cons_index > 2 * hr_dev->caps.ceqe_depth[eq->eqn] - 1) { + dev_warn(&eq->hr_dev->pdev->dev, + "cons_index overflow, set back to zero\n"); + eq->cons_index = 0; + } + } + + eq_set_cons_index(eq, 0); + + return ceqes_found; +} + +static int hns_roce_aeq_ovf_int(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) +{ + struct device *dev = &eq->hr_dev->pdev->dev; + int eqovf_found = 0; + u32 caepaemask_val; + u32 cealmovf_val; + u32 caepaest_val; + u32 aeshift_val; + u32 ceshift_val; + u32 cemask_val; + int i = 0; + + /** + * AEQ overflow ECC mult bit err CEQ overflow alarm + * must clear interrupt, mask irq, clear irq, cancel mask operation + */ + aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG); + + if (roce_get_bit(aeshift_val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) { + dev_warn(dev, "AEQ overflow!\n"); + + /* Set mask */ + caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); + roce_set_bit(caepaemask_val, + ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_ENABLE); + roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); + + /* Clear int state(INT_WC : write 1 clear) */ + caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG); + roce_set_bit(caepaest_val, + ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1); + roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val); + + /* Clear mask */ + caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); + roce_set_bit(caepaemask_val, + ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_DISABLE); + roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val); + } + + /* CEQ almost overflow */ + for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { + ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG + + i * CEQ_REG_OFFSET); + + if (roce_get_bit(ceshift_val, + ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) { + dev_warn(dev, "CEQ[%d] almost overflow!\n", i); + eqovf_found++; + + /* Set mask */ + cemask_val = roce_read(hr_dev, + ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET); + roce_set_bit(cemask_val, + ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_ENABLE); + roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET, cemask_val); + + /* Clear int state(INT_WC : write 1 clear) */ + cealmovf_val = roce_read(hr_dev, + ROCEE_CAEP_CEQ_ALM_OVF_0_REG + + i * CEQ_REG_OFFSET); + roce_set_bit(cealmovf_val, + ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S, + 1); + roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG + + i * CEQ_REG_OFFSET, cealmovf_val); + + /* Clear mask */ + cemask_val = roce_read(hr_dev, + ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET); + roce_set_bit(cemask_val, + ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S, + HNS_ROCE_INT_MASK_DISABLE); + roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET, cemask_val); + } + } + + /* ECC multi-bit error alarm */ + dev_warn(dev, "ECC UCERR ALARM: 0x%x, 0x%x, 0x%x\n", + roce_read(hr_dev, ROCEE_ECC_UCERR_ALM0_REG), + roce_read(hr_dev, ROCEE_ECC_UCERR_ALM1_REG), + roce_read(hr_dev, ROCEE_ECC_UCERR_ALM2_REG)); + + dev_warn(dev, "ECC CERR ALARM: 0x%x, 0x%x, 0x%x\n", + roce_read(hr_dev, ROCEE_ECC_CERR_ALM0_REG), + roce_read(hr_dev, ROCEE_ECC_CERR_ALM1_REG), + roce_read(hr_dev, ROCEE_ECC_CERR_ALM2_REG)); + + return eqovf_found; +} + +static int hns_roce_eq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) +{ + int eqes_found = 0; + + if (likely(eq->type_flag == HNS_ROCE_CEQ)) + /* CEQ irq routine, CEQ is pulse irq, not clear */ + eqes_found = hns_roce_ceq_int(hr_dev, eq); + else if (likely(eq->type_flag == HNS_ROCE_AEQ)) + /* AEQ irq routine, AEQ is pulse irq, not clear */ + eqes_found = hns_roce_aeq_int(hr_dev, eq); + else + /* AEQ queue overflow irq */ + eqes_found = hns_roce_aeq_ovf_int(hr_dev, eq); + + return eqes_found; +} + +static irqreturn_t hns_roce_msi_x_interrupt(int irq, void *eq_ptr) +{ + int int_work = 0; + struct hns_roce_eq *eq = eq_ptr; + struct hns_roce_dev *hr_dev = eq->hr_dev; + + int_work = hns_roce_eq_int(hr_dev, eq); + + return IRQ_RETVAL(int_work); +} + +static void hns_roce_enable_eq(struct hns_roce_dev *hr_dev, int eq_num, + int enable_flag) +{ + void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num]; + u32 val; + + val = readl(eqc); + + if (enable_flag) + roce_set_field(val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, + HNS_ROCE_EQ_STAT_VALID); + else + roce_set_field(val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, + HNS_ROCE_EQ_STAT_INVALID); + writel(val, eqc); +} + +static int hns_roce_create_eq(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) +{ + void __iomem *eqc = hr_dev->eq_table.eqc_base[eq->eqn]; + struct device *dev = &hr_dev->pdev->dev; + dma_addr_t tmp_dma_addr; + u32 eqconsindx_val = 0; + u32 eqcuridx_val = 0; + u32 eqshift_val = 0; + int num_bas = 0; + int ret; + int i; + + num_bas = (PAGE_ALIGN(eq->entries * eq->eqe_size) + + HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE; + + if ((eq->entries * eq->eqe_size) > HNS_ROCE_BA_SIZE) { + dev_err(dev, "[error]eq buf %d gt ba size(%d) need bas=%d\n", + (eq->entries * eq->eqe_size), HNS_ROCE_BA_SIZE, + num_bas); + return -EINVAL; + } + + eq->buf_list = kcalloc(num_bas, sizeof(*eq->buf_list), GFP_KERNEL); + if (!eq->buf_list) + return -ENOMEM; + + for (i = 0; i < num_bas; ++i) { + eq->buf_list[i].buf = dma_alloc_coherent(dev, HNS_ROCE_BA_SIZE, + &tmp_dma_addr, + GFP_KERNEL); + if (!eq->buf_list[i].buf) { + ret = -ENOMEM; + goto err_out_free_pages; + } + + eq->buf_list[i].map = tmp_dma_addr; + memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE); + } + eq->cons_index = 0; + roce_set_field(eqshift_val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S, + HNS_ROCE_EQ_STAT_INVALID); + roce_set_field(eqshift_val, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M, + ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S, + eq->log_entries); + writel(eqshift_val, eqc); + + /* Configure eq extended address 12~44bit */ + writel((u32)(eq->buf_list[0].map >> 12), (u8 *)eqc + 4); + + /* + * Configure eq extended address 45~49 bit. + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M, + ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S, + eq->buf_list[0].map >> 44); + roce_set_field(eqcuridx_val, + ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M, + ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0); + writel(eqcuridx_val, (u8 *)eqc + 8); + + /* Configure eq consumer index */ + roce_set_field(eqconsindx_val, + ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M, + ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0); + writel(eqconsindx_val, (u8 *)eqc + 0xc); + + return 0; + +err_out_free_pages: + for (i = i - 1; i >= 0; i--) + dma_free_coherent(dev, HNS_ROCE_BA_SIZE, eq->buf_list[i].buf, + eq->buf_list[i].map); + + kfree(eq->buf_list); + return ret; +} + +static void hns_roce_free_eq(struct hns_roce_dev *hr_dev, + struct hns_roce_eq *eq) +{ + int i = 0; + int npages = (PAGE_ALIGN(eq->eqe_size * eq->entries) + + HNS_ROCE_BA_SIZE - 1) / HNS_ROCE_BA_SIZE; + + if (!eq->buf_list) + return; + + for (i = 0; i < npages; ++i) + dma_free_coherent(&hr_dev->pdev->dev, HNS_ROCE_BA_SIZE, + eq->buf_list[i].buf, eq->buf_list[i].map); + + kfree(eq->buf_list); +} + +static void hns_roce_int_mask_en(struct hns_roce_dev *hr_dev) +{ + int i = 0; + u32 aemask_val; + int masken = 0; + + /* AEQ INT */ + aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG); + roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S, + masken); + roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken); + roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val); + + /* CEQ INT */ + for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) { + /* IRQ mask */ + roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG + + i * CEQ_REG_OFFSET, masken); + } +} + +static void hns_roce_ce_int_default_cfg(struct hns_roce_dev *hr_dev) +{ + /* Configure ce int interval */ + roce_write(hr_dev, ROCEE_CAEP_CE_INTERVAL_CFG_REG, + HNS_ROCE_CEQ_DEFAULT_INTERVAL); + + /* Configure ce int burst num */ + roce_write(hr_dev, ROCEE_CAEP_CE_BURST_NUM_CFG_REG, + HNS_ROCE_CEQ_DEFAULT_BURST_NUM); +} + +int hns_roce_init_eq_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_eq *eq = NULL; + int eq_num = 0; + int ret = 0; + int i = 0; + int j = 0; + + eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors; + eq_table->eq = kcalloc(eq_num, sizeof(*eq_table->eq), GFP_KERNEL); + if (!eq_table->eq) + return -ENOMEM; + + eq_table->eqc_base = kcalloc(eq_num, sizeof(*eq_table->eqc_base), + GFP_KERNEL); + if (!eq_table->eqc_base) { + ret = -ENOMEM; + goto err_eqc_base_alloc_fail; + } + + for (i = 0; i < eq_num; i++) { + eq = &eq_table->eq[i]; + eq->hr_dev = hr_dev; + eq->eqn = i; + eq->irq = hr_dev->irq[i]; + eq->log_page_size = PAGE_SHIFT; + + if (i < hr_dev->caps.num_comp_vectors) { + /* CEQ */ + eq_table->eqc_base[i] = hr_dev->reg_base + + ROCEE_CAEP_CEQC_SHIFT_0_REG + + HNS_ROCE_CEQC_REG_OFFSET * i; + eq->type_flag = HNS_ROCE_CEQ; + eq->doorbell = hr_dev->reg_base + + ROCEE_CAEP_CEQC_CONS_IDX_0_REG + + HNS_ROCE_CEQC_REG_OFFSET * i; + eq->entries = hr_dev->caps.ceqe_depth[i]; + eq->log_entries = ilog2(eq->entries); + eq->eqe_size = sizeof(struct hns_roce_ceqe); + } else { + /* AEQ */ + eq_table->eqc_base[i] = hr_dev->reg_base + + ROCEE_CAEP_AEQC_AEQE_SHIFT_REG; + eq->type_flag = HNS_ROCE_AEQ; + eq->doorbell = hr_dev->reg_base + + ROCEE_CAEP_AEQE_CONS_IDX_REG; + eq->entries = hr_dev->caps.aeqe_depth; + eq->log_entries = ilog2(eq->entries); + eq->eqe_size = sizeof(struct hns_roce_aeqe); + } + } + + /* Disable irq */ + hns_roce_int_mask_en(hr_dev); + + /* Configure CE irq interval and burst num */ + hns_roce_ce_int_default_cfg(hr_dev); + + for (i = 0; i < eq_num; i++) { + ret = hns_roce_create_eq(hr_dev, &eq_table->eq[i]); + if (ret) { + dev_err(dev, "eq create failed\n"); + goto err_create_eq_fail; + } + } + + for (j = 0; j < eq_num; j++) { + ret = request_irq(eq_table->eq[j].irq, hns_roce_msi_x_interrupt, + 0, hr_dev->irq_names[j], eq_table->eq + j); + if (ret) { + dev_err(dev, "request irq error!\n"); + goto err_request_irq_fail; + } + } + + for (i = 0; i < eq_num; i++) + hns_roce_enable_eq(hr_dev, i, EQ_ENABLE); + + return 0; + +err_request_irq_fail: + for (j = j - 1; j >= 0; j--) + free_irq(eq_table->eq[j].irq, eq_table->eq + j); + +err_create_eq_fail: + for (i = i - 1; i >= 0; i--) + hns_roce_free_eq(hr_dev, &eq_table->eq[i]); + + kfree(eq_table->eqc_base); + +err_eqc_base_alloc_fail: + kfree(eq_table->eq); + + return ret; +} + +void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev) +{ + int i; + int eq_num; + struct hns_roce_eq_table *eq_table = &hr_dev->eq_table; + + eq_num = hr_dev->caps.num_comp_vectors + hr_dev->caps.num_aeq_vectors; + for (i = 0; i < eq_num; i++) { + /* Disable EQ */ + hns_roce_enable_eq(hr_dev, i, EQ_DISABLE); + + free_irq(eq_table->eq[i].irq, eq_table->eq + i); + + hns_roce_free_eq(hr_dev, &eq_table->eq[i]); + } + + kfree(eq_table->eqc_base); + kfree(eq_table->eq); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_eq.h b/drivers/infiniband/hw/hns/hns_roce_eq.h new file mode 100644 index 000000000000..c6d212d12e03 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_eq.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_EQ_H +#define _HNS_ROCE_EQ_H + +#define HNS_ROCE_CEQ 1 +#define HNS_ROCE_AEQ 2 + +#define HNS_ROCE_CEQ_ENTRY_SIZE 0x4 +#define HNS_ROCE_AEQ_ENTRY_SIZE 0x10 +#define HNS_ROCE_CEQC_REG_OFFSET 0x18 + +#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10 +#define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10 + +#define HNS_ROCE_INT_MASK_DISABLE 0 +#define HNS_ROCE_INT_MASK_ENABLE 1 + +#define EQ_ENABLE 1 +#define EQ_DISABLE 0 +#define CONS_INDEX_MASK 0xffff + +#define CEQ_REG_OFFSET 0x18 + +enum { + HNS_ROCE_EQ_STAT_INVALID = 0, + HNS_ROCE_EQ_STAT_VALID = 2, +}; + +struct hns_roce_aeqe { + u32 asyn; + union { + struct { + u32 qp; + u32 rsv0; + u32 rsv1; + } qp_event; + + struct { + u32 cq; + u32 rsv0; + u32 rsv1; + } cq_event; + + struct { + u32 port; + u32 rsv0; + u32 rsv1; + } port_event; + + struct { + u32 ceqe; + u32 rsv0; + u32 rsv1; + } ce_event; + + struct { + __le64 out_param; + __le16 token; + u8 status; + u8 rsv0; + } __packed cmd; + } event; +}; + +#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S 16 +#define HNS_ROCE_AEQE_U32_4_EVENT_TYPE_M \ + (((1UL << 8) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_TYPE_S) + +#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S 24 +#define HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_M \ + (((1UL << 7) - 1) << HNS_ROCE_AEQE_U32_4_EVENT_SUB_TYPE_S) + +#define HNS_ROCE_AEQE_U32_4_OWNER_S 31 + +#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S 0 +#define HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_M \ + (((1UL << 24) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_QP_QPN_S) + +#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S 25 +#define HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_M \ + (((1UL << 3) - 1) << HNS_ROCE_AEQE_EVENT_QP_EVENT_PORT_NUM_S) + +#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S 0 +#define HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M \ + (((1UL << 16) - 1) << HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S) + +#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S 0 +#define HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_M \ + (((1UL << 5) - 1) << HNS_ROCE_AEQE_EVENT_CE_EVENT_CEQE_CEQN_S) + +struct hns_roce_ceqe { + union { + int comp; + } ceqe; +}; + +#define HNS_ROCE_CEQE_CEQE_COMP_OWNER_S 0 + +#define HNS_ROCE_CEQE_CEQE_COMP_CQN_S 16 +#define HNS_ROCE_CEQE_CEQE_COMP_CQN_M \ + (((1UL << 16) - 1) << HNS_ROCE_CEQE_CEQE_COMP_CQN_S) + +#endif /* _HNS_ROCE_EQ_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c new file mode 100644 index 000000000000..250d8f280390 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include "hns_roce_device.h" +#include "hns_roce_hem.h" +#include "hns_roce_common.h" + +#define HNS_ROCE_HEM_ALLOC_SIZE (1 << 17) +#define HNS_ROCE_TABLE_CHUNK_SIZE (1 << 17) + +#define DMA_ADDR_T_SHIFT 12 +#define BT_BA_SHIFT 32 + +struct hns_roce_hem *hns_roce_alloc_hem(struct hns_roce_dev *hr_dev, int npages, + gfp_t gfp_mask) +{ + struct hns_roce_hem_chunk *chunk = NULL; + struct hns_roce_hem *hem; + struct scatterlist *mem; + int order; + void *buf; + + WARN_ON(gfp_mask & __GFP_HIGHMEM); + + hem = kmalloc(sizeof(*hem), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!hem) + return NULL; + + hem->refcount = 0; + INIT_LIST_HEAD(&hem->chunk_list); + + order = get_order(HNS_ROCE_HEM_ALLOC_SIZE); + + while (npages > 0) { + if (!chunk) { + chunk = kmalloc(sizeof(*chunk), + gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN)); + if (!chunk) + goto fail; + + sg_init_table(chunk->mem, HNS_ROCE_HEM_CHUNK_LEN); + chunk->npages = 0; + chunk->nsg = 0; + list_add_tail(&chunk->list, &hem->chunk_list); + } + + while (1 << order > npages) + --order; + + /* + * Alloc memory one time. If failed, don't alloc small block + * memory, directly return fail. + */ + mem = &chunk->mem[chunk->npages]; + buf = dma_alloc_coherent(&hr_dev->pdev->dev, PAGE_SIZE << order, + &sg_dma_address(mem), gfp_mask); + if (!buf) + goto fail; + + sg_set_buf(mem, buf, PAGE_SIZE << order); + WARN_ON(mem->offset); + sg_dma_len(mem) = PAGE_SIZE << order; + + ++chunk->npages; + ++chunk->nsg; + npages -= 1 << order; + } + + return hem; + +fail: + hns_roce_free_hem(hr_dev, hem); + return NULL; +} + +void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem) +{ + struct hns_roce_hem_chunk *chunk, *tmp; + int i; + + if (!hem) + return; + + list_for_each_entry_safe(chunk, tmp, &hem->chunk_list, list) { + for (i = 0; i < chunk->npages; ++i) + dma_free_coherent(&hr_dev->pdev->dev, + chunk->mem[i].length, + lowmem_page_address(sg_page(&chunk->mem[i])), + sg_dma_address(&chunk->mem[i])); + kfree(chunk); + } + + kfree(hem); +} + +static int hns_roce_set_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj) +{ + struct device *dev = &hr_dev->pdev->dev; + spinlock_t *lock = &hr_dev->bt_cmd_lock; + unsigned long end = 0; + unsigned long flags; + struct hns_roce_hem_iter iter; + void __iomem *bt_cmd; + u32 bt_cmd_h_val = 0; + u32 bt_cmd_val[2]; + u32 bt_cmd_l = 0; + u64 bt_ba = 0; + int ret = 0; + + /* Find the HEM(Hardware Entry Memory) entry */ + unsigned long i = (obj & (table->num_obj - 1)) / + (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); + + switch (table->type) { + case HEM_TYPE_QPC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC); + break; + case HEM_TYPE_MTPT: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, + HEM_TYPE_MTPT); + break; + case HEM_TYPE_CQC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC); + break; + case HEM_TYPE_SRQC: + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, + HEM_TYPE_SRQC); + break; + default: + return ret; + } + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); + roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); + roce_set_bit(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); + + /* Currently iter only a chunk */ + for (hns_roce_hem_first(table->hem[i], &iter); + !hns_roce_hem_last(&iter); hns_roce_hem_next(&iter)) { + bt_ba = hns_roce_hem_addr(&iter) >> DMA_ADDR_T_SHIFT; + + spin_lock_irqsave(lock, flags); + + bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; + + end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies; + while (1) { + if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { + if (!(time_before(jiffies, end))) { + dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); + spin_unlock_irqrestore(lock, flags); + return -EBUSY; + } + } else { + break; + } + msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + } + + bt_cmd_l = (u32)bt_ba; + roce_set_field(bt_cmd_h_val, ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, + bt_ba >> BT_BA_SHIFT); + + bt_cmd_val[0] = bt_cmd_l; + bt_cmd_val[1] = bt_cmd_h_val; + hns_roce_write64_k(bt_cmd_val, + hr_dev->reg_base + ROCEE_BT_CMD_L_REG); + spin_unlock_irqrestore(lock, flags); + } + + return ret; +} + +int hns_roce_table_get(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj) +{ + struct device *dev = &hr_dev->pdev->dev; + int ret = 0; + unsigned long i; + + i = (obj & (table->num_obj - 1)) / (HNS_ROCE_TABLE_CHUNK_SIZE / + table->obj_size); + + mutex_lock(&table->mutex); + + if (table->hem[i]) { + ++table->hem[i]->refcount; + goto out; + } + + table->hem[i] = hns_roce_alloc_hem(hr_dev, + HNS_ROCE_TABLE_CHUNK_SIZE >> PAGE_SHIFT, + (table->lowmem ? GFP_KERNEL : + GFP_HIGHUSER) | __GFP_NOWARN); + if (!table->hem[i]) { + ret = -ENOMEM; + goto out; + } + + /* Set HEM base address(128K/page, pa) to Hardware */ + if (hns_roce_set_hem(hr_dev, table, obj)) { + ret = -ENODEV; + dev_err(dev, "set HEM base address to HW failed.\n"); + goto out; + } + + ++table->hem[i]->refcount; +out: + mutex_unlock(&table->mutex); + return ret; +} + +void hns_roce_table_put(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj) +{ + struct device *dev = &hr_dev->pdev->dev; + unsigned long i; + + i = (obj & (table->num_obj - 1)) / + (HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size); + + mutex_lock(&table->mutex); + + if (--table->hem[i]->refcount == 0) { + /* Clear HEM base address */ + if (hr_dev->hw->clear_hem(hr_dev, table, obj)) + dev_warn(dev, "Clear HEM base address failed.\n"); + + hns_roce_free_hem(hr_dev, table->hem[i]); + table->hem[i] = NULL; + } + + mutex_unlock(&table->mutex); +} + +void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj, + dma_addr_t *dma_handle) +{ + struct hns_roce_hem_chunk *chunk; + unsigned long idx; + int i; + int offset, dma_offset; + struct hns_roce_hem *hem; + struct page *page = NULL; + + if (!table->lowmem) + return NULL; + + mutex_lock(&table->mutex); + idx = (obj & (table->num_obj - 1)) * table->obj_size; + hem = table->hem[idx / HNS_ROCE_TABLE_CHUNK_SIZE]; + dma_offset = offset = idx % HNS_ROCE_TABLE_CHUNK_SIZE; + + if (!hem) + goto out; + + list_for_each_entry(chunk, &hem->chunk_list, list) { + for (i = 0; i < chunk->npages; ++i) { + if (dma_handle && dma_offset >= 0) { + if (sg_dma_len(&chunk->mem[i]) > + (u32)dma_offset) + *dma_handle = sg_dma_address( + &chunk->mem[i]) + dma_offset; + dma_offset -= sg_dma_len(&chunk->mem[i]); + } + + if (chunk->mem[i].length > (u32)offset) { + page = sg_page(&chunk->mem[i]); + goto out; + } + offset -= chunk->mem[i].length; + } + } + +out: + mutex_unlock(&table->mutex); + return page ? lowmem_page_address(page) + offset : NULL; +} + +int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end) +{ + unsigned long inc = HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size; + unsigned long i = 0; + int ret = 0; + + /* Allocate MTT entry memory according to chunk(128K) */ + for (i = start; i <= end; i += inc) { + ret = hns_roce_table_get(hr_dev, table, i); + if (ret) + goto fail; + } + + return 0; + +fail: + while (i > start) { + i -= inc; + hns_roce_table_put(hr_dev, table, i); + } + return ret; +} + +void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end) +{ + unsigned long i; + + for (i = start; i <= end; + i += HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size) + hns_roce_table_put(hr_dev, table, i); +} + +int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, u32 type, + unsigned long obj_size, unsigned long nobj, + int use_lowmem) +{ + unsigned long obj_per_chunk; + unsigned long num_hem; + + obj_per_chunk = HNS_ROCE_TABLE_CHUNK_SIZE / obj_size; + num_hem = (nobj + obj_per_chunk - 1) / obj_per_chunk; + + table->hem = kcalloc(num_hem, sizeof(*table->hem), GFP_KERNEL); + if (!table->hem) + return -ENOMEM; + + table->type = type; + table->num_hem = num_hem; + table->num_obj = nobj; + table->obj_size = obj_size; + table->lowmem = use_lowmem; + mutex_init(&table->mutex); + + return 0; +} + +void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table) +{ + struct device *dev = &hr_dev->pdev->dev; + unsigned long i; + + for (i = 0; i < table->num_hem; ++i) + if (table->hem[i]) { + if (hr_dev->hw->clear_hem(hr_dev, table, + i * HNS_ROCE_TABLE_CHUNK_SIZE / table->obj_size)) + dev_err(dev, "Clear HEM base address failed.\n"); + + hns_roce_free_hem(hr_dev, table->hem[i]); + } + + kfree(table->hem); +} + +void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev) +{ + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->cq_table.table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h new file mode 100644 index 000000000000..435748858252 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_HEM_H +#define _HNS_ROCE_HEM_H + +#define HW_SYNC_TIMEOUT_MSECS 500 +#define HW_SYNC_SLEEP_TIME_INTERVAL 20 +#define BT_CMD_SYNC_SHIFT 31 + +enum { + /* MAP HEM(Hardware Entry Memory) */ + HEM_TYPE_QPC = 0, + HEM_TYPE_MTPT, + HEM_TYPE_CQC, + HEM_TYPE_SRQC, + + /* UNMAP HEM */ + HEM_TYPE_MTT, + HEM_TYPE_IRRL, +}; + +#define HNS_ROCE_HEM_CHUNK_LEN \ + ((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \ + (sizeof(struct scatterlist))) + +enum { + HNS_ROCE_HEM_PAGE_SHIFT = 12, + HNS_ROCE_HEM_PAGE_SIZE = 1 << HNS_ROCE_HEM_PAGE_SHIFT, +}; + +struct hns_roce_hem_chunk { + struct list_head list; + int npages; + int nsg; + struct scatterlist mem[HNS_ROCE_HEM_CHUNK_LEN]; +}; + +struct hns_roce_hem { + struct list_head chunk_list; + int refcount; +}; + +struct hns_roce_hem_iter { + struct hns_roce_hem *hem; + struct hns_roce_hem_chunk *chunk; + int page_idx; +}; + +void hns_roce_free_hem(struct hns_roce_dev *hr_dev, struct hns_roce_hem *hem); +int hns_roce_table_get(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj); +void hns_roce_table_put(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, unsigned long obj); +void *hns_roce_table_find(struct hns_roce_hem_table *table, unsigned long obj, + dma_addr_t *dma_handle); +int hns_roce_table_get_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end); +void hns_roce_table_put_range(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, + unsigned long start, unsigned long end); +int hns_roce_init_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, u32 type, + unsigned long obj_size, unsigned long nobj, + int use_lowmem); +void hns_roce_cleanup_hem_table(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table); +void hns_roce_cleanup_hem(struct hns_roce_dev *hr_dev); + +static inline void hns_roce_hem_first(struct hns_roce_hem *hem, + struct hns_roce_hem_iter *iter) +{ + iter->hem = hem; + iter->chunk = list_empty(&hem->chunk_list) ? NULL : + list_entry(hem->chunk_list.next, + struct hns_roce_hem_chunk, list); + iter->page_idx = 0; +} + +static inline int hns_roce_hem_last(struct hns_roce_hem_iter *iter) +{ + return !iter->chunk; +} + +static inline void hns_roce_hem_next(struct hns_roce_hem_iter *iter) +{ + if (++iter->page_idx >= iter->chunk->nsg) { + if (iter->chunk->list.next == &iter->hem->chunk_list) { + iter->chunk = NULL; + return; + } + + iter->chunk = list_entry(iter->chunk->list.next, + struct hns_roce_hem_chunk, list); + iter->page_idx = 0; + } +} + +static inline dma_addr_t hns_roce_hem_addr(struct hns_roce_hem_iter *iter) +{ + return sg_dma_address(&iter->chunk->mem[iter->page_idx]); +} + +#endif /*_HNS_ROCE_HEM_H*/ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c new file mode 100644 index 000000000000..71232e5fabf6 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -0,0 +1,2921 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include <linux/acpi.h> +#include <rdma/ib_umem.h> +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" +#include "hns_roce_hw_v1.h" + +static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) +{ + dseg->lkey = cpu_to_le32(sg->lkey); + dseg->addr = cpu_to_le64(sg->addr); + dseg->len = cpu_to_le32(sg->length); +} + +static void set_raddr_seg(struct hns_roce_wqe_raddr_seg *rseg, u64 remote_addr, + u32 rkey) +{ + rseg->raddr = cpu_to_le64(remote_addr); + rseg->rkey = cpu_to_le32(rkey); + rseg->len = 0; +} + +int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_ah *ah = to_hr_ah(ud_wr(wr)->ah); + struct hns_roce_ud_send_wqe *ud_sq_wqe = NULL; + struct hns_roce_wqe_ctrl_seg *ctrl = NULL; + struct hns_roce_wqe_data_seg *dseg = NULL; + struct hns_roce_qp *qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_sq_db sq_db; + int ps_opcode = 0, i = 0; + unsigned long flags = 0; + void *wqe = NULL; + u32 doorbell[2]; + int nreq = 0; + u32 ind = 0; + int ret = 0; + + if (unlikely(ibqp->qp_type != IB_QPT_GSI && + ibqp->qp_type != IB_QPT_RC)) { + dev_err(dev, "un-supported QP type\n"); + *bad_wr = NULL; + return -EOPNOTSUPP; + } + + spin_lock_irqsave(&qp->sq.lock, flags); + ind = qp->sq_next_wqe; + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) { + ret = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > qp->sq.max_gs)) { + dev_err(dev, "num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, qp->sq.max_gs); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + wqe = get_send_wqe(qp, ind & (qp->sq.wqe_cnt - 1)); + qp->sq.wrid[(qp->sq.head + nreq) & (qp->sq.wqe_cnt - 1)] = + wr->wr_id; + + /* Corresponding to the RC and RD type wqe process separately */ + if (ibqp->qp_type == IB_QPT_GSI) { + ud_sq_wqe = wqe; + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_0_M, + UD_SEND_WQE_U32_4_DMAC_0_S, + ah->av.mac[0]); + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_1_M, + UD_SEND_WQE_U32_4_DMAC_1_S, + ah->av.mac[1]); + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_2_M, + UD_SEND_WQE_U32_4_DMAC_2_S, + ah->av.mac[2]); + roce_set_field(ud_sq_wqe->dmac_h, + UD_SEND_WQE_U32_4_DMAC_3_M, + UD_SEND_WQE_U32_4_DMAC_3_S, + ah->av.mac[3]); + + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_DMAC_4_M, + UD_SEND_WQE_U32_8_DMAC_4_S, + ah->av.mac[4]); + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_DMAC_5_M, + UD_SEND_WQE_U32_8_DMAC_5_S, + ah->av.mac[5]); + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_OPERATION_TYPE_M, + UD_SEND_WQE_U32_8_OPERATION_TYPE_S, + HNS_ROCE_WQE_OPCODE_SEND); + roce_set_field(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M, + UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S, + 2); + roce_set_bit(ud_sq_wqe->u32_8, + UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S, + 1); + + ud_sq_wqe->u32_8 |= (wr->send_flags & IB_SEND_SIGNALED ? + cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + cpu_to_le32(HNS_ROCE_WQE_SE) : 0) | + ((wr->opcode == IB_WR_SEND_WITH_IMM) ? + cpu_to_le32(HNS_ROCE_WQE_IMM) : 0); + + roce_set_field(ud_sq_wqe->u32_16, + UD_SEND_WQE_U32_16_DEST_QP_M, + UD_SEND_WQE_U32_16_DEST_QP_S, + ud_wr(wr)->remote_qpn); + roce_set_field(ud_sq_wqe->u32_16, + UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M, + UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S, + ah->av.stat_rate); + + roce_set_field(ud_sq_wqe->u32_36, + UD_SEND_WQE_U32_36_FLOW_LABEL_M, + UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0); + roce_set_field(ud_sq_wqe->u32_36, + UD_SEND_WQE_U32_36_PRIORITY_M, + UD_SEND_WQE_U32_36_PRIORITY_S, + ah->av.sl_tclass_flowlabel >> + HNS_ROCE_SL_SHIFT); + roce_set_field(ud_sq_wqe->u32_36, + UD_SEND_WQE_U32_36_SGID_INDEX_M, + UD_SEND_WQE_U32_36_SGID_INDEX_S, + hns_get_gid_index(hr_dev, qp->phy_port, + ah->av.gid_index)); + + roce_set_field(ud_sq_wqe->u32_40, + UD_SEND_WQE_U32_40_HOP_LIMIT_M, + UD_SEND_WQE_U32_40_HOP_LIMIT_S, + ah->av.hop_limit); + roce_set_field(ud_sq_wqe->u32_40, + UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M, + UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S, 0); + + memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN); + + ud_sq_wqe->va0_l = (u32)wr->sg_list[0].addr; + ud_sq_wqe->va0_h = (wr->sg_list[0].addr) >> 32; + ud_sq_wqe->l_key0 = wr->sg_list[0].lkey; + + ud_sq_wqe->va1_l = (u32)wr->sg_list[1].addr; + ud_sq_wqe->va1_h = (wr->sg_list[1].addr) >> 32; + ud_sq_wqe->l_key1 = wr->sg_list[1].lkey; + ind++; + } else if (ibqp->qp_type == IB_QPT_RC) { + ctrl = wqe; + memset(ctrl, 0, sizeof(struct hns_roce_wqe_ctrl_seg)); + for (i = 0; i < wr->num_sge; i++) + ctrl->msg_length += wr->sg_list[i].length; + + ctrl->sgl_pa_h = 0; + ctrl->flag = 0; + ctrl->imm_data = send_ieth(wr); + + /*Ctrl field, ctrl set type: sig, solic, imm, fence */ + /* SO wait for conforming application scenarios */ + ctrl->flag |= (wr->send_flags & IB_SEND_SIGNALED ? + cpu_to_le32(HNS_ROCE_WQE_CQ_NOTIFY) : 0) | + (wr->send_flags & IB_SEND_SOLICITED ? + cpu_to_le32(HNS_ROCE_WQE_SE) : 0) | + ((wr->opcode == IB_WR_SEND_WITH_IMM || + wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM) ? + cpu_to_le32(HNS_ROCE_WQE_IMM) : 0) | + (wr->send_flags & IB_SEND_FENCE ? + (cpu_to_le32(HNS_ROCE_WQE_FENCE)) : 0); + + wqe += sizeof(struct hns_roce_wqe_ctrl_seg); + + switch (wr->opcode) { + case IB_WR_RDMA_READ: + ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_READ; + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + ps_opcode = HNS_ROCE_WQE_OPCODE_RDMA_WRITE; + set_raddr_seg(wqe, atomic_wr(wr)->remote_addr, + atomic_wr(wr)->rkey); + break; + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + case IB_WR_SEND_WITH_IMM: + ps_opcode = HNS_ROCE_WQE_OPCODE_SEND; + break; + case IB_WR_LOCAL_INV: + break; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + case IB_WR_LSO: + default: + ps_opcode = HNS_ROCE_WQE_OPCODE_MASK; + break; + } + ctrl->flag |= cpu_to_le32(ps_opcode); + wqe += sizeof(struct hns_roce_wqe_raddr_seg); + + dseg = wqe; + if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { + if (ctrl->msg_length > + hr_dev->caps.max_sq_inline) { + ret = -EINVAL; + *bad_wr = wr; + dev_err(dev, "inline len(1-%d)=%d, illegal", + ctrl->msg_length, + hr_dev->caps.max_sq_inline); + goto out; + } + for (i = 0; i < wr->num_sge; i++) { + memcpy(wqe, ((void *) (uintptr_t) + wr->sg_list[i].addr), + wr->sg_list[i].length); + wqe += wr->sg_list[i].length; + } + ctrl->flag |= HNS_ROCE_WQE_INLINE; + } else { + /*sqe num is two */ + for (i = 0; i < wr->num_sge; i++) + set_data_seg(dseg + i, wr->sg_list + i); + + ctrl->flag |= cpu_to_le32(wr->num_sge << + HNS_ROCE_WQE_SGE_NUM_BIT); + } + ind++; + } + } + +out: + /* Set DB return */ + if (likely(nreq)) { + qp->sq.head += nreq; + /* Memory barrier */ + wmb(); + + sq_db.u32_4 = 0; + sq_db.u32_8 = 0; + roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_SQ_HEAD_M, + SQ_DOORBELL_U32_4_SQ_HEAD_S, + (qp->sq.head & ((qp->sq.wqe_cnt << 1) - 1))); + roce_set_field(sq_db.u32_4, SQ_DOORBELL_U32_4_PORT_M, + SQ_DOORBELL_U32_4_PORT_S, qp->phy_port); + roce_set_field(sq_db.u32_8, SQ_DOORBELL_U32_8_QPN_M, + SQ_DOORBELL_U32_8_QPN_S, qp->doorbell_qpn); + roce_set_bit(sq_db.u32_8, SQ_DOORBELL_HW_SYNC_S, 1); + + doorbell[0] = sq_db.u32_4; + doorbell[1] = sq_db.u32_8; + + hns_roce_write64_k(doorbell, qp->sq.db_reg_l); + qp->sq_next_wqe = ind; + } + + spin_unlock_irqrestore(&qp->sq.lock, flags); + + return ret; +} + +int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + int ret = 0; + int nreq = 0; + int ind = 0; + int i = 0; + u32 reg_val = 0; + unsigned long flags = 0; + struct hns_roce_rq_wqe_ctrl *ctrl = NULL; + struct hns_roce_wqe_data_seg *scat = NULL; + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_rq_db rq_db; + uint32_t doorbell[2] = {0}; + + spin_lock_irqsave(&hr_qp->rq.lock, flags); + ind = hr_qp->rq.head & (hr_qp->rq.wqe_cnt - 1); + + for (nreq = 0; wr; ++nreq, wr = wr->next) { + if (hns_roce_wq_overflow(&hr_qp->rq, nreq, + hr_qp->ibqp.recv_cq)) { + ret = -ENOMEM; + *bad_wr = wr; + goto out; + } + + if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + dev_err(dev, "rq:num_sge=%d > qp->sq.max_gs=%d\n", + wr->num_sge, hr_qp->rq.max_gs); + ret = -EINVAL; + *bad_wr = wr; + goto out; + } + + ctrl = get_recv_wqe(hr_qp, ind); + + roce_set_field(ctrl->rwqe_byte_12, + RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M, + RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S, + wr->num_sge); + + scat = (struct hns_roce_wqe_data_seg *)(ctrl + 1); + + for (i = 0; i < wr->num_sge; i++) + set_data_seg(scat + i, wr->sg_list + i); + + hr_qp->rq.wrid[ind] = wr->wr_id; + + ind = (ind + 1) & (hr_qp->rq.wqe_cnt - 1); + } + +out: + if (likely(nreq)) { + hr_qp->rq.head += nreq; + /* Memory barrier */ + wmb(); + + if (ibqp->qp_type == IB_QPT_GSI) { + /* SW update GSI rq header */ + reg_val = roce_read(to_hr_dev(ibqp->device), + ROCEE_QP1C_CFG3_0_REG + + QP1C_CFGN_OFFSET * hr_qp->phy_port); + roce_set_field(reg_val, + ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M, + ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S, + hr_qp->rq.head); + roce_write(to_hr_dev(ibqp->device), + ROCEE_QP1C_CFG3_0_REG + + QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val); + } else { + rq_db.u32_4 = 0; + rq_db.u32_8 = 0; + + roce_set_field(rq_db.u32_4, RQ_DOORBELL_U32_4_RQ_HEAD_M, + RQ_DOORBELL_U32_4_RQ_HEAD_S, + hr_qp->rq.head); + roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_QPN_M, + RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn); + roce_set_field(rq_db.u32_8, RQ_DOORBELL_U32_8_CMD_M, + RQ_DOORBELL_U32_8_CMD_S, 1); + roce_set_bit(rq_db.u32_8, RQ_DOORBELL_U32_8_HW_SYNC_S, + 1); + + doorbell[0] = rq_db.u32_4; + doorbell[1] = rq_db.u32_8; + + hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + } + } + spin_unlock_irqrestore(&hr_qp->rq.lock, flags); + + return ret; +} + +static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev, + int sdb_mode, int odb_mode) +{ + u32 val; + + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode); + roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); +} + +static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode, + u32 odb_mode) +{ + u32 val; + + /* Configure SDB/ODB extend mode */ + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode); + roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); +} + +static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept, + u32 sdb_alful) +{ + u32 val; + + /* Configure SDB */ + val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG); + roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M, + ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful); + roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M, + ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept); + roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val); +} + +static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept, + u32 odb_alful) +{ + u32 val; + + /* Configure ODB */ + val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG); + roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M, + ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful); + roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M, + ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept); + roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val); +} + +static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept, + u32 ext_sdb_alful) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + dma_addr_t sdb_dma_addr; + u32 val; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + /* Configure extend SDB threshold */ + roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_EMPTY_REG, ext_sdb_alept); + roce_write(hr_dev, ROCEE_EXT_DB_SQ_WL_REG, ext_sdb_alful); + + /* Configure extend SDB base addr */ + sdb_dma_addr = db->ext_db->sdb_buf_list->map; + roce_write(hr_dev, ROCEE_EXT_DB_SQ_REG, (u32)(sdb_dma_addr >> 12)); + + /* Configure extend SDB depth */ + val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG); + roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M, + ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S, + db->ext_db->esdb_dep); + /* + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M, + ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44); + roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val); + + dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep); + dev_dbg(dev, "ext SDB threshold: epmty: 0x%x, ful: 0x%x\n", + ext_sdb_alept, ext_sdb_alful); +} + +static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept, + u32 ext_odb_alful) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + dma_addr_t odb_dma_addr; + u32 val; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + /* Configure extend ODB threshold */ + roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_EMPTY_REG, ext_odb_alept); + roce_write(hr_dev, ROCEE_EXT_DB_OTHERS_WL_REG, ext_odb_alful); + + /* Configure extend ODB base addr */ + odb_dma_addr = db->ext_db->odb_buf_list->map; + roce_write(hr_dev, ROCEE_EXT_DB_OTH_REG, (u32)(odb_dma_addr >> 12)); + + /* Configure extend ODB depth */ + val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG); + roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M, + ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S, + db->ext_db->eodb_dep); + roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M, + ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S, + db->ext_db->eodb_dep); + roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val); + + dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep); + dev_dbg(dev, "ext ODB threshold: empty: 0x%x, ful: 0x%x\n", + ext_odb_alept, ext_odb_alful); +} + +static int hns_roce_db_ext_init(struct hns_roce_dev *hr_dev, u32 sdb_ext_mod, + u32 odb_ext_mod) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + dma_addr_t sdb_dma_addr; + dma_addr_t odb_dma_addr; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + db->ext_db = kmalloc(sizeof(*db->ext_db), GFP_KERNEL); + if (!db->ext_db) + return -ENOMEM; + + if (sdb_ext_mod) { + db->ext_db->sdb_buf_list = kmalloc( + sizeof(*db->ext_db->sdb_buf_list), GFP_KERNEL); + if (!db->ext_db->sdb_buf_list) { + ret = -ENOMEM; + goto ext_sdb_buf_fail_out; + } + + db->ext_db->sdb_buf_list->buf = dma_alloc_coherent(dev, + HNS_ROCE_V1_EXT_SDB_SIZE, + &sdb_dma_addr, GFP_KERNEL); + if (!db->ext_db->sdb_buf_list->buf) { + ret = -ENOMEM; + goto alloc_sq_db_buf_fail; + } + db->ext_db->sdb_buf_list->map = sdb_dma_addr; + + db->ext_db->esdb_dep = ilog2(HNS_ROCE_V1_EXT_SDB_DEPTH); + hns_roce_set_sdb_ext(hr_dev, HNS_ROCE_V1_EXT_SDB_ALEPT, + HNS_ROCE_V1_EXT_SDB_ALFUL); + } else + hns_roce_set_sdb(hr_dev, HNS_ROCE_V1_SDB_ALEPT, + HNS_ROCE_V1_SDB_ALFUL); + + if (odb_ext_mod) { + db->ext_db->odb_buf_list = kmalloc( + sizeof(*db->ext_db->odb_buf_list), GFP_KERNEL); + if (!db->ext_db->odb_buf_list) { + ret = -ENOMEM; + goto ext_odb_buf_fail_out; + } + + db->ext_db->odb_buf_list->buf = dma_alloc_coherent(dev, + HNS_ROCE_V1_EXT_ODB_SIZE, + &odb_dma_addr, GFP_KERNEL); + if (!db->ext_db->odb_buf_list->buf) { + ret = -ENOMEM; + goto alloc_otr_db_buf_fail; + } + db->ext_db->odb_buf_list->map = odb_dma_addr; + + db->ext_db->eodb_dep = ilog2(HNS_ROCE_V1_EXT_ODB_DEPTH); + hns_roce_set_odb_ext(hr_dev, HNS_ROCE_V1_EXT_ODB_ALEPT, + HNS_ROCE_V1_EXT_ODB_ALFUL); + } else + hns_roce_set_odb(hr_dev, HNS_ROCE_V1_ODB_ALEPT, + HNS_ROCE_V1_ODB_ALFUL); + + hns_roce_set_db_ext_mode(hr_dev, sdb_ext_mod, odb_ext_mod); + + return 0; + +alloc_otr_db_buf_fail: + kfree(db->ext_db->odb_buf_list); + +ext_odb_buf_fail_out: + if (sdb_ext_mod) { + dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE, + db->ext_db->sdb_buf_list->buf, + db->ext_db->sdb_buf_list->map); + } + +alloc_sq_db_buf_fail: + if (sdb_ext_mod) + kfree(db->ext_db->sdb_buf_list); + +ext_sdb_buf_fail_out: + kfree(db->ext_db); + return ret; +} + +static int hns_roce_db_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + u32 sdb_ext_mod; + u32 odb_ext_mod; + u32 sdb_evt_mod; + u32 odb_evt_mod; + int ret = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + memset(db, 0, sizeof(*db)); + + /* Default DB mode */ + sdb_ext_mod = HNS_ROCE_SDB_EXTEND_MODE; + odb_ext_mod = HNS_ROCE_ODB_EXTEND_MODE; + sdb_evt_mod = HNS_ROCE_SDB_NORMAL_MODE; + odb_evt_mod = HNS_ROCE_ODB_POLL_MODE; + + db->sdb_ext_mod = sdb_ext_mod; + db->odb_ext_mod = odb_ext_mod; + + /* Init extend DB */ + ret = hns_roce_db_ext_init(hr_dev, sdb_ext_mod, odb_ext_mod); + if (ret) { + dev_err(dev, "Failed in extend DB configuration.\n"); + return ret; + } + + hns_roce_set_db_event_mode(hr_dev, sdb_evt_mod, odb_evt_mod); + + return 0; +} + +static void hns_roce_db_free(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_db_table *db; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + db = &priv->db_table; + + if (db->sdb_ext_mod) { + dma_free_coherent(dev, HNS_ROCE_V1_EXT_SDB_SIZE, + db->ext_db->sdb_buf_list->buf, + db->ext_db->sdb_buf_list->map); + kfree(db->ext_db->sdb_buf_list); + } + + if (db->odb_ext_mod) { + dma_free_coherent(dev, HNS_ROCE_V1_EXT_ODB_SIZE, + db->ext_db->odb_buf_list->buf, + db->ext_db->odb_buf_list->map); + kfree(db->ext_db->odb_buf_list); + } + + kfree(db->ext_db); +} + +static int hns_roce_raq_init(struct hns_roce_dev *hr_dev) +{ + int ret; + int raq_shift = 0; + dma_addr_t addr; + u32 val; + struct hns_roce_v1_priv *priv; + struct hns_roce_raq_table *raq; + struct device *dev = &hr_dev->pdev->dev; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + raq = &priv->raq_table; + + raq->e_raq_buf = kzalloc(sizeof(*(raq->e_raq_buf)), GFP_KERNEL); + if (!raq->e_raq_buf) + return -ENOMEM; + + raq->e_raq_buf->buf = dma_alloc_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, + &addr, GFP_KERNEL); + if (!raq->e_raq_buf->buf) { + ret = -ENOMEM; + goto err_dma_alloc_raq; + } + raq->e_raq_buf->map = addr; + + /* Configure raq extended address. 48bit 4K align*/ + roce_write(hr_dev, ROCEE_EXT_RAQ_REG, raq->e_raq_buf->map >> 12); + + /* Configure raq_shift */ + raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY); + val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG); + roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M, + ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift); + /* + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M, + ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S, + raq->e_raq_buf->map >> 44); + roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val); + dev_dbg(dev, "Configure raq_shift 0x%x.\n", val); + + /* Configure raq threshold */ + val = roce_read(hr_dev, ROCEE_RAQ_WL_REG); + roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M, + ROCEE_RAQ_WL_ROCEE_RAQ_WL_S, + HNS_ROCE_V1_EXT_RAQ_WF); + roce_write(hr_dev, ROCEE_RAQ_WL_REG, val); + dev_dbg(dev, "Configure raq_wl 0x%x.\n", val); + + /* Enable extend raq */ + val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG); + roce_set_field(val, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S, + POL_TIME_INTERVAL_VAL); + roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1); + roce_set_field(val, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S, + 2); + roce_set_bit(val, + ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1); + roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val); + dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val); + + /* Enable raq drop */ + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); + dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val); + + return 0; + +err_dma_alloc_raq: + kfree(raq->e_raq_buf); + return ret; +} + +static void hns_roce_raq_free(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + struct hns_roce_raq_table *raq; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + raq = &priv->raq_table; + + dma_free_coherent(dev, HNS_ROCE_V1_RAQ_SIZE, raq->e_raq_buf->buf, + raq->e_raq_buf->map); + kfree(raq->e_raq_buf); +} + +static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag) +{ + u32 val; + + if (enable_flag) { + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + /* Open all ports */ + roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + ROCEE_GLB_CFG_ROCEE_PORT_ST_S, + ALL_PORT_VAL_OPEN); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); + } else { + val = roce_read(hr_dev, ROCEE_GLB_CFG_REG); + /* Close all ports */ + roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M, + ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0); + roce_write(hr_dev, ROCEE_GLB_CFG_REG, val); + } +} + +static int hns_roce_bt_init(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + int ret; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + + priv->bt_table.qpc_buf.buf = dma_alloc_coherent(dev, + HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.qpc_buf.map, + GFP_KERNEL); + if (!priv->bt_table.qpc_buf.buf) + return -ENOMEM; + + priv->bt_table.mtpt_buf.buf = dma_alloc_coherent(dev, + HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.mtpt_buf.map, + GFP_KERNEL); + if (!priv->bt_table.mtpt_buf.buf) { + ret = -ENOMEM; + goto err_failed_alloc_mtpt_buf; + } + + priv->bt_table.cqc_buf.buf = dma_alloc_coherent(dev, + HNS_ROCE_BT_RSV_BUF_SIZE, &priv->bt_table.cqc_buf.map, + GFP_KERNEL); + if (!priv->bt_table.cqc_buf.buf) { + ret = -ENOMEM; + goto err_failed_alloc_cqc_buf; + } + + return 0; + +err_failed_alloc_cqc_buf: + dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, + priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map); + +err_failed_alloc_mtpt_buf: + dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, + priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map); + + return ret; +} + +static void hns_roce_bt_free(struct hns_roce_dev *hr_dev) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + + dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, + priv->bt_table.cqc_buf.buf, priv->bt_table.cqc_buf.map); + + dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, + priv->bt_table.mtpt_buf.buf, priv->bt_table.mtpt_buf.map); + + dma_free_coherent(dev, HNS_ROCE_BT_RSV_BUF_SIZE, + priv->bt_table.qpc_buf.buf, priv->bt_table.qpc_buf.map); +} + +/** + * hns_roce_v1_reset - reset RoCE + * @hr_dev: RoCE device struct pointer + * @enable: true -- drop reset, false -- reset + * return 0 - success , negative --fail + */ +int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) +{ + struct device_node *dsaf_node; + struct device *dev = &hr_dev->pdev->dev; + struct device_node *np = dev->of_node; + struct fwnode_handle *fwnode; + int ret; + + /* check if this is DT/ACPI case */ + if (dev_of_node(dev)) { + dsaf_node = of_parse_phandle(np, "dsaf-handle", 0); + if (!dsaf_node) { + dev_err(dev, "could not find dsaf-handle\n"); + return -EINVAL; + } + fwnode = &dsaf_node->fwnode; + } else if (is_acpi_device_node(dev->fwnode)) { + struct acpi_reference_args args; + + ret = acpi_node_get_property_reference(dev->fwnode, + "dsaf-handle", 0, &args); + if (ret) { + dev_err(dev, "could not find dsaf-handle\n"); + return ret; + } + fwnode = acpi_fwnode_handle(args.adev); + } else { + dev_err(dev, "cannot read data from DT or ACPI\n"); + return -ENXIO; + } + + ret = hns_dsaf_roce_reset(fwnode, false); + if (ret) + return ret; + + if (dereset) { + msleep(SLEEP_TIME_INTERVAL); + ret = hns_dsaf_roce_reset(fwnode, true); + } + + return ret; +} + +void hns_roce_v1_profile(struct hns_roce_dev *hr_dev) +{ + int i = 0; + struct hns_roce_caps *caps = &hr_dev->caps; + + hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG)); + hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev, + ROCEE_VENDOR_PART_ID_REG)); + hr_dev->hw_rev = le32_to_cpu(roce_read(hr_dev, ROCEE_HW_VERSION_REG)); + + hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_L_REG)) | + ((u64)le32_to_cpu(roce_read(hr_dev, + ROCEE_SYS_IMAGE_GUID_H_REG)) << 32); + + caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM; + caps->max_wqes = HNS_ROCE_V1_MAX_WQE_NUM; + caps->num_cqs = HNS_ROCE_V1_MAX_CQ_NUM; + caps->max_cqes = HNS_ROCE_V1_MAX_CQE_NUM; + caps->max_sq_sg = HNS_ROCE_V1_SG_NUM; + caps->max_rq_sg = HNS_ROCE_V1_SG_NUM; + caps->max_sq_inline = HNS_ROCE_V1_INLINE_SIZE; + caps->num_uars = HNS_ROCE_V1_UAR_NUM; + caps->phy_num_uars = HNS_ROCE_V1_PHY_UAR_NUM; + caps->num_aeq_vectors = HNS_ROCE_AEQE_VEC_NUM; + caps->num_comp_vectors = HNS_ROCE_COMP_VEC_NUM; + caps->num_other_vectors = HNS_ROCE_AEQE_OF_VEC_NUM; + caps->num_mtpts = HNS_ROCE_V1_MAX_MTPT_NUM; + caps->num_mtt_segs = HNS_ROCE_V1_MAX_MTT_SEGS; + caps->num_pds = HNS_ROCE_V1_MAX_PD_NUM; + caps->max_qp_init_rdma = HNS_ROCE_V1_MAX_QP_INIT_RDMA; + caps->max_qp_dest_rdma = HNS_ROCE_V1_MAX_QP_DEST_RDMA; + caps->max_sq_desc_sz = HNS_ROCE_V1_MAX_SQ_DESC_SZ; + caps->max_rq_desc_sz = HNS_ROCE_V1_MAX_RQ_DESC_SZ; + caps->qpc_entry_sz = HNS_ROCE_V1_QPC_ENTRY_SIZE; + caps->irrl_entry_sz = HNS_ROCE_V1_IRRL_ENTRY_SIZE; + caps->cqc_entry_sz = HNS_ROCE_V1_CQC_ENTRY_SIZE; + caps->mtpt_entry_sz = HNS_ROCE_V1_MTPT_ENTRY_SIZE; + caps->mtt_entry_sz = HNS_ROCE_V1_MTT_ENTRY_SIZE; + caps->cq_entry_sz = HNS_ROCE_V1_CQE_ENTRY_SIZE; + caps->page_size_cap = HNS_ROCE_V1_PAGE_SIZE_SUPPORT; + caps->reserved_lkey = 0; + caps->reserved_pds = 0; + caps->reserved_mrws = 1; + caps->reserved_uars = 0; + caps->reserved_cqs = 0; + + for (i = 0; i < caps->num_ports; i++) + caps->pkey_table_len[i] = 1; + + for (i = 0; i < caps->num_ports; i++) { + /* Six ports shared 16 GID in v1 engine */ + if (i >= (HNS_ROCE_V1_GID_NUM % caps->num_ports)) + caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM / + caps->num_ports; + else + caps->gid_table_len[i] = HNS_ROCE_V1_GID_NUM / + caps->num_ports + 1; + } + + for (i = 0; i < caps->num_comp_vectors; i++) + caps->ceqe_depth[i] = HNS_ROCE_V1_NUM_COMP_EQE; + + caps->aeqe_depth = HNS_ROCE_V1_NUM_ASYNC_EQE; + caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev, + ROCEE_ACK_DELAY_REG)); + caps->max_mtu = IB_MTU_2048; +} + +int hns_roce_v1_init(struct hns_roce_dev *hr_dev) +{ + int ret; + u32 val; + struct device *dev = &hr_dev->pdev->dev; + + /* DMAE user config */ + val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG); + roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M, + ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf); + roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M, + ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S, + 1 << PAGES_SHIFT_16); + roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val); + + val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG); + roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M, + ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf); + roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M, + ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S, + 1 << PAGES_SHIFT_16); + + ret = hns_roce_db_init(hr_dev); + if (ret) { + dev_err(dev, "doorbell init failed!\n"); + return ret; + } + + ret = hns_roce_raq_init(hr_dev); + if (ret) { + dev_err(dev, "raq init failed!\n"); + goto error_failed_raq_init; + } + + hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_UP); + + ret = hns_roce_bt_init(hr_dev); + if (ret) { + dev_err(dev, "bt init failed!\n"); + goto error_failed_bt_init; + } + + return 0; + +error_failed_bt_init: + hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); + hns_roce_raq_free(hr_dev); + +error_failed_raq_init: + hns_roce_db_free(hr_dev); + return ret; +} + +void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) +{ + hns_roce_bt_free(hr_dev); + hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); + hns_roce_raq_free(hr_dev); + hns_roce_db_free(hr_dev); +} + +void hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, + union ib_gid *gid) +{ + u32 *p = NULL; + u8 gid_idx = 0; + + gid_idx = hns_get_gid_index(hr_dev, port, gid_index); + + p = (u32 *)&gid->raw[0]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_L_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); + + p = (u32 *)&gid->raw[4]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_ML_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); + + p = (u32 *)&gid->raw[8]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_MH_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); + + p = (u32 *)&gid->raw[0xc]; + roce_raw_write(*p, hr_dev->reg_base + ROCEE_PORT_GID_H_0_REG + + (HNS_ROCE_V1_GID_NUM * gid_idx)); +} + +void hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr) +{ + u32 reg_smac_l; + u16 reg_smac_h; + u16 *p_h; + u32 *p; + u32 val; + + p = (u32 *)(&addr[0]); + reg_smac_l = *p; + roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_SMAC_L_0_REG + + PHY_PORT_OFFSET * phy_port); + + val = roce_read(hr_dev, + ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + p_h = (u16 *)(&addr[4]); + reg_smac_h = *p_h; + roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M, + ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h); + roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, + val); +} + +void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port, + enum ib_mtu mtu) +{ + u32 val; + + val = roce_read(hr_dev, + ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET); + roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M, + ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu); + roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET, + val); +} + +int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr, + unsigned long mtpt_idx) +{ + struct hns_roce_v1_mpt_entry *mpt_entry; + struct scatterlist *sg; + u64 *pages; + int entry; + int i; + + /* MPT filled into mailbox buf */ + mpt_entry = (struct hns_roce_v1_mpt_entry *)mb_buf; + memset(mpt_entry, 0, sizeof(*mpt_entry)); + + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_STATE_M, + MPT_BYTE_4_KEY_STATE_S, KEY_VALID); + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_KEY_M, + MPT_BYTE_4_KEY_S, mr->key); + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_PAGE_SIZE_M, + MPT_BYTE_4_PAGE_SIZE_S, MR_SIZE_4K); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_TYPE_S, 0); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_MW_BIND_ENABLE_S, + (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_OWN_S, 0); + roce_set_field(mpt_entry->mpt_byte_4, MPT_BYTE_4_MEMORY_LOCATION_TYPE_M, + MPT_BYTE_4_MEMORY_LOCATION_TYPE_S, mr->type); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_ATOMIC_S, 0); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_LOCAL_WRITE_S, + (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_WRITE_S, + (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_READ_S, + (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_REMOTE_INVAL_ENABLE_S, + 0); + roce_set_bit(mpt_entry->mpt_byte_4, MPT_BYTE_4_ADDRESS_TYPE_S, 0); + + roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, + MPT_BYTE_12_PBL_ADDR_H_S, 0); + roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M, + MPT_BYTE_12_MW_BIND_COUNTER_S, 0); + + mpt_entry->virt_addr_l = (u32)mr->iova; + mpt_entry->virt_addr_h = (u32)(mr->iova >> 32); + mpt_entry->length = (u32)mr->size; + + roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M, + MPT_BYTE_28_PD_S, mr->pd); + roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_L_KEY_IDX_L_M, + MPT_BYTE_28_L_KEY_IDX_L_S, mtpt_idx); + roce_set_field(mpt_entry->mpt_byte_64, MPT_BYTE_64_L_KEY_IDX_H_M, + MPT_BYTE_64_L_KEY_IDX_H_S, mtpt_idx >> MTPT_IDX_SHIFT); + + /* DMA momery regsiter */ + if (mr->type == MR_TYPE_DMA) + return 0; + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = 0; + for_each_sg(mr->umem->sg_head.sgl, sg, mr->umem->nmap, entry) { + pages[i] = ((u64)sg_dma_address(sg)) >> 12; + + /* Directly record to MTPT table firstly 7 entry */ + if (i >= HNS_ROCE_MAX_INNER_MTPT_NUM) + break; + i++; + } + + /* Register user mr */ + for (i = 0; i < HNS_ROCE_MAX_INNER_MTPT_NUM; i++) { + switch (i) { + case 0: + mpt_entry->pa0_l = cpu_to_le32((u32)(pages[i])); + roce_set_field(mpt_entry->mpt_byte_36, + MPT_BYTE_36_PA0_H_M, + MPT_BYTE_36_PA0_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + break; + case 1: + roce_set_field(mpt_entry->mpt_byte_36, + MPT_BYTE_36_PA1_L_M, + MPT_BYTE_36_PA1_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_40, + MPT_BYTE_40_PA1_H_M, + MPT_BYTE_40_PA1_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + break; + case 2: + roce_set_field(mpt_entry->mpt_byte_40, + MPT_BYTE_40_PA2_L_M, + MPT_BYTE_40_PA2_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_44, + MPT_BYTE_44_PA2_H_M, + MPT_BYTE_44_PA2_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + break; + case 3: + roce_set_field(mpt_entry->mpt_byte_44, + MPT_BYTE_44_PA3_L_M, + MPT_BYTE_44_PA3_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_48, + MPT_BYTE_48_PA3_H_M, + MPT_BYTE_48_PA3_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8))); + break; + case 4: + mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i])); + roce_set_field(mpt_entry->mpt_byte_56, + MPT_BYTE_56_PA4_H_M, + MPT_BYTE_56_PA4_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32))); + break; + case 5: + roce_set_field(mpt_entry->mpt_byte_56, + MPT_BYTE_56_PA5_L_M, + MPT_BYTE_56_PA5_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_60, + MPT_BYTE_60_PA5_H_M, + MPT_BYTE_60_PA5_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24))); + break; + case 6: + roce_set_field(mpt_entry->mpt_byte_60, + MPT_BYTE_60_PA6_L_M, + MPT_BYTE_60_PA6_L_S, + cpu_to_le32((u32)(pages[i]))); + roce_set_field(mpt_entry->mpt_byte_64, + MPT_BYTE_64_PA6_H_M, + MPT_BYTE_64_PA6_H_S, + cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16))); + break; + default: + break; + } + } + + free_page((unsigned long) pages); + + mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr); + + roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M, + MPT_BYTE_12_PBL_ADDR_H_S, + ((u32)(mr->pbl_dma_addr >> 32))); + + return 0; +} + +static void *get_cqe(struct hns_roce_cq *hr_cq, int n) +{ + return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, + n * HNS_ROCE_V1_CQE_ENTRY_SIZE); +} + +static void *get_sw_cqe(struct hns_roce_cq *hr_cq, int n) +{ + struct hns_roce_cqe *hr_cqe = get_cqe(hr_cq, n & hr_cq->ib_cq.cqe); + + /* Get cqe when Owner bit is Conversely with the MSB of cons_idx */ + return (roce_get_bit(hr_cqe->cqe_byte_4, CQE_BYTE_4_OWNER_S) ^ + !!(n & (hr_cq->ib_cq.cqe + 1))) ? hr_cqe : NULL; +} + +static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq) +{ + return get_sw_cqe(hr_cq, hr_cq->cons_index); +} + +void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index) +{ + u32 doorbell[2]; + + doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1); + roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 0); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, hr_cq->cqn); + + hns_roce_write64_k(doorbell, hr_cq->cq_db_l); +} + +static void __hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, + struct hns_roce_srq *srq) +{ + struct hns_roce_cqe *cqe, *dest; + u32 prod_index; + int nfreed = 0; + u8 owner_bit; + + for (prod_index = hr_cq->cons_index; get_sw_cqe(hr_cq, prod_index); + ++prod_index) { + if (prod_index == hr_cq->cons_index + hr_cq->ib_cq.cqe) + break; + } + + /* + * Now backwards through the CQ, removing CQ entries + * that match our QP by overwriting them with next entries. + */ + while ((int) --prod_index - (int) hr_cq->cons_index >= 0) { + cqe = get_cqe(hr_cq, prod_index & hr_cq->ib_cq.cqe); + if ((roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) & + HNS_ROCE_CQE_QPN_MASK) == qpn) { + /* In v1 engine, not support SRQ */ + ++nfreed; + } else if (nfreed) { + dest = get_cqe(hr_cq, (prod_index + nfreed) & + hr_cq->ib_cq.cqe); + owner_bit = roce_get_bit(dest->cqe_byte_4, + CQE_BYTE_4_OWNER_S); + memcpy(dest, cqe, sizeof(*cqe)); + roce_set_bit(dest->cqe_byte_4, CQE_BYTE_4_OWNER_S, + owner_bit); + } + } + + if (nfreed) { + hr_cq->cons_index += nfreed; + /* + * Make sure update of buffer contents is done before + * updating consumer index. + */ + wmb(); + + hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); + } +} + +static void hns_roce_v1_cq_clean(struct hns_roce_cq *hr_cq, u32 qpn, + struct hns_roce_srq *srq) +{ + spin_lock_irq(&hr_cq->lock); + __hns_roce_v1_cq_clean(hr_cq, qpn, srq); + spin_unlock_irq(&hr_cq->lock); +} + +void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev, + struct hns_roce_cq *hr_cq, void *mb_buf, u64 *mtts, + dma_addr_t dma_handle, int nent, u32 vector) +{ + struct hns_roce_cq_context *cq_context = NULL; + void __iomem *tptr_addr; + + cq_context = mb_buf; + memset(cq_context, 0, sizeof(*cq_context)); + + tptr_addr = 0; + hr_dev->priv_addr = tptr_addr; + hr_cq->tptr_addr = tptr_addr; + + /* Register cq_context members */ + roce_set_field(cq_context->cqc_byte_4, + CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M, + CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID); + roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M, + CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn); + cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4); + + cq_context->cq_bt_l = (u32)dma_handle; + cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l); + + roce_set_field(cq_context->cqc_byte_12, + CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M, + CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S, + ((u64)dma_handle >> 32)); + roce_set_field(cq_context->cqc_byte_12, + CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M, + CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S, + ilog2((unsigned int)nent)); + roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M, + CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector); + cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12); + + cq_context->cur_cqe_ba0_l = (u32)(mtts[0]); + cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l); + + roce_set_field(cq_context->cqc_byte_20, + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M, + CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, + cpu_to_le32((mtts[0]) >> 32)); + /* Dedicated hardware, directly set 0 */ + roce_set_field(cq_context->cqc_byte_20, + CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M, + CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S, 0); + /** + * 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of + * using 4K page, and shift more 32 because of + * caculating the high 32 bit value evaluated to hardware. + */ + roce_set_field(cq_context->cqc_byte_20, + CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M, + CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S, + (u64)tptr_addr >> 44); + cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20); + + cq_context->cqe_tptr_addr_l = (u32)((u64)tptr_addr >> 12); + + roce_set_field(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M, + CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S, 0); + roce_set_bit(cq_context->cqc_byte_32, + CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S, + 0); + /*The initial value of cq's ci is 0 */ + roce_set_field(cq_context->cqc_byte_32, + CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M, + CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0); + cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32); +} + +int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + u32 notification_flag; + u32 doorbell[2]; + int ret = 0; + + notification_flag = (flags & IB_CQ_SOLICITED_MASK) == + IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL; + /* + * flags = 0; Notification Flag = 1, next + * flags = 1; Notification Flag = 0, solocited + */ + doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1); + roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_MDF_S, 1); + roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_M, + ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_INP_H_S, + hr_cq->cqn | notification_flag); + + hns_roce_write64_k(doorbell, hr_cq->cq_db_l); + + return ret; +} + +static int hns_roce_v1_poll_one(struct hns_roce_cq *hr_cq, + struct hns_roce_qp **cur_qp, struct ib_wc *wc) +{ + int qpn; + int is_send; + u16 wqe_ctr; + u32 status; + u32 opcode; + struct hns_roce_cqe *cqe; + struct hns_roce_qp *hr_qp; + struct hns_roce_wq *wq; + struct hns_roce_wqe_ctrl_seg *sq_wqe; + struct hns_roce_dev *hr_dev = to_hr_dev(hr_cq->ib_cq.device); + struct device *dev = &hr_dev->pdev->dev; + + /* Find cqe according consumer index */ + cqe = next_cqe_sw(hr_cq); + if (!cqe) + return -EAGAIN; + + ++hr_cq->cons_index; + /* Memory barrier */ + rmb(); + /* 0->SQ, 1->RQ */ + is_send = !(roce_get_bit(cqe->cqe_byte_4, CQE_BYTE_4_SQ_RQ_FLAG_S)); + + /* Local_qpn in UD cqe is always 1, so it needs to compute new qpn */ + if (roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) <= 1) { + qpn = roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_PORT_NUM_M, + CQE_BYTE_20_PORT_NUM_S) + + roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S) * + HNS_ROCE_MAX_PORTS; + } else { + qpn = roce_get_field(cqe->cqe_byte_16, CQE_BYTE_16_LOCAL_QPN_M, + CQE_BYTE_16_LOCAL_QPN_S); + } + + if (!*cur_qp || (qpn & HNS_ROCE_CQE_QPN_MASK) != (*cur_qp)->qpn) { + hr_qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (unlikely(!hr_qp)) { + dev_err(dev, "CQ %06lx with entry for unknown QPN %06x\n", + hr_cq->cqn, (qpn & HNS_ROCE_CQE_QPN_MASK)); + return -EINVAL; + } + + *cur_qp = hr_qp; + } + + wc->qp = &(*cur_qp)->ibqp; + wc->vendor_err = 0; + + status = roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_STATUS_OF_THE_OPERATION_M, + CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) & + HNS_ROCE_CQE_STATUS_MASK; + switch (status) { + case HNS_ROCE_CQE_SUCCESS: + wc->status = IB_WC_SUCCESS; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR: + wc->status = IB_WC_LOC_LEN_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR: + wc->status = IB_WC_LOC_QP_OP_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR: + wc->status = IB_WC_LOC_PROT_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR: + wc->status = IB_WC_WR_FLUSH_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR: + wc->status = IB_WC_MW_BIND_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR: + wc->status = IB_WC_BAD_RESP_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR: + wc->status = IB_WC_LOC_ACCESS_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR: + wc->status = IB_WC_REM_INV_REQ_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR: + wc->status = IB_WC_REM_ACCESS_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR: + wc->status = IB_WC_REM_OP_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + wc->status = IB_WC_RETRY_EXC_ERR; + break; + case HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + wc->status = IB_WC_RNR_RETRY_EXC_ERR; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + /* CQE status error, directly return */ + if (wc->status != IB_WC_SUCCESS) + return 0; + + if (is_send) { + /* SQ conrespond to CQE */ + sq_wqe = get_send_wqe(*cur_qp, roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_WQE_INDEX_M, + CQE_BYTE_4_WQE_INDEX_S)& + ((*cur_qp)->sq.wqe_cnt-1)); + switch (sq_wqe->flag & HNS_ROCE_WQE_OPCODE_MASK) { + case HNS_ROCE_WQE_OPCODE_SEND: + wc->opcode = IB_WC_SEND; + break; + case HNS_ROCE_WQE_OPCODE_RDMA_READ: + wc->opcode = IB_WC_RDMA_READ; + wc->byte_len = le32_to_cpu(cqe->byte_cnt); + break; + case HNS_ROCE_WQE_OPCODE_RDMA_WRITE: + wc->opcode = IB_WC_RDMA_WRITE; + break; + case HNS_ROCE_WQE_OPCODE_LOCAL_INV: + wc->opcode = IB_WC_LOCAL_INV; + break; + case HNS_ROCE_WQE_OPCODE_UD_SEND: + wc->opcode = IB_WC_SEND; + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + wc->wc_flags = (sq_wqe->flag & HNS_ROCE_WQE_IMM ? + IB_WC_WITH_IMM : 0); + + wq = &(*cur_qp)->sq; + if ((*cur_qp)->sq_signal_bits) { + /* + * If sg_signal_bit is 1, + * firstly tail pointer updated to wqe + * which current cqe correspond to + */ + wqe_ctr = (u16)roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_WQE_INDEX_M, + CQE_BYTE_4_WQE_INDEX_S); + wq->tail += (wqe_ctr - (u16)wq->tail) & + (wq->wqe_cnt - 1); + } + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + } else { + /* RQ conrespond to CQE */ + wc->byte_len = le32_to_cpu(cqe->byte_cnt); + opcode = roce_get_field(cqe->cqe_byte_4, + CQE_BYTE_4_OPERATION_TYPE_M, + CQE_BYTE_4_OPERATION_TYPE_S) & + HNS_ROCE_CQE_OPCODE_MASK; + switch (opcode) { + case HNS_ROCE_OPCODE_RDMA_WITH_IMM_RECEIVE: + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = le32_to_cpu(cqe->immediate_data); + break; + case HNS_ROCE_OPCODE_SEND_DATA_RECEIVE: + if (roce_get_bit(cqe->cqe_byte_4, + CQE_BYTE_4_IMM_INDICATOR_S)) { + wc->opcode = IB_WC_RECV; + wc->wc_flags = IB_WC_WITH_IMM; + wc->ex.imm_data = le32_to_cpu( + cqe->immediate_data); + } else { + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + } + break; + default: + wc->status = IB_WC_GENERAL_ERR; + break; + } + + /* Update tail pointer, record wr_id */ + wq = &(*cur_qp)->rq; + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + ++wq->tail; + wc->sl = (u8)roce_get_field(cqe->cqe_byte_20, CQE_BYTE_20_SL_M, + CQE_BYTE_20_SL_S); + wc->src_qp = (u8)roce_get_field(cqe->cqe_byte_20, + CQE_BYTE_20_REMOTE_QPN_M, + CQE_BYTE_20_REMOTE_QPN_S); + wc->wc_flags |= (roce_get_bit(cqe->cqe_byte_20, + CQE_BYTE_20_GRH_PRESENT_S) ? + IB_WC_GRH : 0); + wc->pkey_index = (u16)roce_get_field(cqe->cqe_byte_28, + CQE_BYTE_28_P_KEY_IDX_M, + CQE_BYTE_28_P_KEY_IDX_S); + } + + return 0; +} + +int hns_roce_v1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct hns_roce_cq *hr_cq = to_hr_cq(ibcq); + struct hns_roce_qp *cur_qp = NULL; + unsigned long flags; + int npolled; + int ret = 0; + + spin_lock_irqsave(&hr_cq->lock, flags); + + for (npolled = 0; npolled < num_entries; ++npolled) { + ret = hns_roce_v1_poll_one(hr_cq, &cur_qp, wc + npolled); + if (ret) + break; + } + + if (npolled) + hns_roce_v1_cq_set_ci(hr_cq, hr_cq->cons_index); + + spin_unlock_irqrestore(&hr_cq->lock, flags); + + if (ret == 0 || ret == -EAGAIN) + return npolled; + else + return ret; +} + +int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, + struct hns_roce_hem_table *table, int obj) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_v1_priv *priv; + unsigned long end = 0, flags = 0; + uint32_t bt_cmd_val[2] = {0}; + void __iomem *bt_cmd; + u64 bt_ba = 0; + + priv = (struct hns_roce_v1_priv *)hr_dev->hw->priv; + + switch (table->type) { + case HEM_TYPE_QPC: + roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_QPC); + bt_ba = priv->bt_table.qpc_buf.map >> 12; + break; + case HEM_TYPE_MTPT: + roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_MTPT); + bt_ba = priv->bt_table.mtpt_buf.map >> 12; + break; + case HEM_TYPE_CQC: + roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_MDF_S, HEM_TYPE_CQC); + bt_ba = priv->bt_table.cqc_buf.map >> 12; + break; + case HEM_TYPE_SRQC: + dev_dbg(dev, "HEM_TYPE_SRQC not support.\n"); + return -EINVAL; + default: + return 0; + } + roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_IN_MDF_S, obj); + roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_S, 0); + roce_set_bit(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_HW_SYNS_S, 1); + + spin_lock_irqsave(&hr_dev->bt_cmd_lock, flags); + + bt_cmd = hr_dev->reg_base + ROCEE_BT_CMD_H_REG; + + end = msecs_to_jiffies(HW_SYNC_TIMEOUT_MSECS) + jiffies; + while (1) { + if (readl(bt_cmd) >> BT_CMD_SYNC_SHIFT) { + if (!(time_before(jiffies, end))) { + dev_err(dev, "Write bt_cmd err,hw_sync is not zero.\n"); + spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, + flags); + return -EBUSY; + } + } else { + break; + } + msleep(HW_SYNC_SLEEP_TIME_INTERVAL); + } + + bt_cmd_val[0] = (uint32_t)bt_ba; + roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M, + ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32); + hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG); + + spin_unlock_irqrestore(&hr_dev->bt_cmd_lock, flags); + + return 0; +} + +static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, + enum hns_roce_qp_state cur_state, + enum hns_roce_qp_state new_state, + struct hns_roce_qp_context *context, + struct hns_roce_qp *hr_qp) +{ + static const u16 + op[HNS_ROCE_QP_NUM_STATE][HNS_ROCE_QP_NUM_STATE] = { + [HNS_ROCE_QP_STATE_RST] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP, + }, + [HNS_ROCE_QP_STATE_INIT] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + /* Note: In v1 engine, HW doesn't support RST2INIT. + * We use RST2INIT cmd instead of INIT2INIT. + */ + [HNS_ROCE_QP_STATE_INIT] = HNS_ROCE_CMD_RST2INIT_QP, + [HNS_ROCE_QP_STATE_RTR] = HNS_ROCE_CMD_INIT2RTR_QP, + }, + [HNS_ROCE_QP_STATE_RTR] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTR2RTS_QP, + }, + [HNS_ROCE_QP_STATE_RTS] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_RTS2RTS_QP, + [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_RTS2SQD_QP, + }, + [HNS_ROCE_QP_STATE_SQD] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + [HNS_ROCE_QP_STATE_RTS] = HNS_ROCE_CMD_SQD2RTS_QP, + [HNS_ROCE_QP_STATE_SQD] = HNS_ROCE_CMD_SQD2SQD_QP, + }, + [HNS_ROCE_QP_STATE_ERR] = { + [HNS_ROCE_QP_STATE_RST] = HNS_ROCE_CMD_2RST_QP, + [HNS_ROCE_QP_STATE_ERR] = HNS_ROCE_CMD_2ERR_QP, + } + }; + + struct hns_roce_cmd_mailbox *mailbox; + struct device *dev = &hr_dev->pdev->dev; + int ret = 0; + + if (cur_state >= HNS_ROCE_QP_NUM_STATE || + new_state >= HNS_ROCE_QP_NUM_STATE || + !op[cur_state][new_state]) { + dev_err(dev, "[modify_qp]not support state %d to %d\n", + cur_state, new_state); + return -EINVAL; + } + + if (op[cur_state][new_state] == HNS_ROCE_CMD_2RST_QP) + return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, + HNS_ROCE_CMD_2RST_QP, + HNS_ROCE_CMD_TIME_CLASS_A); + + if (op[cur_state][new_state] == HNS_ROCE_CMD_2ERR_QP) + return hns_roce_cmd_mbox(hr_dev, 0, 0, hr_qp->qpn, 2, + HNS_ROCE_CMD_2ERR_QP, + HNS_ROCE_CMD_TIME_CLASS_A); + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + memcpy(mailbox->buf, context, sizeof(*context)); + + ret = hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, hr_qp->qpn, 0, + op[cur_state][new_state], + HNS_ROCE_CMD_TIME_CLASS_C); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + return ret; +} + +static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct hns_roce_sqp_context *context; + struct device *dev = &hr_dev->pdev->dev; + dma_addr_t dma_handle = 0; + int rq_pa_start; + u32 reg_val; + u64 *mtts; + u32 *addr; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + /* Search QP buf's MTTs */ + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + hr_qp->mtt.first_seg, &dma_handle); + if (!mtts) { + dev_err(dev, "qp buf pa find failed\n"); + goto out; + } + + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + roce_set_field(context->qp1c_bytes_4, + QP1C_BYTES_4_SQ_WQE_SHIFT_M, + QP1C_BYTES_4_SQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(context->qp1c_bytes_4, + QP1C_BYTES_4_RQ_WQE_SHIFT_M, + QP1C_BYTES_4_RQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, + QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn); + + context->sq_rq_bt_l = (u32)(dma_handle); + roce_set_field(context->qp1c_bytes_12, + QP1C_BYTES_12_SQ_RQ_BT_H_M, + QP1C_BYTES_12_SQ_RQ_BT_H_S, + ((u32)(dma_handle >> 32))); + + roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, + QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); + roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_PORT_NUM_M, + QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); + roce_set_bit(context->qp1c_bytes_16, + QP1C_BYTES_16_SIGNALING_TYPE_S, + hr_qp->sq_signal_bits); + roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, + 1); + roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, + 1); + roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_QP1_ERR_S, + 0); + + roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_SQ_HEAD_M, + QP1C_BYTES_20_SQ_HEAD_S, hr_qp->sq.head); + roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, + QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index); + + rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; + context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + + roce_set_field(context->qp1c_bytes_28, + QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, + QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, + (mtts[rq_pa_start]) >> 32); + roce_set_field(context->qp1c_bytes_28, + QP1C_BYTES_28_RQ_CUR_IDX_M, + QP1C_BYTES_28_RQ_CUR_IDX_S, 0); + + roce_set_field(context->qp1c_bytes_32, + QP1C_BYTES_32_RX_CQ_NUM_M, + QP1C_BYTES_32_RX_CQ_NUM_S, + to_hr_cq(ibqp->recv_cq)->cqn); + roce_set_field(context->qp1c_bytes_32, + QP1C_BYTES_32_TX_CQ_NUM_M, + QP1C_BYTES_32_TX_CQ_NUM_S, + to_hr_cq(ibqp->send_cq)->cqn); + + context->cur_sq_wqe_ba_l = (u32)mtts[0]; + + roce_set_field(context->qp1c_bytes_40, + QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, + QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, + (mtts[0]) >> 32); + roce_set_field(context->qp1c_bytes_40, + QP1C_BYTES_40_SQ_CUR_IDX_M, + QP1C_BYTES_40_SQ_CUR_IDX_S, 0); + + /* Copy context to QP1C register */ + addr = (u32 *)(hr_dev->reg_base + ROCEE_QP1C_CFG0_0_REG + + hr_qp->phy_port * sizeof(*context)); + + writel(context->qp1c_bytes_4, addr); + writel(context->sq_rq_bt_l, addr + 1); + writel(context->qp1c_bytes_12, addr + 2); + writel(context->qp1c_bytes_16, addr + 3); + writel(context->qp1c_bytes_20, addr + 4); + writel(context->cur_rq_wqe_ba_l, addr + 5); + writel(context->qp1c_bytes_28, addr + 6); + writel(context->qp1c_bytes_32, addr + 7); + writel(context->cur_sq_wqe_ba_l, addr + 8); + writel(context->qp1c_bytes_40, addr + 9); + } + + /* Modify QP1C status */ + reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG + + hr_qp->phy_port * sizeof(*context)); + roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M, + ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state); + roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG + + hr_qp->phy_port * sizeof(*context), reg_val); + + hr_qp->state = new_state; + if (new_state == IB_QPS_RESET) { + hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, + ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); + if (ibqp->send_cq != ibqp->recv_cq) + hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), + hr_qp->qpn, NULL); + + hr_qp->rq.head = 0; + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; + hr_qp->sq_next_wqe = 0; + } + + kfree(context); + return 0; + +out: + kfree(context); + return -EINVAL; +} + +static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp_context *context; + dma_addr_t dma_handle_2 = 0; + dma_addr_t dma_handle = 0; + uint32_t doorbell[2] = {0}; + int rq_pa_start = 0; + u64 *mtts_2 = NULL; + int ret = -EINVAL; + u64 *mtts = NULL; + int port; + u8 *dmac; + u8 *smac; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + /* Search qp buf's mtts */ + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + hr_qp->mtt.first_seg, &dma_handle); + if (mtts == NULL) { + dev_err(dev, "qp buf pa find failed\n"); + goto out; + } + + /* Search IRRL's mtts */ + mtts_2 = hns_roce_table_find(&hr_dev->qp_table.irrl_table, hr_qp->qpn, + &dma_handle_2); + if (mtts_2 == NULL) { + dev_err(dev, "qp irrl_table find failed\n"); + goto out; + } + + /* + *Reset to init + * Mandatory param: + * IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS + * Optional param: NA + */ + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S, + to_hr_qp_type(hr_qp->ibqp.qp_type)); + + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE) + ); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S, + !!(attr->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC) + ); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_PD_M, + QP_CONTEXT_QPC_BYTES_4_PD_S, + to_hr_pd(ibqp->pd)->pdn); + hr_qp->access_flags = attr->qp_access_flags; + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S, + to_hr_cq(ibqp->send_cq)->cqn); + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S, + to_hr_cq(ibqp->recv_cq)->cqn); + + if (ibqp->srq) + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S, + to_hr_srq(ibqp->srq)->srqn); + + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, + attr->pkey_index); + hr_qp->pkey_index = attr->pkey_index; + roce_set_field(context->qpc_bytes_16, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); + + } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, + QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S, + to_hr_qp_type(hr_qp->ibqp.qp_type)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S, 0); + if (attr_mask & IB_QP_ACCESS_FLAGS) { + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_READ)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, + !!(attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE)); + } else { + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S, + !!(hr_qp->access_flags & + IB_ACCESS_REMOTE_READ)); + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S, + !!(hr_qp->access_flags & + IB_ACCESS_REMOTE_WRITE)); + } + + roce_set_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S, 1); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M, + QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S, + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + roce_set_field(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTES_4_PD_M, + QP_CONTEXT_QPC_BYTES_4_PD_S, + to_hr_pd(ibqp->pd)->pdn); + + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S, + to_hr_cq(ibqp->send_cq)->cqn); + roce_set_field(context->qpc_bytes_8, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M, + QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S, + to_hr_cq(ibqp->recv_cq)->cqn); + + if (ibqp->srq) + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M, + QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S, + to_hr_srq(ibqp->srq)->srqn); + if (attr_mask & IB_QP_PKEY_INDEX) + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, + attr->pkey_index); + else + roce_set_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S, + hr_qp->pkey_index); + + roce_set_field(context->qpc_bytes_16, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, + QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); + } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { + if ((attr_mask & IB_QP_ALT_PATH) || + (attr_mask & IB_QP_ACCESS_FLAGS) || + (attr_mask & IB_QP_PKEY_INDEX) || + (attr_mask & IB_QP_QKEY)) { + dev_err(dev, "INIT2RTR attr_mask error\n"); + goto out; + } + + dmac = (u8 *)attr->ah_attr.dmac; + + context->sq_rq_bt_l = (u32)(dma_handle); + roce_set_field(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, + QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, + ((u32)(dma_handle >> 32))); + roce_set_bit(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, + 1); + roce_set_field(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S, + attr->min_rnr_timer); + context->irrl_ba_l = (u32)(dma_handle_2); + roce_set_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M, + QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S, + ((u32)(dma_handle_2 >> 32)) & + QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M); + roce_set_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M, + QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S, 0); + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S, + 1); + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, + hr_qp->sq_signal_bits); + + for (port = 0; port < hr_dev->caps.num_ports; port++) { + smac = (u8 *)hr_dev->dev_addr[port]; + dev_dbg(dev, "smac: %2x: %2x: %2x: %2x: %2x: %2x\n", + smac[0], smac[1], smac[2], smac[3], smac[4], + smac[5]); + if ((dmac[0] == smac[0]) && (dmac[1] == smac[1]) && + (dmac[2] == smac[2]) && (dmac[3] == smac[3]) && + (dmac[4] == smac[4]) && (dmac[5] == smac[5])) { + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, + 1); + break; + } + } + + if (hr_dev->loop_idc == 0x1) + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S, 1); + + roce_set_bit(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S, + attr->ah_attr.ah_flags); + roce_set_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S, + ilog2((unsigned int)attr->max_dest_rd_atomic)); + + roce_set_field(context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_S, + attr->dest_qp_num); + + /* Configure GID index */ + roce_set_field(context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S, + hns_get_gid_index(hr_dev, + attr->ah_attr.port_num - 1, + attr->ah_attr.grh.sgid_index)); + + memcpy(&(context->dmac_l), dmac, 4); + + roce_set_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_M, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_S, + *((u16 *)(&dmac[4]))); + roce_set_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M, + QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S, + attr->ah_attr.static_rate); + roce_set_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_M, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_S, + attr->ah_attr.grh.hop_limit); + + roce_set_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S, + attr->ah_attr.grh.flow_label); + roce_set_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_TCLASS_M, + QP_CONTEXT_QPC_BYTES_48_TCLASS_S, + attr->ah_attr.grh.traffic_class); + roce_set_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_MTU_M, + QP_CONTEXT_QPC_BYTES_48_MTU_S, attr->path_mtu); + + memcpy(context->dgid, attr->ah_attr.grh.dgid.raw, + sizeof(attr->ah_attr.grh.dgid.raw)); + + dev_dbg(dev, "dmac:%x :%lx\n", context->dmac_l, + roce_get_field(context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_M, + QP_CONTEXT_QPC_BYTES_44_DMAC_H_S)); + + roce_set_field(context->qpc_bytes_68, + QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M, + QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S, + hr_qp->rq.head); + roce_set_field(context->qpc_bytes_68, + QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0); + + rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; + context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]); + + roce_set_field(context->qpc_bytes_76, + QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, + QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, + mtts[rq_pa_start] >> 32); + roce_set_field(context->qpc_bytes_76, + QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, + QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); + + context->rx_rnr_time = 0; + + roce_set_field(context->qpc_bytes_84, + QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M, + QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S, + attr->rq_psn - 1); + roce_set_field(context->qpc_bytes_84, + QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M, + QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S, 0); + + roce_set_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S, + attr->rq_psn); + roce_set_bit(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S, 0); + roce_set_bit(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S, 0); + roce_set_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S, + 0); + roce_set_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M, + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S, + 0); + + context->dma_length = 0; + context->r_key = 0; + context->va_l = 0; + context->va_h = 0; + + roce_set_field(context->qpc_bytes_108, + QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M, + QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S, 0); + roce_set_bit(context->qpc_bytes_108, + QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S, 0); + roce_set_bit(context->qpc_bytes_108, + QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S, 0); + + roce_set_field(context->qpc_bytes_112, + QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M, + QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S, 0); + roce_set_field(context->qpc_bytes_112, + QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M, + QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S, 0); + + /* For chip resp ack */ + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S, + hr_qp->phy_port); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_SL_M, + QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl); + hr_qp->sl = attr->ah_attr.sl; + } else if (cur_state == IB_QPS_RTR && + new_state == IB_QPS_RTS) { + /* If exist optional param, return error */ + if ((attr_mask & IB_QP_ALT_PATH) || + (attr_mask & IB_QP_ACCESS_FLAGS) || + (attr_mask & IB_QP_QKEY) || + (attr_mask & IB_QP_PATH_MIG_STATE) || + (attr_mask & IB_QP_CUR_STATE) || + (attr_mask & IB_QP_MIN_RNR_TIMER)) { + dev_err(dev, "RTR2RTS attr_mask error\n"); + goto out; + } + + context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + + roce_set_field(context->qpc_bytes_120, + QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, + QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, + (mtts[0]) >> 32); + + roce_set_field(context->qpc_bytes_124, + QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, + QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S, 0); + roce_set_field(context->qpc_bytes_124, + QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M, + QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S, 0); + + roce_set_field(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M, + QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S, + attr->sq_psn); + roce_set_bit(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S, 0); + roce_set_field(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M, + QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S, + 0); + roce_set_bit(context->qpc_bytes_128, + QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S, 0); + + roce_set_field(context->qpc_bytes_132, + QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M, + QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S, 0); + roce_set_field(context->qpc_bytes_132, + QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M, + QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S, 0); + + roce_set_field(context->qpc_bytes_136, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S, + attr->sq_psn); + roce_set_field(context->qpc_bytes_136, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M, + QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S, + attr->sq_psn); + + roce_set_field(context->qpc_bytes_140, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S, + (attr->sq_psn >> SQ_PSN_SHIFT)); + roce_set_field(context->qpc_bytes_140, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M, + QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S, 0); + roce_set_bit(context->qpc_bytes_140, + QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S, 0); + + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M, + QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S, 0); + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S, + attr->retry_cnt); + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M, + QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S, + attr->rnr_retry); + roce_set_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_LSN_M, + QP_CONTEXT_QPC_BYTES_148_LSN_S, 0x100); + + context->rnr_retry = 0; + + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M, + QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S, + attr->retry_cnt); + if (attr->timeout < 0x12) { + dev_info(dev, "ack timeout value(0x%x) must bigger than 0x12.\n", + attr->timeout); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S, + 0x12); + } else { + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S, + attr->timeout); + } + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M, + QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S, + attr->rnr_retry); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S, + hr_qp->phy_port); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_SL_M, + QP_CONTEXT_QPC_BYTES_156_SL_S, attr->ah_attr.sl); + hr_qp->sl = attr->ah_attr.sl; + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S, + ilog2((unsigned int)attr->max_rd_atomic)); + roce_set_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M, + QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S, 0); + context->pkt_use_len = 0; + + roce_set_field(context->qpc_bytes_164, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S, attr->sq_psn); + roce_set_field(context->qpc_bytes_164, + QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M, + QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S, 0); + + roce_set_field(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M, + QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S, + attr->sq_psn); + roce_set_field(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M, + QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S, 0); + roce_set_field(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M, + QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S, 0); + roce_set_bit(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S, 0); + roce_set_bit(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S, 0); + roce_set_bit(context->qpc_bytes_168, + QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S, 0); + context->sge_use_len = 0; + + roce_set_field(context->qpc_bytes_176, + QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S, 0); + roce_set_field(context->qpc_bytes_176, + QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S, + 0); + roce_set_field(context->qpc_bytes_180, + QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S, 0); + roce_set_field(context->qpc_bytes_180, + QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, + QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0); + + context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]); + + roce_set_field(context->qpc_bytes_188, + QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, + QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, + (mtts[0]) >> 32); + roce_set_bit(context->qpc_bytes_188, + QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); + roce_set_field(context->qpc_bytes_188, + QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M, + QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S, + 0); + } else if (!((cur_state == IB_QPS_INIT && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_INIT && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_RTR && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_RTR && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_RTS && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_RTS && new_state == IB_QPS_ERR) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_RESET) || + (cur_state == IB_QPS_ERR && new_state == IB_QPS_ERR))) { + dev_err(dev, "not support this status migration\n"); + goto out; + } + + /* Every status migrate must change state */ + roce_set_field(context->qpc_bytes_144, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, attr->qp_state); + + /* SW pass context to HW */ + ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, + to_hns_roce_state(cur_state), + to_hns_roce_state(new_state), context, + hr_qp); + if (ret) { + dev_err(dev, "hns_roce_qp_modify failed\n"); + goto out; + } + + /* + * Use rst2init to instead of init2init with drv, + * need to hw to flash RQ HEAD by DB again + */ + if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { + /* Memory barrier */ + wmb(); + + roce_set_field(doorbell[0], RQ_DOORBELL_U32_4_RQ_HEAD_M, + RQ_DOORBELL_U32_4_RQ_HEAD_S, hr_qp->rq.head); + roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_QPN_M, + RQ_DOORBELL_U32_8_QPN_S, hr_qp->qpn); + roce_set_field(doorbell[1], RQ_DOORBELL_U32_8_CMD_M, + RQ_DOORBELL_U32_8_CMD_S, 1); + roce_set_bit(doorbell[1], RQ_DOORBELL_U32_8_HW_SYNC_S, 1); + + if (ibqp->uobject) { + hr_qp->rq.db_reg_l = hr_dev->reg_base + + ROCEE_DB_OTHERS_L_0_REG + + DB_REG_OFFSET * hr_dev->priv_uar.index; + } + + hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l); + } + + hr_qp->state = new_state; + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) + hr_qp->resp_depth = attr->max_dest_rd_atomic; + if (attr_mask & IB_QP_PORT) { + hr_qp->port = attr->port_num - 1; + hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; + } + + if (new_state == IB_QPS_RESET && !ibqp->uobject) { + hns_roce_v1_cq_clean(to_hr_cq(ibqp->recv_cq), hr_qp->qpn, + ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); + if (ibqp->send_cq != ibqp->recv_cq) + hns_roce_v1_cq_clean(to_hr_cq(ibqp->send_cq), + hr_qp->qpn, NULL); + + hr_qp->rq.head = 0; + hr_qp->rq.tail = 0; + hr_qp->sq.head = 0; + hr_qp->sq.tail = 0; + hr_qp->sq_next_wqe = 0; + } +out: + kfree(context); + return ret; +} + +int hns_roce_v1_modify_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, + int attr_mask, enum ib_qp_state cur_state, + enum ib_qp_state new_state) +{ + + if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) + return hns_roce_v1_m_sqp(ibqp, attr, attr_mask, cur_state, + new_state); + else + return hns_roce_v1_m_qp(ibqp, attr, attr_mask, cur_state, + new_state); +} + +static enum ib_qp_state to_ib_qp_state(enum hns_roce_qp_state state) +{ + switch (state) { + case HNS_ROCE_QP_STATE_RST: + return IB_QPS_RESET; + case HNS_ROCE_QP_STATE_INIT: + return IB_QPS_INIT; + case HNS_ROCE_QP_STATE_RTR: + return IB_QPS_RTR; + case HNS_ROCE_QP_STATE_RTS: + return IB_QPS_RTS; + case HNS_ROCE_QP_STATE_SQD: + return IB_QPS_SQD; + case HNS_ROCE_QP_STATE_ERR: + return IB_QPS_ERR; + default: + return IB_QPS_ERR; + } +} + +static int hns_roce_v1_query_qpc(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_qp_context *hr_context) +{ + struct hns_roce_cmd_mailbox *mailbox; + int ret; + + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) + return PTR_ERR(mailbox); + + ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, hr_qp->qpn, 0, + HNS_ROCE_CMD_QUERY_QP, + HNS_ROCE_CMD_TIME_CLASS_A); + if (!ret) + memcpy(hr_context, mailbox->buf, sizeof(*hr_context)); + else + dev_err(&hr_dev->pdev->dev, "QUERY QP cmd process error\n"); + + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return ret; +} + +int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp_context *context; + int tmp_qp_state = 0; + int ret = 0; + int state; + + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return -ENOMEM; + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + mutex_lock(&hr_qp->mutex); + + if (hr_qp->state == IB_QPS_RESET) { + qp_attr->qp_state = IB_QPS_RESET; + goto done; + } + + ret = hns_roce_v1_query_qpc(hr_dev, hr_qp, context); + if (ret) { + dev_err(dev, "query qpc error\n"); + ret = -EINVAL; + goto out; + } + + state = roce_get_field(context->qpc_bytes_144, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_M, + QP_CONTEXT_QPC_BYTES_144_QP_STATE_S); + tmp_qp_state = (int)to_ib_qp_state((enum hns_roce_qp_state)state); + if (tmp_qp_state == -1) { + dev_err(dev, "to_ib_qp_state error\n"); + ret = -EINVAL; + goto out; + } + hr_qp->state = (u8)tmp_qp_state; + qp_attr->qp_state = (enum ib_qp_state)hr_qp->state; + qp_attr->path_mtu = (enum ib_mtu)roce_get_field(context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_MTU_M, + QP_CONTEXT_QPC_BYTES_48_MTU_S); + qp_attr->path_mig_state = IB_MIG_ARMED; + if (hr_qp->ibqp.qp_type == IB_QPT_UD) + qp_attr->qkey = QKEY_VAL; + + qp_attr->rq_psn = roce_get_field(context->qpc_bytes_88, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M, + QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S); + qp_attr->sq_psn = (u32)roce_get_field(context->qpc_bytes_164, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M, + QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S); + qp_attr->dest_qp_num = (u8)roce_get_field(context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_M, + QP_CONTEXT_QPC_BYTES_36_DEST_QP_S); + qp_attr->qp_access_flags = ((roce_get_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S)) << 2) | + ((roce_get_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S)) << 1) | + ((roce_get_bit(context->qpc_bytes_4, + QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S)) << 3); + + if (hr_qp->ibqp.qp_type == IB_QPT_RC || + hr_qp->ibqp.qp_type == IB_QPT_UC) { + qp_attr->ah_attr.sl = roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_SL_M, + QP_CONTEXT_QPC_BYTES_156_SL_S); + qp_attr->ah_attr.grh.flow_label = roce_get_field( + context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M, + QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S); + qp_attr->ah_attr.grh.sgid_index = roce_get_field( + context->qpc_bytes_36, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M, + QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S); + qp_attr->ah_attr.grh.hop_limit = roce_get_field( + context->qpc_bytes_44, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_M, + QP_CONTEXT_QPC_BYTES_44_HOPLMT_S); + qp_attr->ah_attr.grh.traffic_class = roce_get_field( + context->qpc_bytes_48, + QP_CONTEXT_QPC_BYTES_48_TCLASS_M, + QP_CONTEXT_QPC_BYTES_48_TCLASS_S); + + memcpy(qp_attr->ah_attr.grh.dgid.raw, context->dgid, + sizeof(qp_attr->ah_attr.grh.dgid.raw)); + } + + qp_attr->pkey_index = roce_get_field(context->qpc_bytes_12, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M, + QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S); + qp_attr->port_num = (u8)roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M, + QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + 1; + qp_attr->sq_draining = 0; + qp_attr->max_rd_atomic = roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M, + QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S); + qp_attr->max_dest_rd_atomic = roce_get_field(context->qpc_bytes_32, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M, + QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S); + qp_attr->min_rnr_timer = (u8)(roce_get_field(context->qpc_bytes_24, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M, + QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S)); + qp_attr->timeout = (u8)(roce_get_field(context->qpc_bytes_156, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M, + QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S)); + qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M, + QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S); + qp_attr->rnr_retry = context->rnr_retry; + +done: + qp_attr->cur_qp_state = qp_attr->qp_state; + qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + + if (!ibqp->uobject) { + qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; + qp_attr->cap.max_send_sge = hr_qp->sq.max_gs; + } else { + qp_attr->cap.max_send_wr = 0; + qp_attr->cap.max_send_sge = 0; + } + + qp_init_attr->cap = qp_attr->cap; + +out: + mutex_unlock(&hr_qp->mutex); + kfree(context); + return ret; +} + +static void hns_roce_v1_destroy_qp_common(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + int is_user) +{ + u32 sdbinvcnt; + unsigned long end = 0; + u32 sdbinvcnt_val; + u32 sdbsendptr_val; + u32 sdbisusepr_val; + struct hns_roce_cq *send_cq, *recv_cq; + struct device *dev = &hr_dev->pdev->dev; + + if (hr_qp->ibqp.qp_type == IB_QPT_RC) { + if (hr_qp->state != IB_QPS_RESET) { + /* + * Set qp to ERR, + * waiting for hw complete processing all dbs + */ + if (hns_roce_v1_qp_modify(hr_dev, NULL, + to_hns_roce_state( + (enum ib_qp_state)hr_qp->state), + HNS_ROCE_QP_STATE_ERR, NULL, + hr_qp)) + dev_err(dev, "modify QP %06lx to ERR failed.\n", + hr_qp->qpn); + + /* Record issued doorbell */ + sdbisusepr_val = roce_read(hr_dev, + ROCEE_SDB_ISSUE_PTR_REG); + /* + * Query db process status, + * until hw process completely + */ + end = msecs_to_jiffies( + HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS) + jiffies; + do { + sdbsendptr_val = roce_read(hr_dev, + ROCEE_SDB_SEND_PTR_REG); + if (!time_before(jiffies, end)) { + dev_err(dev, "destroy qp(0x%lx) timeout!!!", + hr_qp->qpn); + break; + } + } while ((short)(roce_get_field(sdbsendptr_val, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, + ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) - + roce_get_field(sdbisusepr_val, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, + ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) + ) < 0); + + /* Get list pointer */ + sdbinvcnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); + + /* Query db's list status, until hw reversal */ + do { + sdbinvcnt_val = roce_read(hr_dev, + ROCEE_SDB_INV_CNT_REG); + if (!time_before(jiffies, end)) { + dev_err(dev, "destroy qp(0x%lx) timeout!!!", + hr_qp->qpn); + dev_err(dev, "SdbInvCnt = 0x%x\n", + sdbinvcnt_val); + break; + } + } while ((short)(roce_get_field(sdbinvcnt_val, + ROCEE_SDB_INV_CNT_SDB_INV_CNT_M, + ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) - + (sdbinvcnt + SDB_INV_CNT_OFFSET)) < 0); + + /* Modify qp to reset before destroying qp */ + if (hns_roce_v1_qp_modify(hr_dev, NULL, + to_hns_roce_state( + (enum ib_qp_state)hr_qp->state), + HNS_ROCE_QP_STATE_RST, NULL, hr_qp)) + dev_err(dev, "modify QP %06lx to RESET failed.\n", + hr_qp->qpn); + } + } + + send_cq = to_hr_cq(hr_qp->ibqp.send_cq); + recv_cq = to_hr_cq(hr_qp->ibqp.recv_cq); + + hns_roce_lock_cqs(send_cq, recv_cq); + + if (!is_user) { + __hns_roce_v1_cq_clean(recv_cq, hr_qp->qpn, hr_qp->ibqp.srq ? + to_hr_srq(hr_qp->ibqp.srq) : NULL); + if (send_cq != recv_cq) + __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); + } + + hns_roce_qp_remove(hr_dev, hr_qp); + + hns_roce_unlock_cqs(send_cq, recv_cq); + + hns_roce_qp_free(hr_dev, hr_qp); + + /* Not special_QP, free their QPN */ + if ((hr_qp->ibqp.qp_type == IB_QPT_RC) || + (hr_qp->ibqp.qp_type == IB_QPT_UC) || + (hr_qp->ibqp.qp_type == IB_QPT_UD)) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + + hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); + + if (is_user) { + ib_umem_release(hr_qp->umem); + } else { + kfree(hr_qp->sq.wrid); + kfree(hr_qp->rq.wrid); + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + } +} + +int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + + hns_roce_v1_destroy_qp_common(hr_dev, hr_qp, !!ibqp->pd->uobject); + + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) + kfree(hr_to_hr_sqp(hr_qp)); + else + kfree(hr_qp); + + return 0; +} + +struct hns_roce_v1_priv hr_v1_priv; + +struct hns_roce_hw hns_roce_hw_v1 = { + .reset = hns_roce_v1_reset, + .hw_profile = hns_roce_v1_profile, + .hw_init = hns_roce_v1_init, + .hw_exit = hns_roce_v1_exit, + .set_gid = hns_roce_v1_set_gid, + .set_mac = hns_roce_v1_set_mac, + .set_mtu = hns_roce_v1_set_mtu, + .write_mtpt = hns_roce_v1_write_mtpt, + .write_cqc = hns_roce_v1_write_cqc, + .clear_hem = hns_roce_v1_clear_hem, + .modify_qp = hns_roce_v1_modify_qp, + .query_qp = hns_roce_v1_query_qp, + .destroy_qp = hns_roce_v1_destroy_qp, + .post_send = hns_roce_v1_post_send, + .post_recv = hns_roce_v1_post_recv, + .req_notify_cq = hns_roce_v1_req_notify_cq, + .poll_cq = hns_roce_v1_poll_cq, + .priv = &hr_v1_priv, +}; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h new file mode 100644 index 000000000000..539b0a3b92b0 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -0,0 +1,990 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _HNS_ROCE_HW_V1_H +#define _HNS_ROCE_HW_V1_H + +#define CQ_STATE_VALID 2 + +#define HNS_ROCE_V1_MAX_PD_NUM 0x8000 +#define HNS_ROCE_V1_MAX_CQ_NUM 0x10000 +#define HNS_ROCE_V1_MAX_CQE_NUM 0x8000 + +#define HNS_ROCE_V1_MAX_QP_NUM 0x40000 +#define HNS_ROCE_V1_MAX_WQE_NUM 0x4000 + +#define HNS_ROCE_V1_MAX_MTPT_NUM 0x80000 + +#define HNS_ROCE_V1_MAX_MTT_SEGS 0x100000 + +#define HNS_ROCE_V1_MAX_QP_INIT_RDMA 128 +#define HNS_ROCE_V1_MAX_QP_DEST_RDMA 128 + +#define HNS_ROCE_V1_MAX_SQ_DESC_SZ 64 +#define HNS_ROCE_V1_MAX_RQ_DESC_SZ 64 +#define HNS_ROCE_V1_SG_NUM 2 +#define HNS_ROCE_V1_INLINE_SIZE 32 + +#define HNS_ROCE_V1_UAR_NUM 256 +#define HNS_ROCE_V1_PHY_UAR_NUM 8 + +#define HNS_ROCE_V1_GID_NUM 16 + +#define HNS_ROCE_V1_NUM_COMP_EQE 0x8000 +#define HNS_ROCE_V1_NUM_ASYNC_EQE 0x400 + +#define HNS_ROCE_V1_QPC_ENTRY_SIZE 256 +#define HNS_ROCE_V1_IRRL_ENTRY_SIZE 8 +#define HNS_ROCE_V1_CQC_ENTRY_SIZE 64 +#define HNS_ROCE_V1_MTPT_ENTRY_SIZE 64 +#define HNS_ROCE_V1_MTT_ENTRY_SIZE 64 + +#define HNS_ROCE_V1_CQE_ENTRY_SIZE 32 +#define HNS_ROCE_V1_PAGE_SIZE_SUPPORT 0xFFFFF000 + +#define HNS_ROCE_V1_EXT_RAQ_WF 8 +#define HNS_ROCE_V1_RAQ_ENTRY 64 +#define HNS_ROCE_V1_RAQ_DEPTH 32768 +#define HNS_ROCE_V1_RAQ_SIZE (HNS_ROCE_V1_RAQ_ENTRY * HNS_ROCE_V1_RAQ_DEPTH) + +#define HNS_ROCE_V1_SDB_DEPTH 0x400 +#define HNS_ROCE_V1_ODB_DEPTH 0x400 + +#define HNS_ROCE_V1_DB_RSVD 0x80 + +#define HNS_ROCE_V1_SDB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_SDB_ALFUL (HNS_ROCE_V1_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD) +#define HNS_ROCE_V1_ODB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_ODB_ALFUL (HNS_ROCE_V1_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) + +#define HNS_ROCE_V1_EXT_SDB_DEPTH 0x4000 +#define HNS_ROCE_V1_EXT_ODB_DEPTH 0x4000 +#define HNS_ROCE_V1_EXT_SDB_ENTRY 16 +#define HNS_ROCE_V1_EXT_ODB_ENTRY 16 +#define HNS_ROCE_V1_EXT_SDB_SIZE \ + (HNS_ROCE_V1_EXT_SDB_DEPTH * HNS_ROCE_V1_EXT_SDB_ENTRY) +#define HNS_ROCE_V1_EXT_ODB_SIZE \ + (HNS_ROCE_V1_EXT_ODB_DEPTH * HNS_ROCE_V1_EXT_ODB_ENTRY) + +#define HNS_ROCE_V1_EXT_SDB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_EXT_SDB_ALFUL \ + (HNS_ROCE_V1_EXT_SDB_DEPTH - HNS_ROCE_V1_DB_RSVD) +#define HNS_ROCE_V1_EXT_ODB_ALEPT HNS_ROCE_V1_DB_RSVD +#define HNS_ROCE_V1_EXT_ODB_ALFUL \ + (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD) + +#define HNS_ROCE_BT_RSV_BUF_SIZE (1 << 17) + +#define HNS_ROCE_ODB_POLL_MODE 0 + +#define HNS_ROCE_SDB_NORMAL_MODE 0 +#define HNS_ROCE_SDB_EXTEND_MODE 1 + +#define HNS_ROCE_ODB_EXTEND_MODE 1 + +#define KEY_VALID 0x02 + +#define HNS_ROCE_CQE_QPN_MASK 0x3ffff +#define HNS_ROCE_CQE_STATUS_MASK 0x1f +#define HNS_ROCE_CQE_OPCODE_MASK 0xf + +#define HNS_ROCE_CQE_SUCCESS 0x00 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_LENGTH_ERR 0x01 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_QP_OP_ERR 0x02 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_PROT_ERR 0x03 +#define HNS_ROCE_CQE_SYNDROME_WR_FLUSH_ERR 0x04 +#define HNS_ROCE_CQE_SYNDROME_MEM_MANAGE_OPERATE_ERR 0x05 +#define HNS_ROCE_CQE_SYNDROME_BAD_RESP_ERR 0x06 +#define HNS_ROCE_CQE_SYNDROME_LOCAL_ACCESS_ERR 0x07 +#define HNS_ROCE_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR 0x08 +#define HNS_ROCE_CQE_SYNDROME_REMOTE_ACCESS_ERR 0x09 +#define HNS_ROCE_CQE_SYNDROME_REMOTE_OP_ERR 0x0a +#define HNS_ROCE_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR 0x0b +#define HNS_ROCE_CQE_SYNDROME_RNR_RETRY_EXC_ERR 0x0c + +#define QP1C_CFGN_OFFSET 0x28 +#define PHY_PORT_OFFSET 0x8 +#define MTPT_IDX_SHIFT 16 +#define ALL_PORT_VAL_OPEN 0x3f +#define POL_TIME_INTERVAL_VAL 0x80 +#define SLEEP_TIME_INTERVAL 20 +#define SQ_PSN_SHIFT 8 +#define QKEY_VAL 0x80010000 +#define SDB_INV_CNT_OFFSET 8 + +struct hns_roce_cq_context { + u32 cqc_byte_4; + u32 cq_bt_l; + u32 cqc_byte_12; + u32 cur_cqe_ba0_l; + u32 cqc_byte_20; + u32 cqe_tptr_addr_l; + u32 cur_cqe_ba1_l; + u32 cqc_byte_32; +}; + +#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S 0 +#define CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_M \ + (((1UL << 2) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S) + +#define CQ_CONTEXT_CQC_BYTE_4_CQN_S 16 +#define CQ_CONTEXT_CQC_BYTE_4_CQN_M \ + (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_4_CQN_S) + +#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S 0 +#define CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M \ + (((1UL << 17) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_S) + +#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S 20 +#define CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_M \ + (((1UL << 4) - 1) << CQ_CONTEXT_CQC_BYTE_12_CQ_CQE_SHIFT_S) + +#define CQ_CONTEXT_CQC_BYTE_12_CEQN_S 24 +#define CQ_CONTEXT_CQC_BYTE_12_CEQN_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_12_CEQN_S) + +#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S 0 +#define CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S) + +#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S 16 +#define CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M \ + (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_S) + +#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S 8 +#define CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S) + +#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S 0 +#define CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M \ + (((1UL << 5) - 1) << CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_S) + +#define CQ_CONTEXT_CQC_BYTE_32_SE_FLAG_S 9 + +#define CQ_CONTEXT_CQC_BYTE_32_CE_FLAG_S 8 +#define CQ_CONTEXT_CQC_BYTE_32_NOTIFICATION_FLAG_S 14 +#define CQ_CQNTEXT_CQC_BYTE_32_TYPE_OF_COMPLETION_NOTIFICATION_S 15 + +#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S 16 +#define CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M \ + (((1UL << 16) - 1) << CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S) + +struct hns_roce_cqe { + u32 cqe_byte_4; + union { + u32 r_key; + u32 immediate_data; + }; + u32 byte_cnt; + u32 cqe_byte_16; + u32 cqe_byte_20; + u32 s_mac_l; + u32 cqe_byte_28; + u32 reserved; +}; + +#define CQE_BYTE_4_OWNER_S 7 +#define CQE_BYTE_4_SQ_RQ_FLAG_S 14 + +#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_S 8 +#define CQE_BYTE_4_STATUS_OF_THE_OPERATION_M \ + (((1UL << 5) - 1) << CQE_BYTE_4_STATUS_OF_THE_OPERATION_S) + +#define CQE_BYTE_4_WQE_INDEX_S 16 +#define CQE_BYTE_4_WQE_INDEX_M (((1UL << 14) - 1) << CQE_BYTE_4_WQE_INDEX_S) + +#define CQE_BYTE_4_OPERATION_TYPE_S 0 +#define CQE_BYTE_4_OPERATION_TYPE_M \ + (((1UL << 4) - 1) << CQE_BYTE_4_OPERATION_TYPE_S) + +#define CQE_BYTE_4_IMM_INDICATOR_S 15 + +#define CQE_BYTE_16_LOCAL_QPN_S 0 +#define CQE_BYTE_16_LOCAL_QPN_M (((1UL << 24) - 1) << CQE_BYTE_16_LOCAL_QPN_S) + +#define CQE_BYTE_20_PORT_NUM_S 26 +#define CQE_BYTE_20_PORT_NUM_M (((1UL << 3) - 1) << CQE_BYTE_20_PORT_NUM_S) + +#define CQE_BYTE_20_SL_S 24 +#define CQE_BYTE_20_SL_M (((1UL << 2) - 1) << CQE_BYTE_20_SL_S) + +#define CQE_BYTE_20_REMOTE_QPN_S 0 +#define CQE_BYTE_20_REMOTE_QPN_M \ + (((1UL << 24) - 1) << CQE_BYTE_20_REMOTE_QPN_S) + +#define CQE_BYTE_20_GRH_PRESENT_S 29 + +#define CQE_BYTE_28_P_KEY_IDX_S 16 +#define CQE_BYTE_28_P_KEY_IDX_M (((1UL << 16) - 1) << CQE_BYTE_28_P_KEY_IDX_S) + +#define CQ_DB_REQ_NOT_SOL 0 +#define CQ_DB_REQ_NOT (1 << 16) + +struct hns_roce_v1_mpt_entry { + u32 mpt_byte_4; + u32 pbl_addr_l; + u32 mpt_byte_12; + u32 virt_addr_l; + u32 virt_addr_h; + u32 length; + u32 mpt_byte_28; + u32 pa0_l; + u32 mpt_byte_36; + u32 mpt_byte_40; + u32 mpt_byte_44; + u32 mpt_byte_48; + u32 pa4_l; + u32 mpt_byte_56; + u32 mpt_byte_60; + u32 mpt_byte_64; +}; + +#define MPT_BYTE_4_KEY_STATE_S 0 +#define MPT_BYTE_4_KEY_STATE_M (((1UL << 2) - 1) << MPT_BYTE_4_KEY_STATE_S) + +#define MPT_BYTE_4_KEY_S 8 +#define MPT_BYTE_4_KEY_M (((1UL << 8) - 1) << MPT_BYTE_4_KEY_S) + +#define MPT_BYTE_4_PAGE_SIZE_S 16 +#define MPT_BYTE_4_PAGE_SIZE_M (((1UL << 2) - 1) << MPT_BYTE_4_PAGE_SIZE_S) + +#define MPT_BYTE_4_MW_TYPE_S 20 + +#define MPT_BYTE_4_MW_BIND_ENABLE_S 21 + +#define MPT_BYTE_4_OWN_S 22 + +#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_S 24 +#define MPT_BYTE_4_MEMORY_LOCATION_TYPE_M \ + (((1UL << 2) - 1) << MPT_BYTE_4_MEMORY_LOCATION_TYPE_S) + +#define MPT_BYTE_4_REMOTE_ATOMIC_S 26 +#define MPT_BYTE_4_LOCAL_WRITE_S 27 +#define MPT_BYTE_4_REMOTE_WRITE_S 28 +#define MPT_BYTE_4_REMOTE_READ_S 29 +#define MPT_BYTE_4_REMOTE_INVAL_ENABLE_S 30 +#define MPT_BYTE_4_ADDRESS_TYPE_S 31 + +#define MPT_BYTE_12_PBL_ADDR_H_S 0 +#define MPT_BYTE_12_PBL_ADDR_H_M \ + (((1UL << 17) - 1) << MPT_BYTE_12_PBL_ADDR_H_S) + +#define MPT_BYTE_12_MW_BIND_COUNTER_S 17 +#define MPT_BYTE_12_MW_BIND_COUNTER_M \ + (((1UL << 15) - 1) << MPT_BYTE_12_MW_BIND_COUNTER_S) + +#define MPT_BYTE_28_PD_S 0 +#define MPT_BYTE_28_PD_M (((1UL << 16) - 1) << MPT_BYTE_28_PD_S) + +#define MPT_BYTE_28_L_KEY_IDX_L_S 16 +#define MPT_BYTE_28_L_KEY_IDX_L_M \ + (((1UL << 16) - 1) << MPT_BYTE_28_L_KEY_IDX_L_S) + +#define MPT_BYTE_36_PA0_H_S 0 +#define MPT_BYTE_36_PA0_H_M (((1UL << 5) - 1) << MPT_BYTE_36_PA0_H_S) + +#define MPT_BYTE_36_PA1_L_S 8 +#define MPT_BYTE_36_PA1_L_M (((1UL << 24) - 1) << MPT_BYTE_36_PA1_L_S) + +#define MPT_BYTE_40_PA1_H_S 0 +#define MPT_BYTE_40_PA1_H_M (((1UL << 13) - 1) << MPT_BYTE_40_PA1_H_S) + +#define MPT_BYTE_40_PA2_L_S 16 +#define MPT_BYTE_40_PA2_L_M (((1UL << 16) - 1) << MPT_BYTE_40_PA2_L_S) + +#define MPT_BYTE_44_PA2_H_S 0 +#define MPT_BYTE_44_PA2_H_M (((1UL << 21) - 1) << MPT_BYTE_44_PA2_H_S) + +#define MPT_BYTE_44_PA3_L_S 24 +#define MPT_BYTE_44_PA3_L_M (((1UL << 8) - 1) << MPT_BYTE_44_PA3_L_S) + +#define MPT_BYTE_48_PA3_H_S 0 +#define MPT_BYTE_48_PA3_H_M (((1UL << 29) - 1) << MPT_BYTE_48_PA3_H_S) + +#define MPT_BYTE_56_PA4_H_S 0 +#define MPT_BYTE_56_PA4_H_M (((1UL << 5) - 1) << MPT_BYTE_56_PA4_H_S) + +#define MPT_BYTE_56_PA5_L_S 8 +#define MPT_BYTE_56_PA5_L_M (((1UL << 24) - 1) << MPT_BYTE_56_PA5_L_S) + +#define MPT_BYTE_60_PA5_H_S 0 +#define MPT_BYTE_60_PA5_H_M (((1UL << 13) - 1) << MPT_BYTE_60_PA5_H_S) + +#define MPT_BYTE_60_PA6_L_S 16 +#define MPT_BYTE_60_PA6_L_M (((1UL << 16) - 1) << MPT_BYTE_60_PA6_L_S) + +#define MPT_BYTE_64_PA6_H_S 0 +#define MPT_BYTE_64_PA6_H_M (((1UL << 21) - 1) << MPT_BYTE_64_PA6_H_S) + +#define MPT_BYTE_64_L_KEY_IDX_H_S 24 +#define MPT_BYTE_64_L_KEY_IDX_H_M \ + (((1UL << 8) - 1) << MPT_BYTE_64_L_KEY_IDX_H_S) + +struct hns_roce_wqe_ctrl_seg { + __be32 sgl_pa_h; + __be32 flag; + __be32 imm_data; + __be32 msg_length; +}; + +struct hns_roce_wqe_data_seg { + __be64 addr; + __be32 lkey; + __be32 len; +}; + +struct hns_roce_wqe_raddr_seg { + __be32 rkey; + __be32 len;/* reserved */ + __be64 raddr; +}; + +struct hns_roce_rq_wqe_ctrl { + + u32 rwqe_byte_4; + u32 rocee_sgl_ba_l; + u32 rwqe_byte_12; + u32 reserved[5]; +}; + +#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S 16 +#define RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_M \ + (((1UL << 6) - 1) << RQ_WQE_CTRL_RWQE_BYTE_12_RWQE_SGE_NUM_S) + +#define HNS_ROCE_QP_DESTROY_TIMEOUT_MSECS 10000 + +#define GID_LEN 16 + +struct hns_roce_ud_send_wqe { + u32 dmac_h; + u32 u32_8; + u32 immediate_data; + + u32 u32_16; + union { + unsigned char dgid[GID_LEN]; + struct { + u32 u32_20; + u32 u32_24; + u32 u32_28; + u32 u32_32; + }; + }; + + u32 u32_36; + u32 u32_40; + + u32 va0_l; + u32 va0_h; + u32 l_key0; + + u32 va1_l; + u32 va1_h; + u32 l_key1; +}; + +#define UD_SEND_WQE_U32_4_DMAC_0_S 0 +#define UD_SEND_WQE_U32_4_DMAC_0_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_0_S) + +#define UD_SEND_WQE_U32_4_DMAC_1_S 8 +#define UD_SEND_WQE_U32_4_DMAC_1_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_1_S) + +#define UD_SEND_WQE_U32_4_DMAC_2_S 16 +#define UD_SEND_WQE_U32_4_DMAC_2_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_2_S) + +#define UD_SEND_WQE_U32_4_DMAC_3_S 24 +#define UD_SEND_WQE_U32_4_DMAC_3_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_4_DMAC_3_S) + +#define UD_SEND_WQE_U32_8_DMAC_4_S 0 +#define UD_SEND_WQE_U32_8_DMAC_4_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_4_S) + +#define UD_SEND_WQE_U32_8_DMAC_5_S 8 +#define UD_SEND_WQE_U32_8_DMAC_5_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_8_DMAC_5_S) + +#define UD_SEND_WQE_U32_8_OPERATION_TYPE_S 16 +#define UD_SEND_WQE_U32_8_OPERATION_TYPE_M \ + (((1UL << 4) - 1) << UD_SEND_WQE_U32_8_OPERATION_TYPE_S) + +#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S 24 +#define UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_M \ + (((1UL << 6) - 1) << UD_SEND_WQE_U32_8_NUMBER_OF_DATA_SEG_S) + +#define UD_SEND_WQE_U32_8_SEND_GL_ROUTING_HDR_FLAG_S 31 + +#define UD_SEND_WQE_U32_16_DEST_QP_S 0 +#define UD_SEND_WQE_U32_16_DEST_QP_M \ + (((1UL << 24) - 1) << UD_SEND_WQE_U32_16_DEST_QP_S) + +#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S 24 +#define UD_SEND_WQE_U32_16_MAX_STATIC_RATE_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_16_MAX_STATIC_RATE_S) + +#define UD_SEND_WQE_U32_36_FLOW_LABEL_S 0 +#define UD_SEND_WQE_U32_36_FLOW_LABEL_M \ + (((1UL << 20) - 1) << UD_SEND_WQE_U32_36_FLOW_LABEL_S) + +#define UD_SEND_WQE_U32_36_PRIORITY_S 20 +#define UD_SEND_WQE_U32_36_PRIORITY_M \ + (((1UL << 4) - 1) << UD_SEND_WQE_U32_36_PRIORITY_S) + +#define UD_SEND_WQE_U32_36_SGID_INDEX_S 24 +#define UD_SEND_WQE_U32_36_SGID_INDEX_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_36_SGID_INDEX_S) + +#define UD_SEND_WQE_U32_40_HOP_LIMIT_S 0 +#define UD_SEND_WQE_U32_40_HOP_LIMIT_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_HOP_LIMIT_S) + +#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S 8 +#define UD_SEND_WQE_U32_40_TRAFFIC_CLASS_M \ + (((1UL << 8) - 1) << UD_SEND_WQE_U32_40_TRAFFIC_CLASS_S) + +struct hns_roce_sqp_context { + u32 qp1c_bytes_4; + u32 sq_rq_bt_l; + u32 qp1c_bytes_12; + u32 qp1c_bytes_16; + u32 qp1c_bytes_20; + u32 qp1c_bytes_28; + u32 cur_rq_wqe_ba_l; + u32 qp1c_bytes_32; + u32 cur_sq_wqe_ba_l; + u32 qp1c_bytes_40; +}; + +#define QP1C_BYTES_4_SQ_WQE_SHIFT_S 8 +#define QP1C_BYTES_4_SQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP1C_BYTES_4_SQ_WQE_SHIFT_S) + +#define QP1C_BYTES_4_RQ_WQE_SHIFT_S 12 +#define QP1C_BYTES_4_RQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP1C_BYTES_4_RQ_WQE_SHIFT_S) + +#define QP1C_BYTES_4_PD_S 16 +#define QP1C_BYTES_4_PD_M (((1UL << 16) - 1) << QP1C_BYTES_4_PD_S) + +#define QP1C_BYTES_12_SQ_RQ_BT_H_S 0 +#define QP1C_BYTES_12_SQ_RQ_BT_H_M \ + (((1UL << 17) - 1) << QP1C_BYTES_12_SQ_RQ_BT_H_S) + +#define QP1C_BYTES_16_RQ_HEAD_S 0 +#define QP1C_BYTES_16_RQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_16_RQ_HEAD_S) + +#define QP1C_BYTES_16_PORT_NUM_S 16 +#define QP1C_BYTES_16_PORT_NUM_M \ + (((1UL << 3) - 1) << QP1C_BYTES_16_PORT_NUM_S) + +#define QP1C_BYTES_16_SIGNALING_TYPE_S 27 +#define QP1C_BYTES_16_LOCAL_ENABLE_E2E_CREDIT_S 28 +#define QP1C_BYTES_16_RQ_BA_FLG_S 29 +#define QP1C_BYTES_16_SQ_BA_FLG_S 30 +#define QP1C_BYTES_16_QP1_ERR_S 31 + +#define QP1C_BYTES_20_SQ_HEAD_S 0 +#define QP1C_BYTES_20_SQ_HEAD_M (((1UL << 15) - 1) << QP1C_BYTES_20_SQ_HEAD_S) + +#define QP1C_BYTES_20_PKEY_IDX_S 16 +#define QP1C_BYTES_20_PKEY_IDX_M \ + (((1UL << 16) - 1) << QP1C_BYTES_20_PKEY_IDX_S) + +#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S 0 +#define QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S) + +#define QP1C_BYTES_28_RQ_CUR_IDX_S 16 +#define QP1C_BYTES_28_RQ_CUR_IDX_M \ + (((1UL << 15) - 1) << QP1C_BYTES_28_RQ_CUR_IDX_S) + +#define QP1C_BYTES_32_TX_CQ_NUM_S 0 +#define QP1C_BYTES_32_TX_CQ_NUM_M \ + (((1UL << 16) - 1) << QP1C_BYTES_32_TX_CQ_NUM_S) + +#define QP1C_BYTES_32_RX_CQ_NUM_S 16 +#define QP1C_BYTES_32_RX_CQ_NUM_M \ + (((1UL << 16) - 1) << QP1C_BYTES_32_RX_CQ_NUM_S) + +#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S 0 +#define QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S) + +#define QP1C_BYTES_40_SQ_CUR_IDX_S 16 +#define QP1C_BYTES_40_SQ_CUR_IDX_M \ + (((1UL << 15) - 1) << QP1C_BYTES_40_SQ_CUR_IDX_S) + +#define HNS_ROCE_WQE_INLINE (1UL<<31) +#define HNS_ROCE_WQE_SE (1UL<<30) + +#define HNS_ROCE_WQE_SGE_NUM_BIT 24 +#define HNS_ROCE_WQE_IMM (1UL<<23) +#define HNS_ROCE_WQE_FENCE (1UL<<21) +#define HNS_ROCE_WQE_CQ_NOTIFY (1UL<<20) + +#define HNS_ROCE_WQE_OPCODE_SEND (0<<16) +#define HNS_ROCE_WQE_OPCODE_RDMA_READ (1<<16) +#define HNS_ROCE_WQE_OPCODE_RDMA_WRITE (2<<16) +#define HNS_ROCE_WQE_OPCODE_LOCAL_INV (4<<16) +#define HNS_ROCE_WQE_OPCODE_UD_SEND (7<<16) +#define HNS_ROCE_WQE_OPCODE_MASK (15<<16) + +struct hns_roce_qp_context { + u32 qpc_bytes_4; + u32 qpc_bytes_8; + u32 qpc_bytes_12; + u32 qpc_bytes_16; + u32 sq_rq_bt_l; + u32 qpc_bytes_24; + u32 irrl_ba_l; + u32 qpc_bytes_32; + u32 qpc_bytes_36; + u32 dmac_l; + u32 qpc_bytes_44; + u32 qpc_bytes_48; + u8 dgid[16]; + u32 qpc_bytes_68; + u32 cur_rq_wqe_ba_l; + u32 qpc_bytes_76; + u32 rx_rnr_time; + u32 qpc_bytes_84; + u32 qpc_bytes_88; + union { + u32 rx_sge_len; + u32 dma_length; + }; + union { + u32 rx_sge_num; + u32 rx_send_pktn; + u32 r_key; + }; + u32 va_l; + u32 va_h; + u32 qpc_bytes_108; + u32 qpc_bytes_112; + u32 rx_cur_sq_wqe_ba_l; + u32 qpc_bytes_120; + u32 qpc_bytes_124; + u32 qpc_bytes_128; + u32 qpc_bytes_132; + u32 qpc_bytes_136; + u32 qpc_bytes_140; + u32 qpc_bytes_144; + u32 qpc_bytes_148; + union { + u32 rnr_retry; + u32 ack_time; + }; + u32 qpc_bytes_156; + u32 pkt_use_len; + u32 qpc_bytes_164; + u32 qpc_bytes_168; + union { + u32 sge_use_len; + u32 pa_use_len; + }; + u32 qpc_bytes_176; + u32 qpc_bytes_180; + u32 tx_cur_sq_wqe_ba_l; + u32 qpc_bytes_188; + u32 rvd21; +}; + +#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S 0 +#define QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_S) + +#define QP_CONTEXT_QPC_BYTE_4_ENABLE_FPMR_S 3 +#define QP_CONTEXT_QPC_BYTE_4_RDMA_READ_ENABLE_S 4 +#define QP_CONTEXT_QPC_BYTE_4_RDMA_WRITE_ENABLE_S 5 +#define QP_CONTEXT_QPC_BYTE_4_ATOMIC_OPERATION_ENABLE_S 6 +#define QP_CONTEXT_QPC_BYTE_4_RDMAR_USE_S 7 + +#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S 8 +#define QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_SQ_WQE_SHIFT_S) + +#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S 12 +#define QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_M \ + (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_4_RQ_WQE_SHIFT_S) + +#define QP_CONTEXT_QPC_BYTES_4_PD_S 16 +#define QP_CONTEXT_QPC_BYTES_4_PD_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_4_PD_S) + +#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S 0 +#define QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_TX_COMPLETION_S) + +#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S 16 +#define QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_8_RX_COMPLETION_S) + +#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S 0 +#define QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_SRQ_NUMBER_S) + +#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_12_P_KEY_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_S 0 +#define QP_CONTEXT_QPC_BYTES_16_QP_NUM_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_16_QP_NUM_S) + +#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S 0 +#define QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M \ + (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S) + +#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S 18 +#define QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S) + +#define QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S 23 + +#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M \ + (((1UL << 17) - 1) << QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S 18 +#define QP_CONTEXT_QPC_BYTES_32_MIG_STATE_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_32_MIG_STATE_S) + +#define QP_CONTEXT_QPC_BYTE_32_LOCAL_ENABLE_E2E_CREDITS_S 20 +#define QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S 21 +#define QP_CONTEXT_QPC_BYTE_32_LOOPBACK_INDICATOR_S 22 +#define QP_CONTEXT_QPC_BYTE_32_GLOBAL_HEADER_S 23 + +#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S 24 +#define QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_32_RESPONDER_RESOURCES_S) + +#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_S 0 +#define QP_CONTEXT_QPC_BYTES_36_DEST_QP_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_36_DEST_QP_S) + +#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S 24 +#define QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_36_SGID_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_S 0 +#define QP_CONTEXT_QPC_BYTES_44_DMAC_H_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_44_DMAC_H_S) + +#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S 16 +#define QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_MAXIMUM_STATIC_RATE_S) + +#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_S 24 +#define QP_CONTEXT_QPC_BYTES_44_HOPLMT_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_44_HOPLMT_S) + +#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S 0 +#define QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_M \ + (((1UL << 20) - 1) << QP_CONTEXT_QPC_BYTES_48_FLOWLABEL_S) + +#define QP_CONTEXT_QPC_BYTES_48_TCLASS_S 20 +#define QP_CONTEXT_QPC_BYTES_48_TCLASS_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_48_TCLASS_S) + +#define QP_CONTEXT_QPC_BYTES_48_MTU_S 28 +#define QP_CONTEXT_QPC_BYTES_48_MTU_M \ + (((1UL << 4) - 1) << QP_CONTEXT_QPC_BYTES_48_MTU_S) + +#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S 0 +#define QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S 8 +#define QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S) + +#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_84_LAST_ACK_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S 24 +#define QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_84_TRRL_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S 0 +#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_88_RX_REQ_EPSN_S) + +#define QP_CONTEXT_QPC_BYTES_88_RX_REQ_PSN_ERR_FLAG_S 24 +#define QP_CONTEXT_QPC_BYTES_88_RX_LAST_OPCODE_FLG_S 25 + +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S 26 +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_M \ + (((1UL << 2) - 1) << \ + QP_CONTEXT_QPC_BYTES_88_RQ_REQ_LAST_OPERATION_TYPE_S) + +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S 29 +#define QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_88_RQ_REQ_RDMA_WR_FLAG_S) + +#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_108_TRRL_SDB_PSN_FLG_S 24 +#define QP_CONTEXT_QPC_BYTES_108_TRRL_TDB_PSN_FLG_S 25 + +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TDB_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S 24 +#define QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_112_TRRL_TAIL_S) + +#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S 0 +#define QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_S) + +#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S 16 +#define QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_124_IRRL_MSG_IDX_S) + +#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S 0 +#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_128_RX_ACK_EPSN_S) + +#define QP_CONTEXT_QPC_BYTES_128_RX_ACK_PSN_ERR_FLG_S 24 + +#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S 25 +#define QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_128_ACK_LAST_OPERATION_TYPE_S) + +#define QP_CONTEXT_QPC_BYTES_128_IRRL_PSN_VLD_FLG_S 27 + +#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S 24 +#define QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_132_IRRL_TAIL_S) + +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S 24 +#define QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_136_RETRY_MSG_FPKT_PSN_L_S) + +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S 0 +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_FPKT_PSN_H_S) + +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S 16 +#define QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_140_RETRY_MSG_MSN_S) + +#define QP_CONTEXT_QPC_BYTES_140_RNR_RETRY_FLG_S 31 + +#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_S 0 +#define QP_CONTEXT_QPC_BYTES_144_QP_STATE_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_144_QP_STATE_S) + +#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S 0 +#define QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_148_CHECK_FLAG_S) + +#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S 2 +#define QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S) + +#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S 5 +#define QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_148_RNR_RETRY_COUNT_S) + +#define QP_CONTEXT_QPC_BYTES_148_LSN_S 8 +#define QP_CONTEXT_QPC_BYTES_148_LSN_M \ + (((1UL << 16) - 1) << QP_CONTEXT_QPC_BYTES_148_LSN_S) + +#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S 0 +#define QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RETRY_COUNT_INIT_S) + +#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S 3 +#define QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_TIMEOUT_S) + +#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S 8 +#define QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_RNR_RETRY_COUNT_INIT_S) + +#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S 11 +#define QP_CONTEXT_QPC_BYTES_156_PORT_NUM_M \ + (((1UL << 3) - 1) << QP_CONTEXT_QPC_BYTES_156_PORT_NUM_S) + +#define QP_CONTEXT_QPC_BYTES_156_SL_S 14 +#define QP_CONTEXT_QPC_BYTES_156_SL_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_SL_S) + +#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S 16 +#define QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_156_INITIATOR_DEPTH_S) + +#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S 24 +#define QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_156_ACK_REQ_IND_S) + +#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_164_SQ_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_164_SQ_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S 24 +#define QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_M \ + (((1UL << 8) - 1) << QP_CONTEXT_QPC_BYTES_164_IRRL_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S 0 +#define QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_M \ + (((1UL << 24) - 1) << QP_CONTEXT_QPC_BYTES_168_RETRY_SQ_PSN_S) + +#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S 24 +#define QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_SGE_USE_FLA_S) + +#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S 26 +#define QP_CONTEXT_QPC_BYTES_168_DB_TYPE_M \ + (((1UL << 2) - 1) << QP_CONTEXT_QPC_BYTES_168_DB_TYPE_S) + +#define QP_CONTEXT_QPC_BYTES_168_MSG_LP_IND_S 28 +#define QP_CONTEXT_QPC_BYTES_168_CSDB_LP_IND_S 29 +#define QP_CONTEXT_QPC_BYTES_168_QP_ERR_FLG_S 30 + +#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S 0 +#define QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_DB_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_176_RETRY_DB_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S 0 +#define QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S) + +#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_180_SQ_CUR_INDEX_S) + +#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S 0 +#define QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M \ + (((1UL << 5) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S) + +#define QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S 8 + +#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S 16 +#define QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_M \ + (((1UL << 15) - 1) << QP_CONTEXT_QPC_BYTES_188_TX_RETRY_CUR_INDEX_S) + +struct hns_roce_rq_db { + u32 u32_4; + u32 u32_8; +}; + +#define RQ_DOORBELL_U32_4_RQ_HEAD_S 0 +#define RQ_DOORBELL_U32_4_RQ_HEAD_M \ + (((1UL << 15) - 1) << RQ_DOORBELL_U32_4_RQ_HEAD_S) + +#define RQ_DOORBELL_U32_8_QPN_S 0 +#define RQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << RQ_DOORBELL_U32_8_QPN_S) + +#define RQ_DOORBELL_U32_8_CMD_S 28 +#define RQ_DOORBELL_U32_8_CMD_M (((1UL << 3) - 1) << RQ_DOORBELL_U32_8_CMD_S) + +#define RQ_DOORBELL_U32_8_HW_SYNC_S 31 + +struct hns_roce_sq_db { + u32 u32_4; + u32 u32_8; +}; + +#define SQ_DOORBELL_U32_4_SQ_HEAD_S 0 +#define SQ_DOORBELL_U32_4_SQ_HEAD_M \ + (((1UL << 15) - 1) << SQ_DOORBELL_U32_4_SQ_HEAD_S) + +#define SQ_DOORBELL_U32_4_PORT_S 18 +#define SQ_DOORBELL_U32_4_PORT_M (((1UL << 3) - 1) << SQ_DOORBELL_U32_4_PORT_S) + +#define SQ_DOORBELL_U32_8_QPN_S 0 +#define SQ_DOORBELL_U32_8_QPN_M (((1UL << 24) - 1) << SQ_DOORBELL_U32_8_QPN_S) + +#define SQ_DOORBELL_HW_SYNC_S 31 + +struct hns_roce_ext_db { + int esdb_dep; + int eodb_dep; + struct hns_roce_buf_list *sdb_buf_list; + struct hns_roce_buf_list *odb_buf_list; +}; + +struct hns_roce_db_table { + int sdb_ext_mod; + int odb_ext_mod; + struct hns_roce_ext_db *ext_db; +}; + +struct hns_roce_bt_table { + struct hns_roce_buf_list qpc_buf; + struct hns_roce_buf_list mtpt_buf; + struct hns_roce_buf_list cqc_buf; +}; + +struct hns_roce_v1_priv { + struct hns_roce_db_table db_table; + struct hns_roce_raq_table raq_table; + struct hns_roce_bt_table bt_table; +}; + +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool dereset); + +#endif diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c new file mode 100644 index 000000000000..764e35a54457 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -0,0 +1,1168 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/acpi.h> +#include <linux/of_platform.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_smi.h> +#include <rdma/ib_user_verbs.h> +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_user.h" +#include "hns_roce_hem.h" + +/** + * hns_roce_addrconf_ifid_eui48 - Get default gid. + * @eui: eui. + * @vlan_id: gid + * @dev: net device + * Description: + * MAC convert to GID + * gid[0..7] = fe80 0000 0000 0000 + * gid[8] = mac[0] ^ 2 + * gid[9] = mac[1] + * gid[10] = mac[2] + * gid[11] = ff (VLAN ID high byte (4 MS bits)) + * gid[12] = fe (VLAN ID low byte) + * gid[13] = mac[3] + * gid[14] = mac[4] + * gid[15] = mac[5] + */ +static void hns_roce_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, + struct net_device *dev) +{ + memcpy(eui, dev->dev_addr, 3); + memcpy(eui + 5, dev->dev_addr + 3, 3); + if (vlan_id < 0x1000) { + eui[3] = vlan_id >> 8; + eui[4] = vlan_id & 0xff; + } else { + eui[3] = 0xff; + eui[4] = 0xfe; + } + eui[0] ^= 2; +} + +static void hns_roce_make_default_gid(struct net_device *dev, union ib_gid *gid) +{ + memset(gid, 0, sizeof(*gid)); + gid->raw[0] = 0xFE; + gid->raw[1] = 0x80; + hns_roce_addrconf_ifid_eui48(&gid->raw[8], 0xffff, dev); +} + +/** + * hns_get_gid_index - Get gid index. + * @hr_dev: pointer to structure hns_roce_dev. + * @port: port, value range: 0 ~ MAX + * @gid_index: gid_index, value range: 0 ~ MAX + * Description: + * N ports shared gids, allocation method as follow: + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * And so on + */ +int hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +{ + return gid_index * hr_dev->caps.num_ports + port; +} + +static int hns_roce_set_gid(struct hns_roce_dev *hr_dev, u8 port, int gid_index, + union ib_gid *gid) +{ + struct device *dev = &hr_dev->pdev->dev; + u8 gid_idx = 0; + + if (gid_index >= hr_dev->caps.gid_table_len[port]) { + dev_err(dev, "gid_index %d illegal, port %d gid range: 0~%d\n", + gid_index, port, hr_dev->caps.gid_table_len[port] - 1); + return -EINVAL; + } + + gid_idx = hns_get_gid_index(hr_dev, port, gid_index); + + if (!memcmp(gid, &hr_dev->iboe.gid_table[gid_idx], sizeof(*gid))) + return -EINVAL; + + memcpy(&hr_dev->iboe.gid_table[gid_idx], gid, sizeof(*gid)); + + hr_dev->hw->set_gid(hr_dev, port, gid_index, gid); + + return 0; +} + +static void hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) +{ + u8 phy_port; + u32 i = 0; + + if (!memcmp(hr_dev->dev_addr[port], addr, MAC_ADDR_OCTET_NUM)) + return; + + for (i = 0; i < MAC_ADDR_OCTET_NUM; i++) + hr_dev->dev_addr[port][i] = addr[i]; + + phy_port = hr_dev->iboe.phy_port[port]; + hr_dev->hw->set_mac(hr_dev, phy_port, addr); +} + +static void hns_roce_set_mtu(struct hns_roce_dev *hr_dev, u8 port, int mtu) +{ + u8 phy_port = hr_dev->iboe.phy_port[port]; + enum ib_mtu tmp; + + tmp = iboe_get_mtu(mtu); + if (!tmp) + tmp = IB_MTU_256; + + hr_dev->hw->set_mtu(hr_dev, phy_port, tmp); +} + +static void hns_roce_update_gids(struct hns_roce_dev *hr_dev, int port) +{ + struct ib_event event; + + /* Refresh gid in ib_cache */ + event.device = &hr_dev->ib_dev; + event.element.port_num = port + 1; + event.event = IB_EVENT_GID_CHANGE; + ib_dispatch_event(&event); +} + +static int handle_en_event(struct hns_roce_dev *hr_dev, u8 port, + unsigned long event) +{ + struct device *dev = &hr_dev->pdev->dev; + struct net_device *netdev; + unsigned long flags; + union ib_gid gid; + int ret = 0; + + netdev = hr_dev->iboe.netdevs[port]; + if (!netdev) { + dev_err(dev, "port(%d) can't find netdev\n", port); + return -ENODEV; + } + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); + + switch (event) { + case NETDEV_UP: + case NETDEV_CHANGE: + case NETDEV_REGISTER: + case NETDEV_CHANGEADDR: + hns_roce_set_mac(hr_dev, port, netdev->dev_addr); + hns_roce_make_default_gid(netdev, &gid); + ret = hns_roce_set_gid(hr_dev, port, 0, &gid); + if (!ret) + hns_roce_update_gids(hr_dev, port); + break; + case NETDEV_DOWN: + /* + * In v1 engine, only support all ports closed together. + */ + break; + default: + dev_dbg(dev, "NETDEV event = 0x%x!\n", (u32)(event)); + break; + } + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + return ret; +} + +static int hns_roce_netdev_event(struct notifier_block *self, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct hns_roce_ib_iboe *iboe = NULL; + struct hns_roce_dev *hr_dev = NULL; + u8 port = 0; + int ret = 0; + + hr_dev = container_of(self, struct hns_roce_dev, iboe.nb); + iboe = &hr_dev->iboe; + + for (port = 0; port < hr_dev->caps.num_ports; port++) { + if (dev == iboe->netdevs[port]) { + ret = handle_en_event(hr_dev, port, event); + if (ret) + return NOTIFY_DONE; + break; + } + } + + return NOTIFY_DONE; +} + +static void hns_roce_addr_event(int event, struct net_device *event_netdev, + struct hns_roce_dev *hr_dev, union ib_gid *gid) +{ + struct hns_roce_ib_iboe *iboe = NULL; + int gid_table_len = 0; + unsigned long flags; + union ib_gid zgid; + u8 gid_idx = 0; + u8 port = 0; + int i = 0; + int free; + struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ? + rdma_vlan_dev_real_dev(event_netdev) : + event_netdev; + + if (event != NETDEV_UP && event != NETDEV_DOWN) + return; + + iboe = &hr_dev->iboe; + while (port < hr_dev->caps.num_ports) { + if (real_dev == iboe->netdevs[port]) + break; + port++; + } + + if (port >= hr_dev->caps.num_ports) { + dev_dbg(&hr_dev->pdev->dev, "can't find netdev\n"); + return; + } + + memset(zgid.raw, 0, sizeof(zgid.raw)); + free = -1; + gid_table_len = hr_dev->caps.gid_table_len[port]; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); + + for (i = 0; i < gid_table_len; i++) { + gid_idx = hns_get_gid_index(hr_dev, port, i); + if (!memcmp(gid->raw, iboe->gid_table[gid_idx].raw, + sizeof(gid->raw))) + break; + if (free < 0 && !memcmp(zgid.raw, + iboe->gid_table[gid_idx].raw, sizeof(zgid.raw))) + free = i; + } + + if (i >= gid_table_len) { + if (free < 0) { + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + dev_dbg(&hr_dev->pdev->dev, + "gid_index overflow, port(%d)\n", port); + return; + } + if (!hns_roce_set_gid(hr_dev, port, free, gid)) + hns_roce_update_gids(hr_dev, port); + } else if (event == NETDEV_DOWN) { + if (!hns_roce_set_gid(hr_dev, port, i, &zgid)) + hns_roce_update_gids(hr_dev, port); + } + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); +} + +static int hns_roce_inet_event(struct notifier_block *self, unsigned long event, + void *ptr) +{ + struct in_ifaddr *ifa = ptr; + struct hns_roce_dev *hr_dev; + struct net_device *dev = ifa->ifa_dev->dev; + union ib_gid gid; + + ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid); + + hr_dev = container_of(self, struct hns_roce_dev, iboe.nb_inet); + + hns_roce_addr_event(event, dev, hr_dev, &gid); + + return NOTIFY_DONE; +} + +static int hns_roce_setup_mtu_gids(struct hns_roce_dev *hr_dev) +{ + struct in_ifaddr *ifa_list = NULL; + union ib_gid gid = {{0} }; + u32 ipaddr = 0; + int index = 0; + int ret = 0; + u8 i = 0; + + for (i = 0; i < hr_dev->caps.num_ports; i++) { + hns_roce_set_mtu(hr_dev, i, + ib_mtu_enum_to_int(hr_dev->caps.max_mtu)); + hns_roce_set_mac(hr_dev, i, hr_dev->iboe.netdevs[i]->dev_addr); + + if (hr_dev->iboe.netdevs[i]->ip_ptr) { + ifa_list = hr_dev->iboe.netdevs[i]->ip_ptr->ifa_list; + index = 1; + while (ifa_list) { + ipaddr = ifa_list->ifa_address; + ipv6_addr_set_v4mapped(ipaddr, + (struct in6_addr *)&gid); + ret = hns_roce_set_gid(hr_dev, i, index, &gid); + if (ret) + break; + index++; + ifa_list = ifa_list->ifa_next; + } + hns_roce_update_gids(hr_dev, i); + } + } + + return ret; +} + +static int hns_roce_query_device(struct ib_device *ib_dev, + struct ib_device_attr *props, + struct ib_udata *uhw) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + memset(props, 0, sizeof(*props)); + + props->sys_image_guid = hr_dev->sys_image_guid; + props->max_mr_size = (u64)(~(0ULL)); + props->page_size_cap = hr_dev->caps.page_size_cap; + props->vendor_id = hr_dev->vendor_id; + props->vendor_part_id = hr_dev->vendor_part_id; + props->hw_ver = hr_dev->hw_rev; + props->max_qp = hr_dev->caps.num_qps; + props->max_qp_wr = hr_dev->caps.max_wqes; + props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT | + IB_DEVICE_RC_RNR_NAK_GEN; + props->max_sge = hr_dev->caps.max_sq_sg; + props->max_sge_rd = 1; + props->max_cq = hr_dev->caps.num_cqs; + props->max_cqe = hr_dev->caps.max_cqes; + props->max_mr = hr_dev->caps.num_mtpts; + props->max_pd = hr_dev->caps.num_pds; + props->max_qp_rd_atom = hr_dev->caps.max_qp_dest_rdma; + props->max_qp_init_rd_atom = hr_dev->caps.max_qp_init_rdma; + props->atomic_cap = IB_ATOMIC_NONE; + props->max_pkeys = 1; + props->local_ca_ack_delay = hr_dev->caps.local_ca_ack_delay; + + return 0; +} + +static struct net_device *hns_roce_get_netdev(struct ib_device *ib_dev, + u8 port_num) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct net_device *ndev; + + if (port_num < 1 || port_num > hr_dev->caps.num_ports) + return NULL; + + rcu_read_lock(); + + ndev = hr_dev->iboe.netdevs[port_num - 1]; + if (ndev) + dev_hold(ndev); + + rcu_read_unlock(); + return ndev; +} + +static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, + struct ib_port_attr *props) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + struct net_device *net_dev; + unsigned long flags; + enum ib_mtu mtu; + u8 port; + + assert(port_num > 0); + port = port_num - 1; + + memset(props, 0, sizeof(*props)); + + props->max_mtu = hr_dev->caps.max_mtu; + props->gid_tbl_len = hr_dev->caps.gid_table_len[port]; + props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP | + IB_PORT_VENDOR_CLASS_SUP | + IB_PORT_BOOT_MGMT_SUP; + props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN; + props->pkey_tbl_len = 1; + props->active_width = IB_WIDTH_4X; + props->active_speed = 1; + + spin_lock_irqsave(&hr_dev->iboe.lock, flags); + + net_dev = hr_dev->iboe.netdevs[port]; + if (!net_dev) { + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + dev_err(dev, "find netdev %d failed!\r\n", port); + return -EINVAL; + } + + mtu = iboe_get_mtu(net_dev->mtu); + props->active_mtu = mtu ? min(props->max_mtu, mtu) : IB_MTU_256; + props->state = (netif_running(net_dev) && netif_carrier_ok(net_dev)) ? + IB_PORT_ACTIVE : IB_PORT_DOWN; + props->phys_state = (props->state == IB_PORT_ACTIVE) ? 5 : 3; + + spin_unlock_irqrestore(&hr_dev->iboe.lock, flags); + + return 0; +} + +static enum rdma_link_layer hns_roce_get_link_layer(struct ib_device *device, + u8 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static int hns_roce_query_gid(struct ib_device *ib_dev, u8 port_num, int index, + union ib_gid *gid) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + u8 gid_idx = 0; + u8 port; + + if (port_num < 1 || port_num > hr_dev->caps.num_ports || + index >= hr_dev->caps.gid_table_len[port_num - 1]) { + dev_err(dev, + "port_num %d index %d illegal! correct range: port_num 1~%d index 0~%d!\n", + port_num, index, hr_dev->caps.num_ports, + hr_dev->caps.gid_table_len[port_num - 1] - 1); + return -EINVAL; + } + + port = port_num - 1; + gid_idx = hns_get_gid_index(hr_dev, port, index); + if (gid_idx >= HNS_ROCE_MAX_GID_NUM) { + dev_err(dev, "port_num %d index %d illegal! total gid num %d!\n", + port_num, index, HNS_ROCE_MAX_GID_NUM); + return -EINVAL; + } + + memcpy(gid->raw, hr_dev->iboe.gid_table[gid_idx].raw, + HNS_ROCE_GID_SIZE); + + return 0; +} + +static int hns_roce_query_pkey(struct ib_device *ib_dev, u8 port, u16 index, + u16 *pkey) +{ + *pkey = PKEY_ID; + + return 0; +} + +static int hns_roce_modify_device(struct ib_device *ib_dev, int mask, + struct ib_device_modify *props) +{ + unsigned long flags; + + if (mask & ~IB_DEVICE_MODIFY_NODE_DESC) + return -EOPNOTSUPP; + + if (mask & IB_DEVICE_MODIFY_NODE_DESC) { + spin_lock_irqsave(&to_hr_dev(ib_dev)->sm_lock, flags); + memcpy(ib_dev->node_desc, props->node_desc, NODE_DESC_SIZE); + spin_unlock_irqrestore(&to_hr_dev(ib_dev)->sm_lock, flags); + } + + return 0; +} + +static int hns_roce_modify_port(struct ib_device *ib_dev, u8 port_num, int mask, + struct ib_port_modify *props) +{ + return 0; +} + +static struct ib_ucontext *hns_roce_alloc_ucontext(struct ib_device *ib_dev, + struct ib_udata *udata) +{ + int ret = 0; + struct hns_roce_ucontext *context; + struct hns_roce_ib_alloc_ucontext_resp resp; + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + + resp.qp_tab_size = hr_dev->caps.num_qps; + + context = kmalloc(sizeof(*context), GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + ret = hns_roce_uar_alloc(hr_dev, &context->uar); + if (ret) + goto error_fail_uar_alloc; + + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (ret) + goto error_fail_copy_to_udata; + + return &context->ibucontext; + +error_fail_copy_to_udata: + hns_roce_uar_free(hr_dev, &context->uar); + +error_fail_uar_alloc: + kfree(context); + + return ERR_PTR(ret); +} + +static int hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct hns_roce_ucontext *context = to_hr_ucontext(ibcontext); + + hns_roce_uar_free(to_hr_dev(ibcontext->device), &context->uar); + kfree(context); + + return 0; +} + +static int hns_roce_mmap(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + if (((vma->vm_end - vma->vm_start) % PAGE_SIZE) != 0) + return -EINVAL; + + if (vma->vm_pgoff == 0) { + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + if (io_remap_pfn_range(vma, vma->vm_start, + to_hr_ucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + } else { + return -EINVAL; + } + + return 0; +} + +static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int ret; + + ret = hns_roce_query_port(ib_dev, port_num, &attr); + if (ret) + return ret; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} + +static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; + + unregister_inetaddr_notifier(&iboe->nb_inet); + unregister_netdevice_notifier(&iboe->nb); + ib_unregister_device(&hr_dev->ib_dev); +} + +static int hns_roce_register_device(struct hns_roce_dev *hr_dev) +{ + int ret; + struct hns_roce_ib_iboe *iboe = NULL; + struct ib_device *ib_dev = NULL; + struct device *dev = &hr_dev->pdev->dev; + + iboe = &hr_dev->iboe; + spin_lock_init(&iboe->lock); + + ib_dev = &hr_dev->ib_dev; + strlcpy(ib_dev->name, "hisi_%d", IB_DEVICE_NAME_MAX); + + ib_dev->owner = THIS_MODULE; + ib_dev->node_type = RDMA_NODE_IB_CA; + ib_dev->dma_device = dev; + + ib_dev->phys_port_cnt = hr_dev->caps.num_ports; + ib_dev->local_dma_lkey = hr_dev->caps.reserved_lkey; + ib_dev->num_comp_vectors = hr_dev->caps.num_comp_vectors; + ib_dev->uverbs_abi_ver = 1; + ib_dev->uverbs_cmd_mask = + (1ULL << IB_USER_VERBS_CMD_GET_CONTEXT) | + (1ULL << IB_USER_VERBS_CMD_QUERY_DEVICE) | + (1ULL << IB_USER_VERBS_CMD_QUERY_PORT) | + (1ULL << IB_USER_VERBS_CMD_ALLOC_PD) | + (1ULL << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ULL << IB_USER_VERBS_CMD_REG_MR) | + (1ULL << IB_USER_VERBS_CMD_DEREG_MR) | + (1ULL << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | + (1ULL << IB_USER_VERBS_CMD_CREATE_CQ) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_CQ) | + (1ULL << IB_USER_VERBS_CMD_CREATE_QP) | + (1ULL << IB_USER_VERBS_CMD_MODIFY_QP) | + (1ULL << IB_USER_VERBS_CMD_QUERY_QP) | + (1ULL << IB_USER_VERBS_CMD_DESTROY_QP); + + /* HCA||device||port */ + ib_dev->modify_device = hns_roce_modify_device; + ib_dev->query_device = hns_roce_query_device; + ib_dev->query_port = hns_roce_query_port; + ib_dev->modify_port = hns_roce_modify_port; + ib_dev->get_link_layer = hns_roce_get_link_layer; + ib_dev->get_netdev = hns_roce_get_netdev; + ib_dev->query_gid = hns_roce_query_gid; + ib_dev->query_pkey = hns_roce_query_pkey; + ib_dev->alloc_ucontext = hns_roce_alloc_ucontext; + ib_dev->dealloc_ucontext = hns_roce_dealloc_ucontext; + ib_dev->mmap = hns_roce_mmap; + + /* PD */ + ib_dev->alloc_pd = hns_roce_alloc_pd; + ib_dev->dealloc_pd = hns_roce_dealloc_pd; + + /* AH */ + ib_dev->create_ah = hns_roce_create_ah; + ib_dev->query_ah = hns_roce_query_ah; + ib_dev->destroy_ah = hns_roce_destroy_ah; + + /* QP */ + ib_dev->create_qp = hns_roce_create_qp; + ib_dev->modify_qp = hns_roce_modify_qp; + ib_dev->query_qp = hr_dev->hw->query_qp; + ib_dev->destroy_qp = hr_dev->hw->destroy_qp; + ib_dev->post_send = hr_dev->hw->post_send; + ib_dev->post_recv = hr_dev->hw->post_recv; + + /* CQ */ + ib_dev->create_cq = hns_roce_ib_create_cq; + ib_dev->destroy_cq = hns_roce_ib_destroy_cq; + ib_dev->req_notify_cq = hr_dev->hw->req_notify_cq; + ib_dev->poll_cq = hr_dev->hw->poll_cq; + + /* MR */ + ib_dev->get_dma_mr = hns_roce_get_dma_mr; + ib_dev->reg_user_mr = hns_roce_reg_user_mr; + ib_dev->dereg_mr = hns_roce_dereg_mr; + + /* OTHERS */ + ib_dev->get_port_immutable = hns_roce_port_immutable; + + ret = ib_register_device(ib_dev, NULL); + if (ret) { + dev_err(dev, "ib_register_device failed!\n"); + return ret; + } + + ret = hns_roce_setup_mtu_gids(hr_dev); + if (ret) { + dev_err(dev, "roce_setup_mtu_gids failed!\n"); + goto error_failed_setup_mtu_gids; + } + + iboe->nb.notifier_call = hns_roce_netdev_event; + ret = register_netdevice_notifier(&iboe->nb); + if (ret) { + dev_err(dev, "register_netdevice_notifier failed!\n"); + goto error_failed_setup_mtu_gids; + } + + iboe->nb_inet.notifier_call = hns_roce_inet_event; + ret = register_inetaddr_notifier(&iboe->nb_inet); + if (ret) { + dev_err(dev, "register inet addr notifier failed!\n"); + goto error_failed_register_inetaddr_notifier; + } + + return 0; + +error_failed_register_inetaddr_notifier: + unregister_netdevice_notifier(&iboe->nb); + +error_failed_setup_mtu_gids: + ib_unregister_device(ib_dev); + + return ret; +} + +static const struct of_device_id hns_roce_of_match[] = { + { .compatible = "hisilicon,hns-roce-v1", .data = &hns_roce_hw_v1, }, + {}, +}; +MODULE_DEVICE_TABLE(of, hns_roce_of_match); + +static const struct acpi_device_id hns_roce_acpi_match[] = { + { "HISI00D1", (kernel_ulong_t)&hns_roce_hw_v1 }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, hns_roce_acpi_match); + +static int hns_roce_node_match(struct device *dev, void *fwnode) +{ + return dev->fwnode == fwnode; +} + +static struct +platform_device *hns_roce_find_pdev(struct fwnode_handle *fwnode) +{ + struct device *dev; + + /* get the 'device'corresponding to matching 'fwnode' */ + dev = bus_find_device(&platform_bus_type, NULL, + fwnode, hns_roce_node_match); + /* get the platform device */ + return dev ? to_platform_device(dev) : NULL; +} + +static int hns_roce_get_cfg(struct hns_roce_dev *hr_dev) +{ + int i; + int ret; + u8 phy_port; + int port_cnt = 0; + struct device *dev = &hr_dev->pdev->dev; + struct device_node *net_node; + struct net_device *netdev = NULL; + struct platform_device *pdev = NULL; + struct resource *res; + + /* check if we are compatible with the underlying SoC */ + if (dev_of_node(dev)) { + const struct of_device_id *of_id; + + of_id = of_match_node(hns_roce_of_match, dev->of_node); + if (!of_id) { + dev_err(dev, "device is not compatible!\n"); + return -ENXIO; + } + hr_dev->hw = (struct hns_roce_hw *)of_id->data; + if (!hr_dev->hw) { + dev_err(dev, "couldn't get H/W specific DT data!\n"); + return -ENXIO; + } + } else if (is_acpi_device_node(dev->fwnode)) { + const struct acpi_device_id *acpi_id; + + acpi_id = acpi_match_device(hns_roce_acpi_match, dev); + if (!acpi_id) { + dev_err(dev, "device is not compatible!\n"); + return -ENXIO; + } + hr_dev->hw = (struct hns_roce_hw *) acpi_id->driver_data; + if (!hr_dev->hw) { + dev_err(dev, "couldn't get H/W specific ACPI data!\n"); + return -ENXIO; + } + } else { + dev_err(dev, "can't read compatibility data from DT or ACPI\n"); + return -ENXIO; + } + + /* get the mapped register base address */ + res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "memory resource not found!\n"); + return -EINVAL; + } + hr_dev->reg_base = devm_ioremap_resource(dev, res); + if (IS_ERR(hr_dev->reg_base)) + return PTR_ERR(hr_dev->reg_base); + + /* read the node_guid of IB device from the DT or ACPI */ + ret = device_property_read_u8_array(dev, "node-guid", + (u8 *)&hr_dev->ib_dev.node_guid, + GUID_LEN); + if (ret) { + dev_err(dev, "couldn't get node_guid from DT or ACPI!\n"); + return ret; + } + + /* get the RoCE associated ethernet ports or netdevices */ + for (i = 0; i < HNS_ROCE_MAX_PORTS; i++) { + if (dev_of_node(dev)) { + net_node = of_parse_phandle(dev->of_node, "eth-handle", + i); + if (!net_node) + continue; + pdev = of_find_device_by_node(net_node); + } else if (is_acpi_device_node(dev->fwnode)) { + struct acpi_reference_args args; + struct fwnode_handle *fwnode; + + ret = acpi_node_get_property_reference(dev->fwnode, + "eth-handle", + i, &args); + if (ret) + continue; + fwnode = acpi_fwnode_handle(args.adev); + pdev = hns_roce_find_pdev(fwnode); + } else { + dev_err(dev, "cannot read data from DT or ACPI\n"); + return -ENXIO; + } + + if (pdev) { + netdev = platform_get_drvdata(pdev); + phy_port = (u8)i; + if (netdev) { + hr_dev->iboe.netdevs[port_cnt] = netdev; + hr_dev->iboe.phy_port[port_cnt] = phy_port; + } else { + dev_err(dev, "no netdev found with pdev %s\n", + pdev->name); + return -ENODEV; + } + port_cnt++; + } + } + + if (port_cnt == 0) { + dev_err(dev, "unable to get eth-handle for available ports!\n"); + return -EINVAL; + } + + hr_dev->caps.num_ports = port_cnt; + + /* cmd issue mode: 0 is poll, 1 is event */ + hr_dev->cmd_mod = 1; + hr_dev->loop_idc = 0; + + /* read the interrupt names from the DT or ACPI */ + ret = device_property_read_string_array(dev, "interrupt-names", + hr_dev->irq_names, + HNS_ROCE_MAX_IRQ_NUM); + if (ret < 0) { + dev_err(dev, "couldn't get interrupt names from DT or ACPI!\n"); + return ret; + } + + /* fetch the interrupt numbers */ + for (i = 0; i < HNS_ROCE_MAX_IRQ_NUM; i++) { + hr_dev->irq[i] = platform_get_irq(hr_dev->pdev, i); + if (hr_dev->irq[i] <= 0) { + dev_err(dev, "platform get of irq[=%d] failed!\n", i); + return -EINVAL; + } + } + + return 0; +} + +static int hns_roce_init_hem(struct hns_roce_dev *hr_dev) +{ + int ret; + struct device *dev = &hr_dev->pdev->dev; + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtt_table, + HEM_TYPE_MTT, hr_dev->caps.mtt_entry_sz, + hr_dev->caps.num_mtt_segs, 1); + if (ret) { + dev_err(dev, "Failed to init MTT context memory, aborting.\n"); + return ret; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table, + HEM_TYPE_MTPT, hr_dev->caps.mtpt_entry_sz, + hr_dev->caps.num_mtpts, 1); + if (ret) { + dev_err(dev, "Failed to init MTPT context memory, aborting.\n"); + goto err_unmap_mtt; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.qp_table, + HEM_TYPE_QPC, hr_dev->caps.qpc_entry_sz, + hr_dev->caps.num_qps, 1); + if (ret) { + dev_err(dev, "Failed to init QP context memory, aborting.\n"); + goto err_unmap_dmpt; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->qp_table.irrl_table, + HEM_TYPE_IRRL, + hr_dev->caps.irrl_entry_sz * + hr_dev->caps.max_qp_init_rdma, + hr_dev->caps.num_qps, 1); + if (ret) { + dev_err(dev, "Failed to init irrl_table memory, aborting.\n"); + goto err_unmap_qp; + } + + ret = hns_roce_init_hem_table(hr_dev, &hr_dev->cq_table.table, + HEM_TYPE_CQC, hr_dev->caps.cqc_entry_sz, + hr_dev->caps.num_cqs, 1); + if (ret) { + dev_err(dev, "Failed to init CQ context memory, aborting.\n"); + goto err_unmap_irrl; + } + + return 0; + +err_unmap_irrl: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.irrl_table); + +err_unmap_qp: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->qp_table.qp_table); + +err_unmap_dmpt: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtpt_table); + +err_unmap_mtt: + hns_roce_cleanup_hem_table(hr_dev, &hr_dev->mr_table.mtt_table); + + return ret; +} + +/** +* hns_roce_setup_hca - setup host channel adapter +* @hr_dev: pointer to hns roce device +* Return : int +*/ +static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) +{ + int ret; + struct device *dev = &hr_dev->pdev->dev; + + spin_lock_init(&hr_dev->sm_lock); + spin_lock_init(&hr_dev->bt_cmd_lock); + + ret = hns_roce_init_uar_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to initialize uar table. aborting\n"); + return ret; + } + + ret = hns_roce_uar_alloc(hr_dev, &hr_dev->priv_uar); + if (ret) { + dev_err(dev, "Failed to allocate priv_uar.\n"); + goto err_uar_table_free; + } + + ret = hns_roce_init_pd_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init protected domain table.\n"); + goto err_uar_alloc_free; + } + + ret = hns_roce_init_mr_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init memory region table.\n"); + goto err_pd_table_free; + } + + ret = hns_roce_init_cq_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init completion queue table.\n"); + goto err_mr_table_free; + } + + ret = hns_roce_init_qp_table(hr_dev); + if (ret) { + dev_err(dev, "Failed to init queue pair table.\n"); + goto err_cq_table_free; + } + + return 0; + +err_cq_table_free: + hns_roce_cleanup_cq_table(hr_dev); + +err_mr_table_free: + hns_roce_cleanup_mr_table(hr_dev); + +err_pd_table_free: + hns_roce_cleanup_pd_table(hr_dev); + +err_uar_alloc_free: + hns_roce_uar_free(hr_dev, &hr_dev->priv_uar); + +err_uar_table_free: + hns_roce_cleanup_uar_table(hr_dev); + return ret; +} + +/** +* hns_roce_probe - RoCE driver entrance +* @pdev: pointer to platform device +* Return : int +* +*/ +static int hns_roce_probe(struct platform_device *pdev) +{ + int ret; + struct hns_roce_dev *hr_dev; + struct device *dev = &pdev->dev; + + hr_dev = (struct hns_roce_dev *)ib_alloc_device(sizeof(*hr_dev)); + if (!hr_dev) + return -ENOMEM; + + memset((u8 *)hr_dev + sizeof(struct ib_device), 0, + sizeof(struct hns_roce_dev) - sizeof(struct ib_device)); + + hr_dev->pdev = pdev; + platform_set_drvdata(pdev, hr_dev); + + if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64ULL)) && + dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32ULL))) { + dev_err(dev, "Not usable DMA addressing mode\n"); + ret = -EIO; + goto error_failed_get_cfg; + } + + ret = hns_roce_get_cfg(hr_dev); + if (ret) { + dev_err(dev, "Get Configuration failed!\n"); + goto error_failed_get_cfg; + } + + ret = hr_dev->hw->reset(hr_dev, true); + if (ret) { + dev_err(dev, "Reset RoCE engine failed!\n"); + goto error_failed_get_cfg; + } + + hr_dev->hw->hw_profile(hr_dev); + + ret = hns_roce_cmd_init(hr_dev); + if (ret) { + dev_err(dev, "cmd init failed!\n"); + goto error_failed_cmd_init; + } + + ret = hns_roce_init_eq_table(hr_dev); + if (ret) { + dev_err(dev, "eq init failed!\n"); + goto error_failed_eq_table; + } + + if (hr_dev->cmd_mod) { + ret = hns_roce_cmd_use_events(hr_dev); + if (ret) { + dev_err(dev, "Switch to event-driven cmd failed!\n"); + goto error_failed_use_event; + } + } + + ret = hns_roce_init_hem(hr_dev); + if (ret) { + dev_err(dev, "init HEM(Hardware Entry Memory) failed!\n"); + goto error_failed_init_hem; + } + + ret = hns_roce_setup_hca(hr_dev); + if (ret) { + dev_err(dev, "setup hca failed!\n"); + goto error_failed_setup_hca; + } + + ret = hr_dev->hw->hw_init(hr_dev); + if (ret) { + dev_err(dev, "hw_init failed!\n"); + goto error_failed_engine_init; + } + + ret = hns_roce_register_device(hr_dev); + if (ret) + goto error_failed_register_device; + + return 0; + +error_failed_register_device: + hr_dev->hw->hw_exit(hr_dev); + +error_failed_engine_init: + hns_roce_cleanup_bitmap(hr_dev); + +error_failed_setup_hca: + hns_roce_cleanup_hem(hr_dev); + +error_failed_init_hem: + if (hr_dev->cmd_mod) + hns_roce_cmd_use_polling(hr_dev); + +error_failed_use_event: + hns_roce_cleanup_eq_table(hr_dev); + +error_failed_eq_table: + hns_roce_cmd_cleanup(hr_dev); + +error_failed_cmd_init: + ret = hr_dev->hw->reset(hr_dev, false); + if (ret) + dev_err(&hr_dev->pdev->dev, "roce_engine reset fail\n"); + +error_failed_get_cfg: + ib_dealloc_device(&hr_dev->ib_dev); + + return ret; +} + +/** +* hns_roce_remove - remove RoCE device +* @pdev: pointer to platform device +*/ +static int hns_roce_remove(struct platform_device *pdev) +{ + struct hns_roce_dev *hr_dev = platform_get_drvdata(pdev); + + hns_roce_unregister_device(hr_dev); + hr_dev->hw->hw_exit(hr_dev); + hns_roce_cleanup_bitmap(hr_dev); + hns_roce_cleanup_hem(hr_dev); + + if (hr_dev->cmd_mod) + hns_roce_cmd_use_polling(hr_dev); + + hns_roce_cleanup_eq_table(hr_dev); + hns_roce_cmd_cleanup(hr_dev); + hr_dev->hw->reset(hr_dev, false); + + ib_dealloc_device(&hr_dev->ib_dev); + + return 0; +} + +static struct platform_driver hns_roce_driver = { + .probe = hns_roce_probe, + .remove = hns_roce_remove, + .driver = { + .name = DRV_NAME, + .of_match_table = hns_roce_of_match, + .acpi_match_table = ACPI_PTR(hns_roce_acpi_match), + }, +}; + +module_platform_driver(hns_roce_driver); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Wei Hu <xavier.huwei@huawei.com>"); +MODULE_AUTHOR("Nenglong Zhao <zhaonenglong@hisilicon.com>"); +MODULE_AUTHOR("Lijun Ou <oulijun@huawei.com>"); +MODULE_DESCRIPTION("HNS RoCE Driver"); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c new file mode 100644 index 000000000000..fb87883ead34 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -0,0 +1,617 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include <rdma/ib_umem.h> +#include "hns_roce_device.h" +#include "hns_roce_cmd.h" +#include "hns_roce_hem.h" + +static u32 hw_index_to_key(unsigned long ind) +{ + return (u32)(ind >> 24) | (ind << 8); +} + +static unsigned long key_to_hw_index(u32 key) +{ + return (key << 24) | (key >> 8); +} + +static int hns_roce_sw2hw_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) +{ + return hns_roce_cmd_mbox(hr_dev, mailbox->dma, 0, mpt_index, 0, + HNS_ROCE_CMD_SW2HW_MPT, + HNS_ROCE_CMD_TIME_CLASS_B); +} + +static int hns_roce_hw2sw_mpt(struct hns_roce_dev *hr_dev, + struct hns_roce_cmd_mailbox *mailbox, + unsigned long mpt_index) +{ + return hns_roce_cmd_mbox(hr_dev, 0, mailbox ? mailbox->dma : 0, + mpt_index, !mailbox, HNS_ROCE_CMD_HW2SW_MPT, + HNS_ROCE_CMD_TIME_CLASS_B); +} + +static int hns_roce_buddy_alloc(struct hns_roce_buddy *buddy, int order, + unsigned long *seg) +{ + int o; + u32 m; + + spin_lock(&buddy->lock); + + for (o = order; o <= buddy->max_order; ++o) { + if (buddy->num_free[o]) { + m = 1 << (buddy->max_order - o); + *seg = find_first_bit(buddy->bits[o], m); + if (*seg < m) + goto found; + } + } + spin_unlock(&buddy->lock); + return -1; + + found: + clear_bit(*seg, buddy->bits[o]); + --buddy->num_free[o]; + + while (o > order) { + --o; + *seg <<= 1; + set_bit(*seg ^ 1, buddy->bits[o]); + ++buddy->num_free[o]; + } + + spin_unlock(&buddy->lock); + + *seg <<= order; + return 0; +} + +static void hns_roce_buddy_free(struct hns_roce_buddy *buddy, unsigned long seg, + int order) +{ + seg >>= order; + + spin_lock(&buddy->lock); + + while (test_bit(seg ^ 1, buddy->bits[order])) { + clear_bit(seg ^ 1, buddy->bits[order]); + --buddy->num_free[order]; + seg >>= 1; + ++order; + } + + set_bit(seg, buddy->bits[order]); + ++buddy->num_free[order]; + + spin_unlock(&buddy->lock); +} + +static int hns_roce_buddy_init(struct hns_roce_buddy *buddy, int max_order) +{ + int i, s; + + buddy->max_order = max_order; + spin_lock_init(&buddy->lock); + + buddy->bits = kzalloc((buddy->max_order + 1) * sizeof(long *), + GFP_KERNEL); + buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof(int *), + GFP_KERNEL); + if (!buddy->bits || !buddy->num_free) + goto err_out; + + for (i = 0; i <= buddy->max_order; ++i) { + s = BITS_TO_LONGS(1 << (buddy->max_order - i)); + buddy->bits[i] = kmalloc_array(s, sizeof(long), GFP_KERNEL); + if (!buddy->bits[i]) + goto err_out_free; + + bitmap_zero(buddy->bits[i], 1 << (buddy->max_order - i)); + } + + set_bit(0, buddy->bits[buddy->max_order]); + buddy->num_free[buddy->max_order] = 1; + + return 0; + +err_out_free: + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + +err_out: + kfree(buddy->bits); + kfree(buddy->num_free); + return -ENOMEM; +} + +static void hns_roce_buddy_cleanup(struct hns_roce_buddy *buddy) +{ + int i; + + for (i = 0; i <= buddy->max_order; ++i) + kfree(buddy->bits[i]); + + kfree(buddy->bits); + kfree(buddy->num_free); +} + +static int hns_roce_alloc_mtt_range(struct hns_roce_dev *hr_dev, int order, + unsigned long *seg) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + int ret = 0; + + ret = hns_roce_buddy_alloc(&mr_table->mtt_buddy, order, seg); + if (ret == -1) + return -1; + + if (hns_roce_table_get_range(hr_dev, &mr_table->mtt_table, *seg, + *seg + (1 << order) - 1)) { + hns_roce_buddy_free(&mr_table->mtt_buddy, *seg, order); + return -1; + } + + return 0; +} + +int hns_roce_mtt_init(struct hns_roce_dev *hr_dev, int npages, int page_shift, + struct hns_roce_mtt *mtt) +{ + int ret = 0; + int i; + + /* Page num is zero, correspond to DMA memory register */ + if (!npages) { + mtt->order = -1; + mtt->page_shift = HNS_ROCE_HEM_PAGE_SHIFT; + return 0; + } + + /* Note: if page_shift is zero, FAST memory regsiter */ + mtt->page_shift = page_shift; + + /* Compute MTT entry necessary */ + for (mtt->order = 0, i = HNS_ROCE_MTT_ENTRY_PER_SEG; i < npages; + i <<= 1) + ++mtt->order; + + /* Allocate MTT entry */ + ret = hns_roce_alloc_mtt_range(hr_dev, mtt->order, &mtt->first_seg); + if (ret == -1) + return -ENOMEM; + + return 0; +} + +void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + + if (mtt->order < 0) + return; + + hns_roce_buddy_free(&mr_table->mtt_buddy, mtt->first_seg, mtt->order); + hns_roce_table_put_range(hr_dev, &mr_table->mtt_table, mtt->first_seg, + mtt->first_seg + (1 << mtt->order) - 1); +} + +static int hns_roce_mr_alloc(struct hns_roce_dev *hr_dev, u32 pd, u64 iova, + u64 size, u32 access, int npages, + struct hns_roce_mr *mr) +{ + unsigned long index = 0; + int ret = 0; + struct device *dev = &hr_dev->pdev->dev; + + /* Allocate a key for mr from mr_table */ + ret = hns_roce_bitmap_alloc(&hr_dev->mr_table.mtpt_bitmap, &index); + if (ret == -1) + return -ENOMEM; + + mr->iova = iova; /* MR va starting addr */ + mr->size = size; /* MR addr range */ + mr->pd = pd; /* MR num */ + mr->access = access; /* MR access permit */ + mr->enabled = 0; /* MR active status */ + mr->key = hw_index_to_key(index); /* MR key */ + + if (size == ~0ull) { + mr->type = MR_TYPE_DMA; + mr->pbl_buf = NULL; + mr->pbl_dma_addr = 0; + } else { + mr->type = MR_TYPE_MR; + mr->pbl_buf = dma_alloc_coherent(dev, npages * 8, + &(mr->pbl_dma_addr), + GFP_KERNEL); + if (!mr->pbl_buf) + return -ENOMEM; + } + + return 0; +} + +static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr) +{ + struct device *dev = &hr_dev->pdev->dev; + int npages = 0; + int ret; + + if (mr->enabled) { + ret = hns_roce_hw2sw_mpt(hr_dev, NULL, key_to_hw_index(mr->key) + & (hr_dev->caps.num_mtpts - 1)); + if (ret) + dev_warn(dev, "HW2SW_MPT failed (%d)\n", ret); + } + + if (mr->size != ~0ULL) { + npages = ib_umem_page_count(mr->umem); + dma_free_coherent(dev, (unsigned int)(npages * 8), mr->pbl_buf, + mr->pbl_dma_addr); + } + + hns_roce_bitmap_free(&hr_dev->mr_table.mtpt_bitmap, + key_to_hw_index(mr->key)); +} + +static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, + struct hns_roce_mr *mr) +{ + int ret; + unsigned long mtpt_idx = key_to_hw_index(mr->key); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_cmd_mailbox *mailbox; + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + + /* Prepare HEM entry memory */ + ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx); + if (ret) + return ret; + + /* Allocate mailbox memory */ + mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); + if (IS_ERR(mailbox)) { + ret = PTR_ERR(mailbox); + goto err_table; + } + + ret = hr_dev->hw->write_mtpt(mailbox->buf, mr, mtpt_idx); + if (ret) { + dev_err(dev, "Write mtpt fail!\n"); + goto err_page; + } + + ret = hns_roce_sw2hw_mpt(hr_dev, mailbox, + mtpt_idx & (hr_dev->caps.num_mtpts - 1)); + if (ret) { + dev_err(dev, "SW2HW_MPT failed (%d)\n", ret); + goto err_page; + } + + mr->enabled = 1; + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + + return 0; + +err_page: + hns_roce_free_cmd_mailbox(hr_dev, mailbox); + +err_table: + hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx); + return ret; +} + +static int hns_roce_write_mtt_chunk(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, u32 start_index, + u32 npages, u64 *page_list) +{ + u32 i = 0; + __le64 *mtts = NULL; + dma_addr_t dma_handle; + u32 s = start_index * sizeof(u64); + + /* All MTTs must fit in the same page */ + if (start_index / (PAGE_SIZE / sizeof(u64)) != + (start_index + npages - 1) / (PAGE_SIZE / sizeof(u64))) + return -EINVAL; + + if (start_index & (HNS_ROCE_MTT_ENTRY_PER_SEG - 1)) + return -EINVAL; + + mtts = hns_roce_table_find(&hr_dev->mr_table.mtt_table, + mtt->first_seg + s / hr_dev->caps.mtt_entry_sz, + &dma_handle); + if (!mtts) + return -ENOMEM; + + /* Save page addr, low 12 bits : 0 */ + for (i = 0; i < npages; ++i) + mtts[i] = (cpu_to_le64(page_list[i])) >> PAGE_ADDR_SHIFT; + + return 0; +} + +static int hns_roce_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, u32 start_index, + u32 npages, u64 *page_list) +{ + int chunk; + int ret; + + if (mtt->order < 0) + return -EINVAL; + + while (npages > 0) { + chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages); + + ret = hns_roce_write_mtt_chunk(hr_dev, mtt, start_index, chunk, + page_list); + if (ret) + return ret; + + npages -= chunk; + start_index += chunk; + page_list += chunk; + } + + return 0; +} + +int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct hns_roce_buf *buf) +{ + u32 i = 0; + int ret = 0; + u64 *page_list = NULL; + + page_list = kmalloc_array(buf->npages, sizeof(*page_list), GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + for (i = 0; i < buf->npages; ++i) { + if (buf->nbufs == 1) + page_list[i] = buf->direct.map + (i << buf->page_shift); + else + page_list[i] = buf->page_list[i].map; + + } + ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list); + + kfree(page_list); + + return ret; +} + +int hns_roce_init_mr_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + int ret = 0; + + ret = hns_roce_bitmap_init(&mr_table->mtpt_bitmap, + hr_dev->caps.num_mtpts, + hr_dev->caps.num_mtpts - 1, + hr_dev->caps.reserved_mrws, 0); + if (ret) + return ret; + + ret = hns_roce_buddy_init(&mr_table->mtt_buddy, + ilog2(hr_dev->caps.num_mtt_segs)); + if (ret) + goto err_buddy; + + return 0; + +err_buddy: + hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); + return ret; +} + +void hns_roce_cleanup_mr_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_mr_table *mr_table = &hr_dev->mr_table; + + hns_roce_buddy_cleanup(&mr_table->mtt_buddy); + hns_roce_bitmap_cleanup(&mr_table->mtpt_bitmap); +} + +struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) +{ + int ret = 0; + struct hns_roce_mr *mr = NULL; + + mr = kmalloc(sizeof(*mr), GFP_KERNEL); + if (mr == NULL) + return ERR_PTR(-ENOMEM); + + /* Allocate memory region key */ + ret = hns_roce_mr_alloc(to_hr_dev(pd->device), to_hr_pd(pd)->pdn, 0, + ~0ULL, acc, 0, mr); + if (ret) + goto err_free; + + ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); + if (ret) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->key; + mr->umem = NULL; + + return &mr->ibmr; + +err_mr: + hns_roce_mr_free(to_hr_dev(pd->device), mr); + +err_free: + kfree(mr); + return ERR_PTR(ret); +} + +int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, + struct hns_roce_mtt *mtt, struct ib_umem *umem) +{ + struct scatterlist *sg; + int i, k, entry; + int ret = 0; + u64 *pages; + u32 n; + int len; + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) + return -ENOMEM; + + i = n = 0; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + len = sg_dma_len(sg) >> mtt->page_shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(sg) + umem->page_size * k; + if (i == PAGE_SIZE / sizeof(u64)) { + ret = hns_roce_write_mtt(hr_dev, mtt, n, i, + pages); + if (ret) + goto out; + n += i; + i = 0; + } + } + } + + if (i) + ret = hns_roce_write_mtt(hr_dev, mtt, n, i, pages); + +out: + free_page((unsigned long) pages); + return ret; +} + +static int hns_roce_ib_umem_write_mr(struct hns_roce_mr *mr, + struct ib_umem *umem) +{ + int i = 0; + int entry; + struct scatterlist *sg; + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + mr->pbl_buf[i] = ((u64)sg_dma_address(sg)) >> 12; + i++; + } + + /* Memory barrier */ + mb(); + + return 0; +} + +struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 virt_addr, int access_flags, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_mr *mr = NULL; + int ret = 0; + int n = 0; + + mr = kmalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + if (IS_ERR(mr->umem)) { + ret = PTR_ERR(mr->umem); + goto err_free; + } + + n = ib_umem_page_count(mr->umem); + if (mr->umem->page_size != HNS_ROCE_HEM_PAGE_SIZE) { + dev_err(dev, "Just support 4K page size but is 0x%x now!\n", + mr->umem->page_size); + ret = -EINVAL; + goto err_umem; + } + + if (n > HNS_ROCE_MAX_MTPT_PBL_NUM) { + dev_err(dev, " MR len %lld err. MR is limited to 4G at most!\n", + length); + ret = -EINVAL; + goto err_umem; + } + + ret = hns_roce_mr_alloc(hr_dev, to_hr_pd(pd)->pdn, virt_addr, length, + access_flags, n, mr); + if (ret) + goto err_umem; + + ret = hns_roce_ib_umem_write_mr(mr, mr->umem); + if (ret) + goto err_mr; + + ret = hns_roce_mr_enable(hr_dev, mr); + if (ret) + goto err_mr; + + mr->ibmr.rkey = mr->ibmr.lkey = mr->key; + + return &mr->ibmr; + +err_mr: + hns_roce_mr_free(hr_dev, mr); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + return ERR_PTR(ret); +} + +int hns_roce_dereg_mr(struct ib_mr *ibmr) +{ + struct hns_roce_mr *mr = to_hr_mr(ibmr); + + hns_roce_mr_free(to_hr_dev(ibmr->device), mr); + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return 0; +} diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c new file mode 100644 index 000000000000..05db7d59812a --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include "hns_roce_device.h" + +static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) +{ + return hns_roce_bitmap_alloc(&hr_dev->pd_bitmap, pdn); +} + +static void hns_roce_pd_free(struct hns_roce_dev *hr_dev, unsigned long pdn) +{ + hns_roce_bitmap_free(&hr_dev->pd_bitmap, pdn); +} + +int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev) +{ + return hns_roce_bitmap_init(&hr_dev->pd_bitmap, hr_dev->caps.num_pds, + hr_dev->caps.num_pds - 1, + hr_dev->caps.reserved_pds, 0); +} + +void hns_roce_cleanup_pd_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->pd_bitmap); +} + +struct ib_pd *hns_roce_alloc_pd(struct ib_device *ib_dev, + struct ib_ucontext *context, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ib_dev); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_pd *pd; + int ret; + + pd = kmalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + ret = hns_roce_pd_alloc(to_hr_dev(ib_dev), &pd->pdn); + if (ret) { + kfree(pd); + dev_err(dev, "[alloc_pd]hns_roce_pd_alloc failed!\n"); + return ERR_PTR(ret); + } + + if (context) { + if (ib_copy_to_udata(udata, &pd->pdn, sizeof(u64))) { + hns_roce_pd_free(to_hr_dev(ib_dev), pd->pdn); + dev_err(dev, "[alloc_pd]ib_copy_to_udata failed!\n"); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + + return &pd->ibpd; +} + +int hns_roce_dealloc_pd(struct ib_pd *pd) +{ + hns_roce_pd_free(to_hr_dev(pd->device), to_hr_pd(pd)->pdn); + kfree(to_hr_pd(pd)); + + return 0; +} + +int hns_roce_uar_alloc(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) +{ + struct resource *res; + int ret = 0; + + /* Using bitmap to manager UAR index */ + ret = hns_roce_bitmap_alloc(&hr_dev->uar_table.bitmap, &uar->index); + if (ret == -1) + return -ENOMEM; + + if (uar->index > 0) + uar->index = (uar->index - 1) % + (hr_dev->caps.phy_num_uars - 1) + 1; + + res = platform_get_resource(hr_dev->pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(&hr_dev->pdev->dev, "memory resource not found!\n"); + return -EINVAL; + } + uar->pfn = ((res->start) >> PAGE_SHIFT) + uar->index; + + return 0; +} + +void hns_roce_uar_free(struct hns_roce_dev *hr_dev, struct hns_roce_uar *uar) +{ + hns_roce_bitmap_free(&hr_dev->uar_table.bitmap, uar->index); +} + +int hns_roce_init_uar_table(struct hns_roce_dev *hr_dev) +{ + return hns_roce_bitmap_init(&hr_dev->uar_table.bitmap, + hr_dev->caps.num_uars, + hr_dev->caps.num_uars - 1, + hr_dev->caps.reserved_uars, 0); +} + +void hns_roce_cleanup_uar_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->uar_table.bitmap); +} diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c new file mode 100644 index 000000000000..e86dd8d06777 --- /dev/null +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -0,0 +1,838 @@ +/* + * Copyright (c) 2016 Hisilicon Limited. + * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/platform_device.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_umem.h> +#include "hns_roce_common.h" +#include "hns_roce_device.h" +#include "hns_roce_hem.h" +#include "hns_roce_user.h" + +#define SQP_NUM (2 * HNS_ROCE_MAX_PORTS) + +void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_qp *qp; + + spin_lock(&qp_table->lock); + + qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (qp) + atomic_inc(&qp->refcount); + + spin_unlock(&qp_table->lock); + + if (!qp) { + dev_warn(dev, "Async event for bogus QP %08x\n", qpn); + return; + } + + qp->event(qp, (enum hns_roce_event)event_type); + + if (atomic_dec_and_test(&qp->refcount)) + complete(&qp->free); +} + +static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, + enum hns_roce_event type) +{ + struct ib_event event; + struct ib_qp *ibqp = &hr_qp->ibqp; + + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + switch (type) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + event.event = IB_EVENT_PATH_MIG; + break; + case HNS_ROCE_EVENT_TYPE_COMM_EST: + event.event = IB_EVENT_COMM_EST; + break; + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: + event.event = IB_EVENT_QP_LAST_WQE_REACHED; + break; + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_QP_FATAL; + break; + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + event.event = IB_EVENT_PATH_MIG_ERR; + break; + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + event.event = IB_EVENT_QP_REQ_ERR; + break; + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + event.event = IB_EVENT_QP_ACCESS_ERR; + break; + default: + dev_dbg(ibqp->device->dma_device, "roce_ib: Unexpected event type %d on QP %06lx\n", + type, hr_qp->qpn); + return; + } + ibqp->event_handler(&event, ibqp->qp_context); + } +} + +static int hns_roce_reserve_range_qp(struct hns_roce_dev *hr_dev, int cnt, + int align, unsigned long *base) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + + return hns_roce_bitmap_alloc_range(&qp_table->bitmap, cnt, align, base); +} + +enum hns_roce_qp_state to_hns_roce_state(enum ib_qp_state state) +{ + switch (state) { + case IB_QPS_RESET: + return HNS_ROCE_QP_STATE_RST; + case IB_QPS_INIT: + return HNS_ROCE_QP_STATE_INIT; + case IB_QPS_RTR: + return HNS_ROCE_QP_STATE_RTR; + case IB_QPS_RTS: + return HNS_ROCE_QP_STATE_RTS; + case IB_QPS_SQD: + return HNS_ROCE_QP_STATE_SQD; + case IB_QPS_ERR: + return HNS_ROCE_QP_STATE_ERR; + default: + return HNS_ROCE_QP_NUM_STATE; + } +} + +static int hns_roce_gsi_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + int ret; + + if (!qpn) + return -EINVAL; + + hr_qp->qpn = qpn; + + spin_lock_irq(&qp_table->lock); + ret = radix_tree_insert(&hr_dev->qp_table_tree, + hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); + spin_unlock_irq(&qp_table->lock); + if (ret) { + dev_err(&hr_dev->pdev->dev, "QPC radix_tree_insert failed\n"); + goto err_put_irrl; + } + + atomic_set(&hr_qp->refcount, 1); + init_completion(&hr_qp->free); + + return 0; + +err_put_irrl: + + return ret; +} + +static int hns_roce_qp_alloc(struct hns_roce_dev *hr_dev, unsigned long qpn, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct device *dev = &hr_dev->pdev->dev; + int ret; + + if (!qpn) + return -EINVAL; + + hr_qp->qpn = qpn; + + /* Alloc memory for QPC */ + ret = hns_roce_table_get(hr_dev, &qp_table->qp_table, hr_qp->qpn); + if (ret) { + dev_err(dev, "QPC table get failed\n"); + goto err_out; + } + + /* Alloc memory for IRRL */ + ret = hns_roce_table_get(hr_dev, &qp_table->irrl_table, hr_qp->qpn); + if (ret) { + dev_err(dev, "IRRL table get failed\n"); + goto err_put_qp; + } + + spin_lock_irq(&qp_table->lock); + ret = radix_tree_insert(&hr_dev->qp_table_tree, + hr_qp->qpn & (hr_dev->caps.num_qps - 1), hr_qp); + spin_unlock_irq(&qp_table->lock); + if (ret) { + dev_err(dev, "QPC radix_tree_insert failed\n"); + goto err_put_irrl; + } + + atomic_set(&hr_qp->refcount, 1); + init_completion(&hr_qp->free); + + return 0; + +err_put_irrl: + hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); + +err_put_qp: + hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); + +err_out: + return ret; +} + +void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + unsigned long flags; + + spin_lock_irqsave(&qp_table->lock, flags); + radix_tree_delete(&hr_dev->qp_table_tree, + hr_qp->qpn & (hr_dev->caps.num_qps - 1)); + spin_unlock_irqrestore(&qp_table->lock, flags); +} + +void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + + if (atomic_dec_and_test(&hr_qp->refcount)) + complete(&hr_qp->free); + wait_for_completion(&hr_qp->free); + + if ((hr_qp->ibqp.qp_type) != IB_QPT_GSI) { + hns_roce_table_put(hr_dev, &qp_table->irrl_table, hr_qp->qpn); + hns_roce_table_put(hr_dev, &qp_table->qp_table, hr_qp->qpn); + } +} + +void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, + int cnt) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + + if (base_qpn < SQP_NUM) + return; + + hns_roce_bitmap_free_range(&qp_table->bitmap, base_qpn, cnt); +} + +static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, int is_user, int has_srq, + struct hns_roce_qp *hr_qp) +{ + u32 max_cnt; + struct device *dev = &hr_dev->pdev->dev; + + /* Check the validity of QP support capacity */ + if (cap->max_recv_wr > hr_dev->caps.max_wqes || + cap->max_recv_sge > hr_dev->caps.max_rq_sg) { + dev_err(dev, "RQ WR or sge error!max_recv_wr=%d max_recv_sge=%d\n", + cap->max_recv_wr, cap->max_recv_sge); + return -EINVAL; + } + + /* If srq exit, set zero for relative number of rq */ + if (has_srq) { + if (cap->max_recv_wr) { + dev_dbg(dev, "srq no need config max_recv_wr\n"); + return -EINVAL; + } + + hr_qp->rq.wqe_cnt = hr_qp->rq.max_gs = 0; + } else { + if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { + dev_err(dev, "user space no need config max_recv_wr max_recv_sge\n"); + return -EINVAL; + } + + /* In v1 engine, parameter verification procession */ + max_cnt = cap->max_recv_wr > HNS_ROCE_MIN_WQE_NUM ? + cap->max_recv_wr : HNS_ROCE_MIN_WQE_NUM; + hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); + + if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { + dev_err(dev, "hns_roce_set_rq_size rq.wqe_cnt too large\n"); + return -EINVAL; + } + + max_cnt = max(1U, cap->max_recv_sge); + hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); + /* WQE is fixed for 64B */ + hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); + } + + cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; + cap->max_recv_sge = hr_qp->rq.max_gs; + + return 0; +} + +static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_ib_create_qp *ucmd) +{ + u32 roundup_sq_stride = roundup_pow_of_two(hr_dev->caps.max_sq_desc_sz); + u8 max_sq_stride = ilog2(roundup_sq_stride); + + /* Sanity check SQ size before proceeding */ + if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || + ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + dev_err(&hr_dev->pdev->dev, "check SQ size error!\n"); + return -EINVAL; + } + + hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; + hr_qp->sq.wqe_shift = ucmd->log_sq_stride; + + /* Get buf size, SQ and RQ are aligned to page_szie */ + hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->rq.wqe_shift), PAGE_SIZE) + + HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + + hr_qp->sq.offset = 0; + hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + + return 0; +} + +static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp) +{ + struct device *dev = &hr_dev->pdev->dev; + u32 max_cnt; + + if (cap->max_send_wr > hr_dev->caps.max_wqes || + cap->max_send_sge > hr_dev->caps.max_sq_sg || + cap->max_inline_data > hr_dev->caps.max_sq_inline) { + dev_err(dev, "hns_roce_set_kernel_sq_size error1\n"); + return -EINVAL; + } + + hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); + hr_qp->sq_max_wqes_per_wr = 1; + hr_qp->sq_spare_wqes = 0; + + /* In v1 engine, parameter verification procession */ + max_cnt = cap->max_send_wr > HNS_ROCE_MIN_WQE_NUM ? + cap->max_send_wr : HNS_ROCE_MIN_WQE_NUM; + hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); + if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { + dev_err(dev, "hns_roce_set_kernel_sq_size sq.wqe_cnt too large\n"); + return -EINVAL; + } + + /* Get data_seg numbers */ + max_cnt = max(1U, cap->max_send_sge); + hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); + + /* Get buf size, SQ and RQ are aligned to page_szie */ + hr_qp->buff_size = HNS_ROCE_ALOGN_UP((hr_qp->rq.wqe_cnt << + hr_qp->rq.wqe_shift), PAGE_SIZE) + + HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + hr_qp->sq.offset = 0; + hr_qp->rq.offset = HNS_ROCE_ALOGN_UP((hr_qp->sq.wqe_cnt << + hr_qp->sq.wqe_shift), PAGE_SIZE); + + /* Get wr and sge number which send */ + cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; + cap->max_send_sge = hr_qp->sq.max_gs; + + /* We don't support inline sends for kernel QPs (yet) */ + cap->max_inline_data = 0; + + return 0; +} + +static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, + struct ib_pd *ib_pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, unsigned long sqpn, + struct hns_roce_qp *hr_qp) +{ + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_ib_create_qp ucmd; + unsigned long qpn = 0; + int ret = 0; + + mutex_init(&hr_qp->mutex); + spin_lock_init(&hr_qp->sq.lock); + spin_lock_init(&hr_qp->rq.lock); + + hr_qp->state = IB_QPS_RESET; + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; + else + hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; + + ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject, + !!init_attr->srq, hr_qp); + if (ret) { + dev_err(dev, "hns_roce_set_rq_size failed\n"); + goto err_out; + } + + if (ib_pd->uobject) { + if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { + dev_err(dev, "ib_copy_from_udata error for create qp\n"); + ret = -EFAULT; + goto err_out; + } + + ret = hns_roce_set_user_sq_size(hr_dev, hr_qp, &ucmd); + if (ret) { + dev_err(dev, "hns_roce_set_user_sq_size error for create qp\n"); + goto err_out; + } + + hr_qp->umem = ib_umem_get(ib_pd->uobject->context, + ucmd.buf_addr, hr_qp->buff_size, 0, + 0); + if (IS_ERR(hr_qp->umem)) { + dev_err(dev, "ib_umem_get error for create qp\n"); + ret = PTR_ERR(hr_qp->umem); + goto err_out; + } + + ret = hns_roce_mtt_init(hr_dev, ib_umem_page_count(hr_qp->umem), + ilog2((unsigned int)hr_qp->umem->page_size), + &hr_qp->mtt); + if (ret) { + dev_err(dev, "hns_roce_mtt_init error for create qp\n"); + goto err_buf; + } + + ret = hns_roce_ib_umem_write_mtt(hr_dev, &hr_qp->mtt, + hr_qp->umem); + if (ret) { + dev_err(dev, "hns_roce_ib_umem_write_mtt error for create qp\n"); + goto err_mtt; + } + } else { + if (init_attr->create_flags & + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + dev_err(dev, "init_attr->create_flags error!\n"); + ret = -EINVAL; + goto err_out; + } + + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { + dev_err(dev, "init_attr->create_flags error!\n"); + ret = -EINVAL; + goto err_out; + } + + /* Set SQ size */ + ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap, + hr_qp); + if (ret) { + dev_err(dev, "hns_roce_set_kernel_sq_size error!\n"); + goto err_out; + } + + /* QP doorbell register address */ + hr_qp->sq.db_reg_l = hr_dev->reg_base + ROCEE_DB_SQ_L_0_REG + + DB_REG_OFFSET * hr_dev->priv_uar.index; + hr_qp->rq.db_reg_l = hr_dev->reg_base + + ROCEE_DB_OTHERS_L_0_REG + + DB_REG_OFFSET * hr_dev->priv_uar.index; + + /* Allocate QP buf */ + if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, PAGE_SIZE * 2, + &hr_qp->hr_buf)) { + dev_err(dev, "hns_roce_buf_alloc error!\n"); + ret = -ENOMEM; + goto err_out; + } + + /* Write MTT */ + ret = hns_roce_mtt_init(hr_dev, hr_qp->hr_buf.npages, + hr_qp->hr_buf.page_shift, &hr_qp->mtt); + if (ret) { + dev_err(dev, "hns_roce_mtt_init error for kernel create qp\n"); + goto err_buf; + } + + ret = hns_roce_buf_write_mtt(hr_dev, &hr_qp->mtt, + &hr_qp->hr_buf); + if (ret) { + dev_err(dev, "hns_roce_buf_write_mtt error for kernel create qp\n"); + goto err_mtt; + } + + hr_qp->sq.wrid = kmalloc_array(hr_qp->sq.wqe_cnt, sizeof(u64), + GFP_KERNEL); + hr_qp->rq.wrid = kmalloc_array(hr_qp->rq.wqe_cnt, sizeof(u64), + GFP_KERNEL); + if (!hr_qp->sq.wrid || !hr_qp->rq.wrid) { + ret = -ENOMEM; + goto err_wrid; + } + } + + if (sqpn) { + qpn = sqpn; + } else { + /* Get QPN */ + ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn); + if (ret) { + dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n"); + goto err_wrid; + } + } + + if ((init_attr->qp_type) == IB_QPT_GSI) { + ret = hns_roce_gsi_qp_alloc(hr_dev, qpn, hr_qp); + if (ret) { + dev_err(dev, "hns_roce_qp_alloc failed!\n"); + goto err_qpn; + } + } else { + ret = hns_roce_qp_alloc(hr_dev, qpn, hr_qp); + if (ret) { + dev_err(dev, "hns_roce_qp_alloc failed!\n"); + goto err_qpn; + } + } + + if (sqpn) + hr_qp->doorbell_qpn = 1; + else + hr_qp->doorbell_qpn = cpu_to_le64(hr_qp->qpn); + + hr_qp->event = hns_roce_ib_qp_event; + + return 0; + +err_qpn: + if (!sqpn) + hns_roce_release_range_qp(hr_dev, qpn, 1); + +err_wrid: + kfree(hr_qp->sq.wrid); + kfree(hr_qp->rq.wrid); + +err_mtt: + hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); + +err_buf: + if (ib_pd->uobject) + ib_umem_release(hr_qp->umem); + else + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + +err_out: + return ret; +} + +struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); + struct device *dev = &hr_dev->pdev->dev; + struct hns_roce_sqp *hr_sqp; + struct hns_roce_qp *hr_qp; + int ret; + + switch (init_attr->qp_type) { + case IB_QPT_RC: { + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (!hr_qp) + return ERR_PTR(-ENOMEM); + + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, 0, + hr_qp); + if (ret) { + dev_err(dev, "Create RC QP failed\n"); + kfree(hr_qp); + return ERR_PTR(ret); + } + + hr_qp->ibqp.qp_num = hr_qp->qpn; + + break; + } + case IB_QPT_GSI: { + /* Userspace is not allowed to create special QPs: */ + if (pd->uobject) { + dev_err(dev, "not support usr space GSI\n"); + return ERR_PTR(-EINVAL); + } + + hr_sqp = kzalloc(sizeof(*hr_sqp), GFP_KERNEL); + if (!hr_sqp) + return ERR_PTR(-ENOMEM); + + hr_qp = &hr_sqp->hr_qp; + hr_qp->port = init_attr->port_num - 1; + hr_qp->phy_port = hr_dev->iboe.phy_port[hr_qp->port]; + hr_qp->ibqp.qp_num = HNS_ROCE_MAX_PORTS + + hr_dev->iboe.phy_port[hr_qp->port]; + + ret = hns_roce_create_qp_common(hr_dev, pd, init_attr, udata, + hr_qp->ibqp.qp_num, hr_qp); + if (ret) { + dev_err(dev, "Create GSI QP failed!\n"); + kfree(hr_sqp); + return ERR_PTR(ret); + } + + break; + } + default:{ + dev_err(dev, "not support QP type %d\n", init_attr->qp_type); + return ERR_PTR(-EINVAL); + } + } + + return &hr_qp->ibqp; +} + +int to_hr_qp_type(int qp_type) +{ + int transport_type; + + if (qp_type == IB_QPT_RC) + transport_type = SERV_TYPE_RC; + else if (qp_type == IB_QPT_UC) + transport_type = SERV_TYPE_UC; + else if (qp_type == IB_QPT_UD) + transport_type = SERV_TYPE_UD; + else if (qp_type == IB_QPT_GSI) + transport_type = SERV_TYPE_UD; + else + transport_type = -1; + + return transport_type; +} + +int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + enum ib_qp_state cur_state, new_state; + struct device *dev = &hr_dev->pdev->dev; + int ret = -EINVAL; + int p; + enum ib_mtu active_mtu; + + mutex_lock(&hr_qp->mutex); + + cur_state = attr_mask & IB_QP_CUR_STATE ? + attr->cur_qp_state : (enum ib_qp_state)hr_qp->state; + new_state = attr_mask & IB_QP_STATE ? + attr->qp_state : cur_state; + + if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_ETHERNET)) { + dev_err(dev, "ib_modify_qp_is_ok failed\n"); + goto out; + } + + if ((attr_mask & IB_QP_PORT) && + (attr->port_num == 0 || attr->port_num > hr_dev->caps.num_ports)) { + dev_err(dev, "attr port_num invalid.attr->port_num=%d\n", + attr->port_num); + goto out; + } + + if (attr_mask & IB_QP_PKEY_INDEX) { + p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; + if (attr->pkey_index >= hr_dev->caps.pkey_table_len[p]) { + dev_err(dev, "attr pkey_index invalid.attr->pkey_index=%d\n", + attr->pkey_index); + goto out; + } + } + + if (attr_mask & IB_QP_PATH_MTU) { + p = attr_mask & IB_QP_PORT ? (attr->port_num - 1) : hr_qp->port; + active_mtu = iboe_get_mtu(hr_dev->iboe.netdevs[p]->mtu); + + if (attr->path_mtu > IB_MTU_2048 || + attr->path_mtu < IB_MTU_256 || + attr->path_mtu > active_mtu) { + dev_err(dev, "attr path_mtu(%d)invalid while modify qp", + attr->path_mtu); + goto out; + } + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && + attr->max_rd_atomic > hr_dev->caps.max_qp_init_rdma) { + dev_err(dev, "attr max_rd_atomic invalid.attr->max_rd_atomic=%d\n", + attr->max_rd_atomic); + goto out; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && + attr->max_dest_rd_atomic > hr_dev->caps.max_qp_dest_rdma) { + dev_err(dev, "attr max_dest_rd_atomic invalid.attr->max_dest_rd_atomic=%d\n", + attr->max_dest_rd_atomic); + goto out; + } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + ret = -EPERM; + dev_err(dev, "cur_state=%d new_state=%d\n", cur_state, + new_state); + goto out; + } + + ret = hr_dev->hw->modify_qp(ibqp, attr, attr_mask, cur_state, + new_state); + +out: + mutex_unlock(&hr_qp->mutex); + + return ret; +} + +void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) + __acquires(&send_cq->lock) __acquires(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + spin_lock_irq(&send_cq->lock); + __acquire(&recv_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + spin_lock_irq(&send_cq->lock); + spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); + } else { + spin_lock_irq(&recv_cq->lock); + spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); + } +} + +void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, + struct hns_roce_cq *recv_cq) __releases(&send_cq->lock) + __releases(&recv_cq->lock) +{ + if (send_cq == recv_cq) { + __release(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else if (send_cq->cqn < recv_cq->cqn) { + spin_unlock(&recv_cq->lock); + spin_unlock_irq(&send_cq->lock); + } else { + spin_unlock(&send_cq->lock); + spin_unlock_irq(&recv_cq->lock); + } +} + +__be32 send_ieth(struct ib_send_wr *wr) +{ + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + case IB_WR_RDMA_WRITE_WITH_IMM: + return cpu_to_le32(wr->ex.imm_data); + case IB_WR_SEND_WITH_INV: + return cpu_to_le32(wr->ex.invalidate_rkey); + default: + return 0; + } +} + +static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) +{ + + return hns_roce_buf_offset(&hr_qp->hr_buf, offset); +} + +void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n) +{ + return get_wqe(hr_qp, hr_qp->rq.offset + (n << hr_qp->rq.wqe_shift)); +} + +void *get_send_wqe(struct hns_roce_qp *hr_qp, int n) +{ + return get_wqe(hr_qp, hr_qp->sq.offset + (n << hr_qp->sq.wqe_shift)); +} + +bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, + struct ib_cq *ib_cq) +{ + struct hns_roce_cq *hr_cq; + u32 cur; + + cur = hr_wq->head - hr_wq->tail; + if (likely(cur + nreq < hr_wq->max_post)) + return 0; + + hr_cq = to_hr_cq(ib_cq); + spin_lock(&hr_cq->lock); + cur = hr_wq->head - hr_wq->tail; + spin_unlock(&hr_cq->lock); + + return cur + nreq >= hr_wq->max_post; +} + +int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + int reserved_from_top = 0; + int ret; + + spin_lock_init(&qp_table->lock); + INIT_RADIX_TREE(&hr_dev->qp_table_tree, GFP_ATOMIC); + + /* A port include two SQP, six port total 12 */ + ret = hns_roce_bitmap_init(&qp_table->bitmap, hr_dev->caps.num_qps, + hr_dev->caps.num_qps - 1, SQP_NUM, + reserved_from_top); + if (ret) { + dev_err(&hr_dev->pdev->dev, "qp bitmap init failed!error=%d\n", + ret); + return ret; + } + + return 0; +} + +void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) +{ + hns_roce_bitmap_cleanup(&hr_dev->qp_table.bitmap); +} diff --git a/drivers/infiniband/hw/cxgb3/iwch_user.h b/drivers/infiniband/hw/hns/hns_roce_user.h index a277c31fcaf7..a28f761a9f65 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_user.h +++ b/drivers/infiniband/hw/hns/hns_roce_user.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006 Chelsio, Inc. All rights reserved. + * Copyright (c) 2016 Hisilicon Limited. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -29,46 +29,25 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __IWCH_USER_H__ -#define __IWCH_USER_H__ -#define IWCH_UVERBS_ABI_VERSION 1 +#ifndef _HNS_ROCE_USER_H +#define _HNS_ROCE_USER_H -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. - */ -struct iwch_create_cq_req { - __u64 user_rptr_addr; +struct hns_roce_ib_create_cq { + __u64 buf_addr; }; -struct iwch_create_cq_resp_v0 { - __u64 key; - __u32 cqid; - __u32 size_log2; +struct hns_roce_ib_create_qp { + __u64 buf_addr; + __u64 db_addr; + __u8 log_sq_bb_count; + __u8 log_sq_stride; + __u8 sq_no_prefetch; + __u8 reserved[5]; }; -struct iwch_create_cq_resp { - __u64 key; - __u32 cqid; - __u32 size_log2; - __u32 memsize; - __u32 reserved; +struct hns_roce_ib_alloc_ucontext_resp { + __u32 qp_tab_size; }; -struct iwch_create_qp_resp { - __u64 key; - __u64 db_key; - __u32 qpid; - __u32 size_log2; - __u32 sq_size_log2; - __u32 rq_size_log2; -}; - -struct iwch_reg_user_mr_resp { - __u32 pbl_addr; -}; -#endif +#endif /*_HNS_ROCE_USER_H */ diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index b738acdb9b02..8ec09e470f84 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -232,7 +232,7 @@ struct i40iw_device { struct i40e_client *client; struct i40iw_hw hw; struct i40iw_cm_core cm_core; - unsigned long *mem_resources; + u8 *mem_resources; unsigned long *allocated_qps; unsigned long *allocated_cqs; unsigned long *allocated_mrs; @@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev, *next = resource_num + 1; if (*next == max_resources) *next = 0; - spin_unlock_irqrestore(&iwdev->resource_lock, flags); *req_resource_num = resource_num; + spin_unlock_irqrestore(&iwdev->resource_lock, flags); return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5026dc79978a..85637696f6e9 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, buf += hdr_len; } - if (pd_len) - memcpy(buf, pdata->addr, pd_len); + if (pdata && pdata->addr) + memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); @@ -3166,8 +3166,11 @@ void i40iw_setup_cm_core(struct i40iw_device *iwdev) spin_lock_init(&cm_core->ht_lock); spin_lock_init(&cm_core->listen_list_lock); - cm_core->event_wq = create_singlethread_workqueue("iwewq"); - cm_core->disconn_wq = create_singlethread_workqueue("iwdwq"); + cm_core->event_wq = alloc_ordered_workqueue("iwewq", + WQ_MEM_RECLAIM); + + cm_core->disconn_wq = alloc_ordered_workqueue("iwdwq", + WQ_MEM_RECLAIM); } /** @@ -3347,26 +3350,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp) } /** - * i40iw_loopback_nop - Send a nop - * @qp: associated hw qp - */ -static void i40iw_loopback_nop(struct i40iw_sc_qp *qp) -{ - u64 *wqe; - u64 header; - - wqe = qp->qp_uk.sq_base->elem; - set_64bit_val(wqe, 0, 0); - set_64bit_val(wqe, 8, 0); - set_64bit_val(wqe, 16, 0); - - header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) | - LS_64(0, I40IWQPSQ_SIGCOMPL) | - LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, header); -} - -/** * i40iw_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection */ @@ -3638,7 +3621,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - i40iw_loopback_nop(&iwqp->sc_qp); + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0); } if (iwqp->page) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 3ee0cad96bc6..0c92a40b3e86 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp, info.dont_send_fin = false; if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR)) info.reset_tcp_conn = true; + iwqp->hw_iwarp_state = state; i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 6e9081380a27..ac2f3cd9478c 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = { .notifier_call = i40iw_net_event }; -static int i40iw_notifiers_registered; +static atomic_t i40iw_notifiers_registered; /** * i40iw_find_i40e_handler - find a handler given a client info @@ -1342,12 +1342,11 @@ exit: */ static void i40iw_register_notifiers(void) { - if (!i40iw_notifiers_registered) { + if (atomic_inc_return(&i40iw_notifiers_registered) == 1) { register_inetaddr_notifier(&i40iw_inetaddr_notifier); register_inet6addr_notifier(&i40iw_inetaddr6_notifier); register_netevent_notifier(&i40iw_net_notifier); } - i40iw_notifiers_registered++; } /** @@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case INET_NOTIFIER: - if (i40iw_notifiers_registered > 0) { - i40iw_notifiers_registered--; + if (!atomic_dec_return(&i40iw_notifiers_registered)) { unregister_netevent_notifier(&i40iw_net_notifier); unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); @@ -1558,6 +1556,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) enum i40iw_status_code status; struct i40iw_handler *hdl; + hdl = i40iw_find_netdev(ldev->netdev); + if (hdl) + return 0; + hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; @@ -1613,7 +1615,7 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) status = i40iw_hmc_init_pble(&iwdev->sc_dev, iwdev->pble_rsrc); if (status) break; - iwdev->virtchnl_wq = create_singlethread_workqueue("iwvch"); + iwdev->virtchnl_wq = alloc_ordered_workqueue("iwvch", WQ_MEM_RECLAIM); i40iw_register_notifiers(); iwdev->init_state = INET_NOTIFIER; status = i40iw_add_mac_ip(iwdev); diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0e8db0a35141..6fd043b1d714 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, { if (!mem) return I40IW_ERR_PARAM; + /* + * mem->va points to the parent of mem, so both mem and mem->va + * can not be touched once mem->va is freed + */ kfree(mem->va); - mem->va = NULL; return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2360338877bf..6329c971c22f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return &iwqp->ibqp; error: i40iw_free_qp_resources(iwdev, iwqp, qp_num); - kfree(mem); return ERR_PTR(err_code); } @@ -1926,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) } if (iwpbl->pbl_allocated) i40iw_free_pble(iwdev->pble_rsrc, palloc); - kfree(iwpbl->iwmr); - iwpbl->iwmr = NULL; + kfree(iwmr); return 0; } diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index c74ef2620b85..5e9939045852 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -881,7 +881,7 @@ int mlx4_ib_init_alias_guid_service(struct mlx4_ib_dev *dev) snprintf(alias_wq_name, sizeof alias_wq_name, "alias_guid%d", i); dev->sriov.alias_guid.ports_guid[i].wq = - create_singlethread_workqueue(alias_wq_name); + alloc_ordered_workqueue(alias_wq_name, WQ_MEM_RECLAIM); if (!dev->sriov.alias_guid.ports_guid[i].wq) { ret = -ENOMEM; goto err_thread; diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index d6fc8a6e8c33..1ea686b9e0f9 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -37,7 +37,7 @@ #include <linux/slab.h> #include "mlx4_ib.h" -#include "user.h" +#include <rdma/mlx4-abi.h> static void mlx4_ib_cq_comp(struct mlx4_cq *cq) { @@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) checksum == cpu_to_be16(0xffff); } -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe, int is_eth) +static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); } - - return 0; } static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, @@ -689,12 +687,6 @@ repoll: is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR; - if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && - is_send)) { - pr_warn("Completion for NOP opcode detected!\n"); - return -EINVAL; - } - /* Resize CQ in progress */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { if (cq->resize_buf) { @@ -720,12 +712,6 @@ repoll: */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->vlan_my_qpn)); - if (unlikely(!mqp)) { - pr_warn("CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -738,11 +724,6 @@ repoll: /* SRQ is also in the radix tree */ msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, srq_num); - if (unlikely(!msrq)) { - pr_warn("CQ %06x with entry for unknown SRQN %06x\n", - cq->mcq.cqn, srq_num); - return -EINVAL; - } } if (is_send) { @@ -852,9 +833,11 @@ repoll: if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, - cqe, is_eth); + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + use_tunnel_data(*cur_qp, cq, wc, tail, cqe, + is_eth); + return 0; + } } wc->slid = be16_to_cpu(cqe->rlid); @@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct mlx4_ib_qp *cur_qp = NULL; unsigned long flags; int npolled; - int err = 0; struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); spin_lock_irqsave(&cq->lock, flags); @@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } for (npolled = 0; npolled < num_entries; ++npolled) { - err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); - if (err) + if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled)) break; } @@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return npolled; - else - return err; + return npolled; } int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2e53d28f98..1672907ff219 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -230,6 +230,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad mad->mad_hdr.method == IB_MGMT_METHOD_SET) switch (mad->mad_hdr.attr_id) { case IB_SMP_ATTR_PORT_INFO: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data; lid = be16_to_cpu(pinfo->lid); @@ -245,6 +247,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad break; case IB_SMP_ATTR_PKEY_TABLE: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; if (!mlx4_is_mfunc(dev->dev)) { mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_PKEY_CHANGE); @@ -281,6 +285,8 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad break; case IB_SMP_ATTR_GUID_INFO: + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) + return; /* paravirtualized master's guid is guid 0 -- does not change */ if (!mlx4_is_master(dev->dev)) mlx4_ib_dispatch_event(dev, port_num, @@ -296,6 +302,26 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad } break; + case IB_SMP_ATTR_SL_TO_VL_TABLE: + /* cache sl to vl mapping changes for use in + * filling QP1 LRH VL field when sending packets + */ + if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV && + dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT) + return; + if (!mlx4_is_slave(dev->dev)) { + union sl2vl_tbl_to_u64 sl2vl64; + int jj; + + for (jj = 0; jj < 8; jj++) { + sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj]; + pr_debug("port %u, sl2vl[%d] = %02x\n", + port_num, jj, sl2vl64.sl8[jj]); + } + atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64); + } + break; + default: break; } @@ -345,7 +371,8 @@ static void node_desc_override(struct ib_device *dev, mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); - memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, + IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); } } @@ -805,8 +832,7 @@ static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num, return IB_MAD_RESULT_FAILURE; if (!out_mad->mad_hdr.status) { - if (!(to_mdev(ibdev)->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)) - smp_snoop(ibdev, port_num, in_mad, prev_lid); + smp_snoop(ibdev, port_num, in_mad, prev_lid); /* slaves get node desc from FW */ if (!mlx4_is_slave(to_mdev(ibdev)->dev)) node_desc_override(ibdev, out_mad); @@ -1037,6 +1063,23 @@ static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num) MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK); } } + + /* Update the sl to vl table from inside client rereg + * only if in secure-host mode (snooping is not possible) + * and the sl-to-vl change event is not generated by FW. + */ + if (!mlx4_is_slave(dev->dev) && + dev->dev->flags & MLX4_FLAG_SECURE_HOST && + !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) { + if (mlx4_is_master(dev->dev)) + /* already in work queue from mlx4_ib_event queueing + * mlx4_handle_port_mgmt_change_event, which calls + * this procedure. Therefore, call sl2vl_update directly. + */ + mlx4_ib_sl2vl_update(dev, port_num); + else + mlx4_sched_ib_sl2vl_update_work(dev, port_num); + } mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER); } @@ -1128,6 +1171,27 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@ -1155,6 +1219,24 @@ void handle_port_mgmt_change_event(struct work_struct *work) handle_slaves_guid_change(dev, port, tbl_block, change_bitmap); } break; + + case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP: + /* cache sl to vl mapping changes for use in + * filling QP1 LRH VL field when sending packets + */ + if (!mlx4_is_slave(dev->dev)) { + union sl2vl_tbl_to_u64 sl2vl64; + int jj; + + for (jj = 0; jj < 8; jj++) { + sl2vl64.sl8[jj] = + eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj]; + pr_debug("port %u, sl2vl[%d] = %02x\n", + port, jj, sl2vl64.sl8[jj]); + } + atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64); + } + break; default: pr_warn("Unsupported subtype 0x%x for " "Port Management Change event\n", eqe->subtype); @@ -1897,7 +1979,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, goto err_buf; } - ctx->pd = ib_alloc_pd(ctx->ib_dev); + ctx->pd = ib_alloc_pd(ctx->ib_dev, 0); if (IS_ERR(ctx->pd)) { ret = PTR_ERR(ctx->pd); pr_err("Couldn't create tunnel PD (%d)\n", ret); @@ -2070,7 +2152,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, } snprintf(name, sizeof name, "mlx4_ibt%d", port); - ctx->wq = create_singlethread_workqueue(name); + ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->wq) { pr_err("Failed to create tunnelling WQ for port %d\n", port); ret = -ENOMEM; @@ -2078,7 +2160,7 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, } snprintf(name, sizeof name, "mlx4_ibud%d", port); - ctx->ud_wq = create_singlethread_workqueue(name); + ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->ud_wq) { pr_err("Failed to create up/down WQ for port %d\n", port); ret = -ENOMEM; @@ -2202,6 +2284,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2af44c2de262..b597e8227591 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -55,7 +55,7 @@ #include <linux/mlx4/qp.h> #include "mlx4_ib.h" -#include "user.h" +#include <rdma/mlx4-abi.h> #define DRV_NAME MLX4_IB_DRV_NAME #define DRV_VERSION "2.2-1" @@ -832,6 +832,66 @@ static int mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index, return ret; } +static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u8 port, u64 *sl2vl_tbl) +{ + union sl2vl_tbl_to_u64 sl2vl64; + struct ib_smp *in_mad = NULL; + struct ib_smp *out_mad = NULL; + int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; + int err = -ENOMEM; + int jj; + + if (mlx4_is_slave(to_mdev(ibdev)->dev)) { + *sl2vl_tbl = 0; + return 0; + } + + in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); + out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); + if (!in_mad || !out_mad) + goto out; + + init_query_mad(in_mad); + in_mad->attr_id = IB_SMP_ATTR_SL_TO_VL_TABLE; + in_mad->attr_mod = 0; + + if (mlx4_is_mfunc(to_mdev(ibdev)->dev)) + mad_ifc_flags |= MLX4_MAD_IFC_NET_VIEW; + + err = mlx4_MAD_IFC(to_mdev(ibdev), mad_ifc_flags, port, NULL, NULL, + in_mad, out_mad); + if (err) + goto out; + + for (jj = 0; jj < 8; jj++) + sl2vl64.sl8[jj] = ((struct ib_smp *)out_mad)->data[jj]; + *sl2vl_tbl = sl2vl64.sl64; + +out: + kfree(in_mad); + kfree(out_mad); + return err; +} + +static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev) +{ + u64 sl2vl; + int i; + int err; + + for (i = 1; i <= mdev->dev->caps.num_ports; i++) { + if (mdev->dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) + continue; + err = mlx4_ib_query_sl2vl(&mdev->ib_dev, i, &sl2vl); + if (err) { + pr_err("Unable to get default sl to vl mapping for port %d. Using all zeroes (%d)\n", + i, err); + sl2vl = 0; + } + atomic64_set(&mdev->sl2vl[i - 1], sl2vl); + } +} + int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey, int netw_view) { @@ -886,7 +946,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, return -EOPNOTSUPP; spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* @@ -897,7 +957,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, if (IS_ERR(mailbox)) return 0; - memcpy(mailbox->buf, props->node_desc, 64); + memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); @@ -1259,7 +1319,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, if (err) goto err1; - xrcd->pd = ib_alloc_pd(ibdev); + xrcd->pd = ib_alloc_pd(ibdev, 0); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; @@ -1361,6 +1421,19 @@ struct mlx4_ib_steering { union ib_gid gid; }; +#define LAST_ETH_FIELD vlan_tag +#define LAST_IB_FIELD sl +#define LAST_IPV4_FIELD dst_ip +#define LAST_TCP_UDP_FIELD src_port + +/* Field is the last supported field */ +#define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + static int parse_flow_attr(struct mlx4_dev *dev, u32 qp_num, union ib_flow_spec *ib_spec, @@ -1370,6 +1443,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_ETH; memcpy(mlx4_spec->eth.dst_mac, ib_spec->eth.val.dst_mac, ETH_ALEN); @@ -1379,6 +1455,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag; break; case IB_FLOW_SPEC_IB: + if (FIELDS_NOT_SUPPORTED(ib_spec->ib.mask, LAST_IB_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_IB; mlx4_spec->ib.l3_qpn = cpu_to_be32(qp_num); @@ -1388,6 +1467,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, case IB_FLOW_SPEC_IPV4: + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -ENOTSUPP; + type = MLX4_NET_TRANS_RULE_ID_IPV4; mlx4_spec->ipv4.src_ip = ib_spec->ipv4.val.src_ip; mlx4_spec->ipv4.src_ip_msk = ib_spec->ipv4.mask.src_ip; @@ -1397,6 +1479,9 @@ static int parse_flow_attr(struct mlx4_dev *dev, case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; + type = ib_spec->type == IB_FLOW_SPEC_TCP ? MLX4_NET_TRANS_RULE_ID_TCP : MLX4_NET_TRANS_RULE_ID_UDP; @@ -2000,7 +2085,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) if (err) goto out; - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; @@ -2202,6 +2287,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) bool per_port = !!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + if (mlx4_is_slave(ibdev->dev)) + return 0; + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { /* i == 1 means we are building port counters */ if (i && !per_port) @@ -2650,6 +2738,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (init_node_data(ibdev)) goto err_map; + mlx4_init_sl2vl_tbl(ibdev); for (i = 0; i < ibdev->num_ports; ++i) { mutex_init(&ibdev->counters_table[i].mutex); @@ -3098,6 +3187,47 @@ static void handle_bonded_port_state_event(struct work_struct *work) ib_dispatch_event(&ibev); } +void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port) +{ + u64 sl2vl; + int err; + + err = mlx4_ib_query_sl2vl(&mdev->ib_dev, port, &sl2vl); + if (err) { + pr_err("Unable to get current sl to vl mapping for port %d. Using all zeroes (%d)\n", + port, err); + sl2vl = 0; + } + atomic64_set(&mdev->sl2vl[port - 1], sl2vl); +} + +static void ib_sl2vl_update_work(struct work_struct *work) +{ + struct ib_event_work *ew = container_of(work, struct ib_event_work, work); + struct mlx4_ib_dev *mdev = ew->ib_dev; + int port = ew->port; + + mlx4_ib_sl2vl_update(mdev, port); + + kfree(ew); +} + +void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, + int port) +{ + struct ib_event_work *ew; + + ew = kmalloc(sizeof(*ew), GFP_ATOMIC); + if (ew) { + INIT_WORK(&ew->work, ib_sl2vl_update_work); + ew->port = port; + ew->ib_dev = ibdev; + queue_work(wq, &ew->work); + } else { + pr_err("failed to allocate memory for sl2vl update work\n"); + } +} + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@ -3128,10 +3258,14 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_PORT_UP: if (p > ibdev->num_ports) return; - if (mlx4_is_master(dev) && + if (!mlx4_is_slave(dev) && rdma_port_get_link_layer(&ibdev->ib_dev, p) == IB_LINK_LAYER_INFINIBAND) { - mlx4_ib_invalidate_all_guid_record(ibdev, p); + if (mlx4_is_master(dev)) + mlx4_ib_invalidate_all_guid_record(ibdev, p); + if (ibdev->dev->flags & MLX4_FLAG_SECURE_HOST && + !(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) + mlx4_sched_ib_sl2vl_update_work(ibdev, p); } ibev.event = IB_EVENT_PORT_ACTIVE; break; @@ -3219,7 +3353,7 @@ static int __init mlx4_ib_init(void) { int err; - wq = create_singlethread_workqueue("mlx4_ib"); + wq = alloc_ordered_workqueue("mlx4_ib", WQ_MEM_RECLAIM); if (!wq) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 8f7ad07915b0..a21d37f02f35 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group) if (!group->members[i]) leave_state |= (1 << i); - return leave_state & (group->rec.scope_join_state & 7); + return leave_state & (group->rec.scope_join_state & 0xf); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) @@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; + if (group->rec.scope_join_state & 0xf) + group->rec.scope_join_state &= 0xf0; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) @@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask, static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { - u8 group_join_state = group->rec.scope_join_state & 7; + u8 group_join_state = group->rec.scope_join_state & 0xf; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; @@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) - group->response_sa_mad.data)->scope_join_state & 7; - cur_join_state = group->rec.scope_join_state & 7; + group->response_sa_mad.data)->scope_join_state & 0xf; + cur_join_state = group->rec.scope_join_state & 0xf; if (method == IB_MGMT_METHOD_GET_RESP) { /* successfull join */ @@ -710,7 +710,7 @@ process_requests: req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - req_join_state = sa_data->scope_join_state & 0x7; + req_join_state = sa_data->scope_join_state & 0xf; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent @@ -1045,7 +1045,7 @@ int mlx4_ib_mcg_port_init(struct mlx4_ib_demux_ctx *ctx) atomic_set(&ctx->tid, 0); sprintf(name, "mlx4_ib_mcg%d", ctx->port); - ctx->mcg_wq = create_singlethread_workqueue(name); + ctx->mcg_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!ctx->mcg_wq) return -ENOMEM; @@ -1246,7 +1246,7 @@ void clean_vf_mcast(struct mlx4_ib_demux_ctx *ctx, int slave) int mlx4_ib_mcg_init(void) { - clean_wq = create_singlethread_workqueue("mlx4_ib_mcg"); + clean_wq = alloc_ordered_workqueue("mlx4_ib_mcg", WQ_MEM_RECLAIM); if (!clean_wq) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7c5832ede4bd..35141f451e5c 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx { struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ @@ -570,6 +570,7 @@ struct mlx4_ib_dev { struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; + atomic64_t sl2vl[MLX4_MAX_PORTS]; struct mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; @@ -600,6 +601,7 @@ struct ib_event_work { struct work_struct work; struct mlx4_ib_dev *ib_dev; struct mlx4_eqe ib_eqe; + int port; }; struct mlx4_ib_qp_tunnel_init_attr { @@ -883,4 +885,9 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev, u8 port_num, int index); +void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev, + int port); + +void mlx4_ib_sl2vl_update(struct mlx4_ib_dev *mdev, int port); + #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 768085f59566..570bc866b1d6 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -47,7 +47,7 @@ #include <linux/mlx4/qp.h> #include "mlx4_ib.h" -#include "user.h" +#include <rdma/mlx4-abi.h> static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv_cq); @@ -2405,6 +2405,22 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, return 0; } +static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num) +{ + union sl2vl_tbl_to_u64 tmp_vltab; + u8 vl; + + if (sl > 15) + return 0xf; + tmp_vltab.sl64 = atomic64_read(&dev->sl2vl[port_num - 1]); + vl = tmp_vltab.sl8[sl >> 1]; + if (sl & 1) + vl &= 0x0f; + else + vl >>= 4; + return vl; +} + #define MLX4_ROCEV2_QP1_SPORT 0xC000 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, void *wqe, unsigned *mlx_seg_len) @@ -2493,24 +2509,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (is_eth) + if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); - else { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + } else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache + */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. + subnet_prefix))); + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid, NULL); + } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); @@ -2587,7 +2606,12 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.vlan.tag = cpu_to_be16(vlan | pcp); } } else { - sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : 0; + sqp->ud_header.lrh.virtual_lane = !sqp->qp.ibqp.qp_num ? 15 : + sl_to_vl(to_mdev(ib_dev), + sqp->ud_header.lrh.service_level, + sqp->qp.port); + if (sqp->qp.ibqp.qp_num && sqp->ud_header.lrh.virtual_lane == 15) + return -EINVAL; if (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE) sqp->ud_header.lrh.source_lid = IB_LID_PERMISSIVE; } diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 0597f3eef5d0..7dd3f267f06b 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -37,7 +37,7 @@ #include <linux/vmalloc.h> #include "mlx4_ib.h" -#include "user.h" +#include <rdma/mlx4-abi.h> static void *get_wqe(struct mlx4_ib_srq *srq, int n) { diff --git a/drivers/infiniband/hw/mlx4/user.h b/drivers/infiniband/hw/mlx4/user.h deleted file mode 100644 index 07e6769ef43b..000000000000 --- a/drivers/infiniband/hw/mlx4/user.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef MLX4_IB_USER_H -#define MLX4_IB_USER_H - -#include <linux/types.h> - -/* - * Increment this value if any changes that break userspace ABI - * compatibility are made. - */ - -#define MLX4_IB_UVERBS_NO_DEV_CAPS_ABI_VERSION 3 -#define MLX4_IB_UVERBS_ABI_VERSION 4 - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. - */ - -struct mlx4_ib_alloc_ucontext_resp_v3 { - __u32 qp_tab_size; - __u16 bf_reg_size; - __u16 bf_regs_per_page; -}; - -struct mlx4_ib_alloc_ucontext_resp { - __u32 dev_caps; - __u32 qp_tab_size; - __u16 bf_reg_size; - __u16 bf_regs_per_page; - __u32 cqe_size; -}; - -struct mlx4_ib_alloc_pd_resp { - __u32 pdn; - __u32 reserved; -}; - -struct mlx4_ib_create_cq { - __u64 buf_addr; - __u64 db_addr; -}; - -struct mlx4_ib_create_cq_resp { - __u32 cqn; - __u32 reserved; -}; - -struct mlx4_ib_resize_cq { - __u64 buf_addr; -}; - -struct mlx4_ib_create_srq { - __u64 buf_addr; - __u64 db_addr; -}; - -struct mlx4_ib_create_srq_resp { - __u32 srqn; - __u32 reserved; -}; - -struct mlx4_ib_create_qp { - __u64 buf_addr; - __u64 db_addr; - __u8 log_sq_bb_count; - __u8 log_sq_stride; - __u8 sq_no_prefetch; - __u8 reserved[5]; -}; - -#endif /* MLX4_IB_USER_H */ diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 308a358e5b46..79d017baf6f4 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -35,7 +35,6 @@ #include <rdma/ib_user_verbs.h> #include <rdma/ib_cache.h> #include "mlx5_ib.h" -#include "user.h" static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq) { @@ -553,12 +552,6 @@ repoll: * from the table. */ mqp = __mlx5_qp_lookup(dev->mdev, qpn); - if (unlikely(!mqp)) { - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", - cq->mcq.cqn, qpn); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -619,13 +612,6 @@ repoll: read_lock(&dev->mdev->priv.mkey_table.lock); mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmkey)) { - read_unlock(&dev->mdev->priv.mkey_table.lock); - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", - cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); - return -EINVAL; - } - mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; @@ -676,7 +662,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int soft_polled = 0; int npolled; - int err = 0; spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -688,8 +673,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) soft_polled = poll_soft_wc(cq, num_entries, wc); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); - if (err) + if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) break; } @@ -698,10 +682,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return soft_polled + npolled; - else - return err; + return soft_polled + npolled; } int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) @@ -747,14 +728,16 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf, static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, struct ib_ucontext *context, struct mlx5_ib_cq *cq, - int entries, struct mlx5_create_cq_mbox_in **cqb, + int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { struct mlx5_ib_create_cq ucmd; size_t ucmdlen; int page_shift; + __be64 *pas; int npages; int ncont; + void *cqc; int err; ucmdlen = @@ -792,14 +775,20 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); - *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont; + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_db; } - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); - (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + page_shift - MLX5_ADAPTER_PAGE_SHIFT); *index = to_mucontext(context)->uuari.uars[0].index; @@ -834,9 +823,10 @@ static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size, - struct mlx5_create_cq_mbox_in **cqb, - int *index, int *inlen) + u32 **cqb, int *index, int *inlen) { + __be64 *pas; + void *cqc; int err; err = mlx5_db_alloc(dev->mdev, &cq->db); @@ -853,15 +843,21 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, init_cq_buf(cq, &cq->buf); - *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); - (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_fill_page_array(&cq->buf.buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + *index = dev->mdev->priv.uuari.uars[0].index; return 0; @@ -895,11 +891,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, { int entries = attr->cqe; int vector = attr->comp_vector; - struct mlx5_create_cq_mbox_in *cqb = NULL; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_cq *cq; int uninitialized_var(index); int uninitialized_var(inlen); + u32 *cqb = NULL; + void *cqc; int cqe_size; unsigned int irqn; int eqn; @@ -945,19 +942,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - cq->cqe_size = cqe_size; - cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; - - if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) - cqb->ctx.cqe_sz_flags |= (1 << 1); - - cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); if (err) goto err_cqb; - cqb->ctx.c_eqn = cpu_to_be16(eqn); - cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); + cq->cqe_size = cqe_size; + + cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); + MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + MLX5_SET(cqc, cqc, uar_page, index); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); + if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) + MLX5_SET(cqc, cqc, oi, 1); err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen); if (err) @@ -1088,27 +1086,15 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - struct mlx5_modify_cq_mbox_in *in; struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); int err; - u32 fsel; if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - in->cqn = cpu_to_be32(mcq->mcq.cqn); - fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); - in->ctx.cq_period = cpu_to_be16(cq_period); - in->ctx.cq_max_count = cpu_to_be16(cq_count); - in->field_select = cpu_to_be32(fsel); - err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in)); - kfree(in); - + err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, + cq_period, cq_count); if (err) mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); @@ -1241,9 +1227,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibcq->device); struct mlx5_ib_cq *cq = to_mcq(ibcq); - struct mlx5_modify_cq_mbox_in *in; + void *cqc; + u32 *in; int err; int npas; + __be64 *pas; int page_shift; int inlen; int uninitialized_var(cqe_size); @@ -1285,28 +1273,37 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) if (err) goto ex; - inlen = sizeof(*in) + npas * sizeof(in->pas[0]); + inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; + in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto ex_resize; } + pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - in->pas, 0); + pas, 0); else - mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); - - in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE | - MLX5_MODIFY_CQ_MASK_PG_OFFSET | - MLX5_MODIFY_CQ_MASK_PG_SIZE); - in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; - in->ctx.page_offset = 0; - in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); - in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); - in->cqn = cpu_to_be32(cq->mcq.cqn); + mlx5_fill_page_array(&cq->resize_buf->buf, pas); + + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.resize_field_select.resize_field_select, + MLX5_MODIFY_CQ_MASK_LOG_SIZE | + MLX5_MODIFY_CQ_MASK_PG_OFFSET | + MLX5_MODIFY_CQ_MASK_PG_SIZE); + + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + + MLX5_SET(cqc, cqc, log_page_size, + page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + + MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); + MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn); err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen); if (err) diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 364aab9f3c9e..39e58489dcc2 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -394,7 +394,7 @@ int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc) if (err) goto out; - memcpy(node_desc, out_mad->data, 64); + memcpy(node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); out: kfree(in_mad); kfree(out_mad); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a84bb766fc62..22174774dbb8 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -37,7 +37,6 @@ #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/slab.h> -#include <linux/io-mapping.h> #if defined(CONFIG_X86) #include <asm/pat.h> #endif @@ -54,7 +53,6 @@ #include <linux/in.h> #include <linux/etherdevice.h> #include <linux/mlx5/fs.h> -#include "user.h" #include "mlx5_ib.h" #define DRIVER_NAME "mlx5_ib" @@ -107,13 +105,42 @@ static int mlx5_netdev_event(struct notifier_block *this, struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, roce.nb); - if ((event != NETDEV_UNREGISTER) && (event != NETDEV_REGISTER)) - return NOTIFY_DONE; + switch (event) { + case NETDEV_REGISTER: + case NETDEV_UNREGISTER: + write_lock(&ibdev->roce.netdev_lock); + if (ndev->dev.parent == &ibdev->mdev->pdev->dev) + ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? + NULL : ndev; + write_unlock(&ibdev->roce.netdev_lock); + break; + + case NETDEV_UP: + case NETDEV_DOWN: { + struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + struct net_device *upper = NULL; + + if (lag_ndev) { + upper = netdev_master_upper_dev_get(lag_ndev); + dev_put(lag_ndev); + } - write_lock(&ibdev->roce.netdev_lock); - if (ndev->dev.parent == &ibdev->mdev->pdev->dev) - ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? NULL : ndev; - write_unlock(&ibdev->roce.netdev_lock); + if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) + && ibdev->ib_active) { + struct ib_event ibev = {0}; + + ibev.device = &ibdev->ib_dev; + ibev.event = (event == NETDEV_UP) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; + ibev.element.port_num = 1; + ib_dispatch_event(&ibev); + } + break; + } + + default: + break; + } return NOTIFY_DONE; } @@ -124,6 +151,10 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, struct mlx5_ib_dev *ibdev = to_mdev(device); struct net_device *ndev; + ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + if (ndev) + return ndev; + /* Ensure ndev does not disappear before we invoke dev_hold() */ read_lock(&ibdev->roce.netdev_lock); @@ -139,7 +170,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, struct ib_port_attr *props) { struct mlx5_ib_dev *dev = to_mdev(device); - struct net_device *ndev; + struct net_device *ndev, *upper; enum ib_mtu ndev_ib_mtu; u16 qkey_viol_cntr; @@ -163,6 +194,17 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num, if (!ndev) return 0; + if (mlx5_lag_is_active(dev->mdev)) { + rcu_read_lock(); + upper = netdev_master_upper_dev_get_rcu(ndev); + if (upper) { + dev_put(ndev); + ndev = upper; + dev_hold(ndev); + } + rcu_read_unlock(); + } + if (netif_running(ndev) && netif_carrier_ok(ndev)) { props->state = IB_PORT_ACTIVE; props->phys_state = 5; @@ -233,23 +275,19 @@ static int set_roce_addr(struct ib_device *device, u8 port_num, const union ib_gid *gid, const struct ib_gid_attr *attr) { - struct mlx5_ib_dev *dev = to_mdev(device); - u32 in[MLX5_ST_SZ_DW(set_roce_address_in)]; - u32 out[MLX5_ST_SZ_DW(set_roce_address_out)]; + struct mlx5_ib_dev *dev = to_mdev(device); + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); if (ll != IB_LINK_LAYER_ETHERNET) return -EINVAL; - memset(in, 0, sizeof(in)); - ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); MLX5_SET(set_roce_address_in, in, roce_address_index, index); MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); - - memset(out, 0, sizeof(out)); return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } @@ -289,7 +327,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !MLX5_CAP_GEN(dev->mdev, ib_virt); + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) + return !MLX5_CAP_GEN(dev->mdev, ib_virt); + return 0; } enum { @@ -432,7 +472,7 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, } struct mlx5_reg_node_desc { - u8 desc[64]; + u8 desc[IB_DEVICE_NODE_DESC_MAX]; }; static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) @@ -535,6 +575,26 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.tso_caps); } } + + if (field_avail(typeof(resp), rss_caps, uhw->outlen)) { + resp.rss_caps.rx_hash_function = + MLX5_RX_HASH_FUNC_TOEPLITZ; + resp.rss_caps.rx_hash_fields_mask = + MLX5_RX_HASH_SRC_IPV4 | + MLX5_RX_HASH_DST_IPV4 | + MLX5_RX_HASH_SRC_IPV6 | + MLX5_RX_HASH_DST_IPV6 | + MLX5_RX_HASH_SRC_PORT_TCP | + MLX5_RX_HASH_DST_PORT_TCP | + MLX5_RX_HASH_SRC_PORT_UDP | + MLX5_RX_HASH_DST_PORT_UDP; + resp.response_length += sizeof(resp.rss_caps); + } + } else { + if (field_avail(typeof(resp), tso_caps, uhw->outlen)) + resp.response_length += sizeof(resp.tso_caps); + if (field_avail(typeof(resp), rss_caps, uhw->outlen)) + resp.response_length += sizeof(resp.rss_caps); } if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) { @@ -598,6 +658,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (!mlx5_core_is_pf(mdev)) props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION; + if (mlx5_ib_port_link_layer(ibdev, 1) == + IB_LINK_LAYER_ETHERNET) { + props->rss_caps.max_rwq_indirection_tables = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt); + props->rss_caps.max_rwq_indirection_table_size = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size); + props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET; + props->max_wq_type_rq = + 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); @@ -752,8 +823,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, &props->active_width); if (err) goto out; - err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB, - port); + err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); if (err) goto out; @@ -850,13 +920,13 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ - memcpy(&in, props->node_desc, 64); + memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX); err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, sizeof(out), MLX5_REG_NODE_DESC, 0, 1); if (err) return err; - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); return err; } @@ -1399,28 +1469,77 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd) return 0; } -static bool outer_header_zero(u32 *match_criteria) +enum { + MATCH_CRITERIA_ENABLE_OUTER_BIT, + MATCH_CRITERIA_ENABLE_MISC_BIT, + MATCH_CRITERIA_ENABLE_INNER_BIT +}; + +#define HEADER_IS_ZERO(match_criteria, headers) \ + !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \ + 0, MLX5_FLD_SZ_BYTES(fte_match_param, headers))) \ + +static u8 get_match_criteria_enable(u32 *match_criteria) { - int size = MLX5_ST_SZ_BYTES(fte_match_param); - char *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_criteria, - outer_headers); + u8 match_criteria_enable; + + match_criteria_enable = + (!HEADER_IS_ZERO(match_criteria, outer_headers)) << + MATCH_CRITERIA_ENABLE_OUTER_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, misc_parameters)) << + MATCH_CRITERIA_ENABLE_MISC_BIT; + match_criteria_enable |= + (!HEADER_IS_ZERO(match_criteria, inner_headers)) << + MATCH_CRITERIA_ENABLE_INNER_BIT; + + return match_criteria_enable; +} + +static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); +} - return outer_headers_c[0] == 0 && !memcmp(outer_headers_c, - outer_headers_c + 1, - size - 1); +static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) +{ + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val); + MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2); + MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2); } +#define LAST_ETH_FIELD vlan_tag +#define LAST_IB_FIELD sl +#define LAST_IPV4_FIELD tos +#define LAST_IPV6_FIELD traffic_class +#define LAST_TCP_UDP_FIELD src_port + +/* Field is the last supported field */ +#define FIELDS_NOT_SUPPORTED(filter, field)\ + memchr_inv((void *)&filter.field +\ + sizeof(filter.field), 0,\ + sizeof(filter) -\ + offsetof(typeof(filter), field) -\ + sizeof(filter.field)) + static int parse_flow_attr(u32 *match_c, u32 *match_v, - union ib_flow_spec *ib_spec) + const union ib_flow_spec *ib_spec) { void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, outer_headers); void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, outer_headers); + void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, + misc_parameters); + void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, + misc_parameters); + switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: - if (ib_spec->size != sizeof(ib_spec->eth)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) + return -ENOTSUPP; ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, dmac_47_16), @@ -1429,6 +1548,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dmac_47_16), ib_spec->eth.val.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, vlan_tag, 1); @@ -1460,8 +1586,8 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, ethertype, ntohs(ib_spec->eth.val.ether_type)); break; case IB_FLOW_SPEC_IPV4: - if (ib_spec->size != sizeof(ib_spec->ipv4)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); @@ -1484,10 +1610,16 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); + + set_tos(outer_headers_c, outer_headers_v, + ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); + + set_proto(outer_headers_c, outer_headers_v, + ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); break; case IB_FLOW_SPEC_IPV6: - if (ib_spec->size != sizeof(ib_spec->ipv6)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ethertype, 0xffff); @@ -1510,10 +1642,26 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.dst_ip, sizeof(ib_spec->ipv6.val.dst_ip)); + + set_tos(outer_headers_c, outer_headers_v, + ib_spec->ipv6.mask.traffic_class, + ib_spec->ipv6.val.traffic_class); + + set_proto(outer_headers_c, outer_headers_v, + ib_spec->ipv6.mask.next_hdr, + ib_spec->ipv6.val.next_hdr); + + MLX5_SET(fte_match_set_misc, misc_params_c, + outer_ipv6_flow_label, + ntohl(ib_spec->ipv6.mask.flow_label)); + MLX5_SET(fte_match_set_misc, misc_params_v, + outer_ipv6_flow_label, + ntohl(ib_spec->ipv6.val.flow_label)); break; case IB_FLOW_SPEC_TCP: - if (ib_spec->size != sizeof(ib_spec->tcp_udp)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); @@ -1531,8 +1679,9 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_UDP: - if (ib_spec->size != sizeof(ib_spec->tcp_udp)) - return -EINVAL; + if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, + LAST_TCP_UDP_FIELD)) + return -ENOTSUPP; MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, 0xff); @@ -1579,7 +1728,7 @@ static bool flow_is_multicast_only(struct ib_flow_attr *ib_attr) is_multicast_ether_addr(eth_spec->val.dst_mac); } -static bool is_valid_attr(struct ib_flow_attr *flow_attr) +static bool is_valid_attr(const struct ib_flow_attr *flow_attr) { union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1); bool has_ipv4_spec = false; @@ -1623,12 +1772,13 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id) list_for_each_entry_safe(iter, tmp, &handler->list, list) { mlx5_del_flow_rule(iter->rule); + put_flow_table(dev, iter->prio, true); list_del(&iter->list); kfree(iter); } mlx5_del_flow_rule(handler->rule); - put_flow_table(dev, &dev->flow_db.prios[handler->prio], true); + put_flow_table(dev, handler->prio, true); mutex_unlock(&dev->flow_db.lock); kfree(handler); @@ -1644,10 +1794,16 @@ static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap) return priority; } +enum flow_table_type { + MLX5_IB_FT_RX, + MLX5_IB_FT_TX +}; + #define MLX5_FS_MAX_TYPES 10 #define MLX5_FS_MAX_ENTRIES 32000UL static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, - struct ib_flow_attr *flow_attr) + struct ib_flow_attr *flow_attr, + enum flow_table_type ft_type) { bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP; struct mlx5_flow_namespace *ns = NULL; @@ -1678,6 +1834,19 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, &num_entries, &num_groups); prio = &dev->flow_db.prios[MLX5_IB_FLOW_LEFTOVERS_PRIO]; + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + if (!MLX5_CAP_FLOWTABLE(dev->mdev, + allow_sniffer_and_nic_rx_shared_tir)) + return ERR_PTR(-ENOTSUPP); + + ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ? + MLX5_FLOW_NAMESPACE_SNIFFER_RX : + MLX5_FLOW_NAMESPACE_SNIFFER_TX); + + prio = &dev->flow_db.sniffer[ft_type]; + priority = 0; + num_entries = 1; + num_groups = 1; } if (!ns) @@ -1703,13 +1872,13 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev, static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, struct mlx5_ib_flow_prio *ft_prio, - struct ib_flow_attr *flow_attr, + const struct ib_flow_attr *flow_attr, struct mlx5_flow_destination *dst) { struct mlx5_flow_table *ft = ft_prio->flow_table; struct mlx5_ib_flow_handler *handler; struct mlx5_flow_spec *spec; - void *ib_flow = flow_attr + 1; + const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr); unsigned int spec_index; u32 action; int err = 0; @@ -1735,9 +1904,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, ib_flow += ((union ib_flow_spec *)ib_flow)->size; } - /* Outer header support only */ - spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria)) - << 0; + spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria); action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST : MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO; handler->rule = mlx5_add_flow_rule(ft, spec, @@ -1750,7 +1917,8 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev, goto free; } - handler->prio = ft_prio - dev->flow_db.prios; + ft_prio->refcount++; + handler->prio = ft_prio; ft_prio->flow_table = ft; free: @@ -1774,6 +1942,7 @@ static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *de flow_attr, dst); if (IS_ERR(handler_dst)) { mlx5_del_flow_rule(handler->rule); + ft_prio->refcount--; kfree(handler); handler = handler_dst; } else { @@ -1835,6 +2004,8 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de &leftovers_specs[LEFTOVERS_UC].flow_attr, dst); if (IS_ERR(handler_ucast)) { + mlx5_del_flow_rule(handler->rule); + ft_prio->refcount--; kfree(handler); handler = handler_ucast; } else { @@ -1845,13 +2016,52 @@ static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *de return handler; } +static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev, + struct mlx5_ib_flow_prio *ft_rx, + struct mlx5_ib_flow_prio *ft_tx, + struct mlx5_flow_destination *dst) +{ + struct mlx5_ib_flow_handler *handler_rx; + struct mlx5_ib_flow_handler *handler_tx; + int err; + static const struct ib_flow_attr flow_attr = { + .num_of_specs = 0, + .size = sizeof(flow_attr) + }; + + handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst); + if (IS_ERR(handler_rx)) { + err = PTR_ERR(handler_rx); + goto err; + } + + handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst); + if (IS_ERR(handler_tx)) { + err = PTR_ERR(handler_tx); + goto err_tx; + } + + list_add(&handler_tx->list, &handler_rx->list); + + return handler_rx; + +err_tx: + mlx5_del_flow_rule(handler_rx->rule); + ft_rx->refcount--; + kfree(handler_rx); +err: + return ERR_PTR(err); +} + static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr, int domain) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); struct mlx5_ib_flow_handler *handler = NULL; struct mlx5_flow_destination *dst = NULL; + struct mlx5_ib_flow_prio *ft_prio_tx = NULL; struct mlx5_ib_flow_prio *ft_prio; int err; @@ -1869,14 +2079,25 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, mutex_lock(&dev->flow_db.lock); - ft_prio = get_flow_table(dev, flow_attr); + ft_prio = get_flow_table(dev, flow_attr, MLX5_IB_FT_RX); if (IS_ERR(ft_prio)) { err = PTR_ERR(ft_prio); goto unlock; } + if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX); + if (IS_ERR(ft_prio_tx)) { + err = PTR_ERR(ft_prio_tx); + ft_prio_tx = NULL; + goto destroy_ft; + } + } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + if (mqp->flags & MLX5_IB_QP_RSS) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { @@ -1890,6 +2111,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { handler = create_leftovers_rule(dev, ft_prio, flow_attr, dst); + } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { + handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); } else { err = -EINVAL; goto destroy_ft; @@ -1901,7 +2124,6 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, goto destroy_ft; } - ft_prio->refcount++; mutex_unlock(&dev->flow_db.lock); kfree(dst); @@ -1909,6 +2131,8 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, destroy_ft: put_flow_table(dev, ft_prio, false); + if (ft_prio_tx) + put_flow_table(dev, ft_prio_tx, false); unlock: mutex_unlock(&dev->flow_db.lock); kfree(dst); @@ -2098,14 +2322,19 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context, break; case MLX5_DEV_EVENT_PORT_UP: - ibev.event = IB_EVENT_PORT_ACTIVE; - port = (u8)param; - break; - case MLX5_DEV_EVENT_PORT_DOWN: case MLX5_DEV_EVENT_PORT_INITIALIZED: - ibev.event = IB_EVENT_PORT_ERR; port = (u8)param; + + /* In RoCE, port up/down events are handled in + * mlx5_netdev_event(). + */ + if (mlx5_ib_port_link_layer(&ibdev->ib_dev, port) == + IB_LINK_LAYER_ETHERNET) + return; + + ibev.event = (event == MLX5_DEV_EVENT_PORT_UP) ? + IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR; break; case MLX5_DEV_EVENT_LID_CHANGE: @@ -2228,7 +2457,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_0; } - pd = ib_alloc_pd(&dev->ib_dev); + pd = ib_alloc_pd(&dev->ib_dev, 0); if (IS_ERR(pd)) { mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); ret = PTR_ERR(pd); @@ -2510,30 +2739,88 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str, fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } +static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, + MLX5_FLOW_NAMESPACE_LAG); + struct mlx5_flow_table *ft; + int err; + + if (!ns || !mlx5_lag_is_active(mdev)) + return 0; + + err = mlx5_cmd_create_vport_lag(mdev); + if (err) + return err; + + ft = mlx5_create_lag_demux_flow_table(ns, 0, 0); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + goto err_destroy_vport_lag; + } + + dev->flow_db.lag_demux_ft = ft; + return 0; + +err_destroy_vport_lag: + mlx5_cmd_destroy_vport_lag(mdev); + return err; +} + +static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) +{ + struct mlx5_core_dev *mdev = dev->mdev; + + if (dev->flow_db.lag_demux_ft) { + mlx5_destroy_flow_table(dev->flow_db.lag_demux_ft); + dev->flow_db.lag_demux_ft = NULL; + + mlx5_cmd_destroy_vport_lag(mdev); + } +} + +static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) +{ + if (dev->roce.nb.notifier_call) { + unregister_netdevice_notifier(&dev->roce.nb); + dev->roce.nb.notifier_call = NULL; + } +} + static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { int err; dev->roce.nb.notifier_call = mlx5_netdev_event; err = register_netdevice_notifier(&dev->roce.nb); - if (err) + if (err) { + dev->roce.nb.notifier_call = NULL; return err; + } err = mlx5_nic_vport_enable_roce(dev->mdev); if (err) goto err_unregister_netdevice_notifier; + err = mlx5_roce_lag_init(dev); + if (err) + goto err_disable_roce; + return 0; +err_disable_roce: + mlx5_nic_vport_disable_roce(dev->mdev); + err_unregister_netdevice_notifier: - unregister_netdevice_notifier(&dev->roce.nb); + mlx5_remove_roce_notifier(dev); return err; } static void mlx5_disable_roce(struct mlx5_ib_dev *dev) { + mlx5_roce_lag_cleanup(dev); mlx5_nic_vport_disable_roce(dev->mdev); - unregister_netdevice_notifier(&dev->roce.nb); } static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) @@ -2648,6 +2935,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) struct mlx5_ib_dev *dev; enum rdma_link_layer ll; int port_type_cap; + const char *name; int err; int i; @@ -2680,7 +2968,12 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) MLX5_INIT_DOORBELL_LOCK(&dev->uar_lock); - strlcpy(dev->ib_dev.name, "mlx5_%d", IB_DEVICE_NAME_MAX); + if (!mlx5_lag_is_active(mdev)) + name = "mlx5_%d"; + else + name = "mlx5_bond_%d"; + + strlcpy(dev->ib_dev.name, name, IB_DEVICE_NAME_MAX); dev->ib_dev.owner = THIS_MODULE; dev->ib_dev.node_type = RDMA_NODE_IB_CA; dev->ib_dev.local_dma_lkey = 0 /* not supported for now */; @@ -2882,8 +3175,10 @@ err_rsrc: destroy_dev_resources(&dev->devr); err_disable_roce: - if (ll == IB_LINK_LAYER_ETHERNET) + if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_roce(dev); + mlx5_remove_roce_notifier(dev); + } err_free_port: kfree(dev->port); @@ -2899,6 +3194,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); + mlx5_remove_roce_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 40df2cca0609..996b54e366b0 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, addr = addr >> page_shift; tmp = (unsigned long)addr; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; i = 0; @@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, for (k = 0; k < len; k++) { if (!(i & mask)) { tmp = (unsigned long)pfn; - m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp))); + m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG)); skip = 1 << m; mask = skip - 1; base = pfn; @@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, } else { if (base + p != pfn) { tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; base = pfn; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 372385d0f993..dcdcd195fe53 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -44,6 +44,7 @@ #include <linux/types.h> #include <linux/mlx5/transobj.h> #include <rdma/ib_user_verbs.h> +#include <rdma/mlx5-abi.h> #define mlx5_ib_dbg(dev, format, arg...) \ pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ @@ -142,6 +143,7 @@ struct mlx5_ib_pd { #define MLX5_IB_FLOW_LEFTOVERS_PRIO (MLX5_IB_FLOW_MCAST_PRIO + 1) #define MLX5_IB_NUM_FLOW_FT (MLX5_IB_FLOW_LEFTOVERS_PRIO + 1) +#define MLX5_IB_NUM_SNIFFER_FTS 2 struct mlx5_ib_flow_prio { struct mlx5_flow_table *flow_table; unsigned int refcount; @@ -150,12 +152,14 @@ struct mlx5_ib_flow_prio { struct mlx5_ib_flow_handler { struct list_head list; struct ib_flow ibflow; - unsigned int prio; + struct mlx5_ib_flow_prio *prio; struct mlx5_flow_rule *rule; }; struct mlx5_ib_flow_db { struct mlx5_ib_flow_prio prios[MLX5_IB_NUM_FLOW_FT]; + struct mlx5_ib_flow_prio sniffer[MLX5_IB_NUM_SNIFFER_FTS]; + struct mlx5_flow_table *lag_demux_ft; /* Protect flow steering bypass flow tables * when add/del flow rules. * only single add/removal of flow steering rule could be done @@ -225,7 +229,7 @@ struct mlx5_ib_wq { struct mlx5_ib_rwq { struct ib_wq ibwq; - u32 rqn; + struct mlx5_core_qp core_qp; u32 rq_num_pas; u32 log_rq_stride; u32 log_rq_size; @@ -402,6 +406,7 @@ enum mlx5_ib_qp_flags { /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, + MLX5_IB_QP_RSS = 1 << 8, }; struct mlx5_umr_wr { @@ -504,7 +509,7 @@ struct mlx5_ib_mr { int umred; int npages; struct mlx5_ib_dev *dev; - struct mlx5_create_mkey_mbox_out out; + u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; @@ -602,6 +607,7 @@ struct mlx5_roce { rwlock_t netdev_lock; struct net_device *netdev; struct notifier_block nb; + atomic_t next_port; }; struct mlx5_ib_dev { @@ -662,6 +668,11 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp) return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp; } +static inline struct mlx5_ib_rwq *to_mibrwq(struct mlx5_core_qp *core_qp) +{ + return container_of(core_qp, struct mlx5_ib_rwq, core_qp); +} + static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey) { return container_of(mmkey, struct mlx5_ib_mr, mmkey); @@ -946,4 +957,40 @@ static inline int verify_assign_uidx(u8 cqe_version, u32 cmd_uidx, return 0; } + +static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_qp *ucmd, + int inlen, + u32 *user_index) +{ + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); +} + +static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, + struct mlx5_ib_create_srq *ucmd, + int inlen, + u32 *user_index) +{ + u8 cqe_version = ucontext->cqe_version; + + if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && + !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) + return 0; + + if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != + !!cqe_version)) + return -EINVAL; + + return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); +} #endif /* MLX5_IB_H */ diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4b021305c321..d4ad672b905b 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -40,7 +40,6 @@ #include <rdma/ib_umem_odp.h> #include <rdma/ib_verbs.h> #include "mlx5_ib.h" -#include "user.h" enum { MAX_PENDING_REG_MR = 8, @@ -135,20 +134,10 @@ static void reg_mr_callback(int status, void *context) return; } - if (mr->out.hdr.status) { - mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n", - mr->out.hdr.status, - be32_to_cpu(mr->out.hdr.syndrome)); - kfree(mr); - dev->fill_delay = 1; - mod_timer(&dev->delay_timer, jiffies + HZ); - return; - } - spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; cache->last_add = jiffies; @@ -170,16 +159,19 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; - struct mlx5_create_mkey_mbox_in *in; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; int npages = 1 << ent->order; + void *mkc; + u32 *in; int err = 0; int i; - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { if (ent->pending >= MAX_PENDING_REG_MR) { err = -EAGAIN; @@ -194,18 +186,22 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) mr->order = ent->order; mr->umred = 1; mr->dev = dev; - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; - in->seg.log2_page_size = 12; + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2); + MLX5_SET(mkc, mkc, log_page_size, 12); spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, - sizeof(*in), reg_mr_callback, - mr, &mr->out); + err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, + in, inlen, + mr->out, sizeof(mr->out), + reg_mr_callback, mr); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -614,7 +610,7 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) int err; int i; - cache->wq = create_singlethread_workqueue("mkey_cache"); + cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); if (!cache->wq) { mlx5_ib_warn(dev, "failed to create work queue\n"); return -ENOMEM; @@ -670,30 +666,38 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *seg; struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } - seg = &in->seg; - seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA; - seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64); - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - seg->start_addr = 0; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, - NULL); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, 0); + + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1063,9 +1067,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int page_shift, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; + __be64 *pas; + void *mkc; int inlen; + u32 *in; int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); @@ -1073,31 +1079,41 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); - inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2; + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + + sizeof(*pas) * ((npages + 1) / 2) * 2; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_1; } - mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, + pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); - /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags + /* The pg_access bit allows setting the access flags * in the page list submitted with the command. */ - in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0; - in->seg.flags = convert_access(access_flags) | - MLX5_ACCESS_MODE_MTT; - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - in->seg.start_addr = cpu_to_be64(virt_addr); - in->seg.len = cpu_to_be64(length); - in->seg.bsfs_octo_size = 0; - in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - in->seg.log2_page_size = page_shift; - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, - 1 << page_shift)); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, - NULL, NULL); + MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET64(mkc, mkc, start_addr, virt_addr); + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, bsf_octword_size, 0); + MLX5_SET(mkc, mkc, translations_octword_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -1523,30 +1539,32 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, u32 max_num_sg) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_ib_mr *mr; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); int ndescs = ALIGN(max_num_sg, 4); + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32(ndescs); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); if (mr_type == IB_MR_TYPE_MEM_REG) { - mr->access_mode = MLX5_ACCESS_MODE_MTT; - in->seg.log2_page_size = PAGE_SHIFT; - + mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(u64)); if (err) @@ -1555,7 +1573,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, mr->desc_size = sizeof(u64); mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SG_GAPS) { - mr->access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(struct mlx5_klm)); @@ -1566,9 +1584,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; - in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | - MLX5_MKEY_BSF_EN); - in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + MLX5_SET(mkc, mkc, bsf_en, 1); + MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); if (!mr->sig) { err = -ENOMEM; @@ -1581,7 +1598,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free_sig; - mr->access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; @@ -1595,9 +1612,10 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), - NULL, NULL, NULL); + MLX5_SET(mkc, mkc, access_mode, mr->access_mode); + MLX5_SET(mkc, mkc, umr_en, 1); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) goto err_destroy_psv; @@ -1633,8 +1651,10 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in = NULL; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mw *mw = NULL; + u32 *in = NULL; + void *mkc; int ndescs; int err; struct mlx5_ib_alloc_mw req = {}; @@ -1658,23 +1678,24 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); mw = kzalloc(sizeof(*mw), GFP_KERNEL); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!mw || !in) { err = -ENOMEM; goto free; } - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32(ndescs); - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | - MLX5_PERM_LOCAL_READ; - if (type == IB_MW_TYPE_2) - in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - - err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), - NULL, NULL, NULL); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); if (err) goto free; @@ -1811,7 +1832,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); - if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); else n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c index 34e79e709c67..cacb631a7b0a 100644 --- a/drivers/infiniband/hw/mlx5/odp.c +++ b/drivers/infiniband/hw/mlx5/odp.c @@ -782,8 +782,8 @@ void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev) int __init mlx5_ib_odp_init(void) { - mlx5_ib_page_fault_wq = - create_singlethread_workqueue("mlx5_ib_page_faults"); + mlx5_ib_page_fault_wq = alloc_ordered_workqueue("mlx5_ib_page_faults", + WQ_MEM_RECLAIM); if (!mlx5_ib_page_fault_wq) return -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0dd7d93cac95..41f4c2afbcdd 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -35,7 +35,6 @@ #include <rdma/ib_cache.h> #include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" -#include "user.h" /* not supported currently */ static int wq_signature; @@ -77,6 +76,17 @@ struct mlx5_wqe_eth_pad { u8 rsvd0[16]; }; +enum raw_qp_set_mask_map { + MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, +}; + +struct mlx5_modify_raw_qp_param { + u16 operation; + + u32 set_mask; /* raw_qp_set_mask_map */ + u8 rq_q_ctr_id; +}; + static void get_cqs(enum ib_qp_type qp_type, struct ib_cq *ib_send_cq, struct ib_cq *ib_recv_cq, struct mlx5_ib_cq **send_cq, struct mlx5_ib_cq **recv_cq); @@ -726,7 +736,7 @@ err_umem: static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, struct ib_qp_init_attr *attr, - struct mlx5_create_qp_mbox_in **in, + u32 **in, struct mlx5_ib_create_qp_resp *resp, int *inlen, struct mlx5_ib_qp_base *base) { @@ -739,6 +749,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 offset = 0; int uuarn; int ncont = 0; + __be64 *pas; + void *qpc; int err; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); @@ -795,20 +807,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, ubuffer->umem = NULL; } - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_umem; } + + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, - (*in)->pas, 0); - (*in)->ctx.log_pg_sz_remote_qpn = - cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); - (*in)->ctx.params2 = cpu_to_be32(offset << 6); + mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); + + qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); + + MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, offset); - (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + MLX5_SET(qpc, qpc, uar_page, uar_index); resp->uuar_index = uuarn; qp->uuarn = uuarn; @@ -857,12 +873,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp, static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, - struct mlx5_create_qp_mbox_in **in, int *inlen, + u32 **in, int *inlen, struct mlx5_ib_qp_base *base) { enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; struct mlx5_uuar_info *uuari; int uar_index; + void *qpc; int uuarn; int err; @@ -902,25 +919,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, } qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages; + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_buf; } - (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); - (*in)->ctx.log_pg_sz_remote_qpn = - cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); + + qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + /* Set "fast registration enabled" for all kernel QPs */ - (*in)->ctx.params1 |= cpu_to_be32(1 << 11); - (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { - (*in)->ctx.deth_sqpn = cpu_to_be32(1); + MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } - mlx5_fill_page_array(&qp->buf, (*in)->pas); + mlx5_fill_page_array(&qp->buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { @@ -974,15 +995,15 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn); } -static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) +static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || (attr->qp_type == IB_QPT_XRC_INI)) - return cpu_to_be32(MLX5_SRQ_RQ); + return MLX5_SRQ_RQ; else if (!qp->has_rq) - return cpu_to_be32(MLX5_ZERO_LEN_RQ); + return MLX5_ZERO_LEN_RQ; else - return cpu_to_be32(MLX5_NON_ZERO_RQ); + return MLX5_NON_ZERO_RQ; } static int is_connected(enum ib_qp_type qp_type) @@ -996,13 +1017,10 @@ static int is_connected(enum ib_qp_type qp_type) static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u32 tdn) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - memset(in, 0, sizeof(in)); - MLX5_SET(tisc, tisc, transport_domain, tdn); - return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } @@ -1191,7 +1209,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct mlx5_create_qp_mbox_in *in, + u32 *in, struct ib_pd *pd) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; @@ -1449,6 +1467,7 @@ create_tir: kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; + qp->flags |= MLX5_IB_QP_RSS; return 0; err: @@ -1461,18 +1480,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_qp_base *base; struct mlx5_ib_create_qp_resp resp; - struct mlx5_create_qp_mbox_in *in; - struct mlx5_ib_create_qp ucmd; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; - int inlen = sizeof(*in); - int err; u32 uidx = MLX5_IB_DEFAULT_UIDX; + struct mlx5_ib_create_qp ucmd; + struct mlx5_ib_qp_base *base; void *qpc; + u32 *in; + int err; base = init_attr->qp_type == IB_QPT_RAW_PACKET ? &qp->raw_packet_qp.rq.base : @@ -1600,7 +1619,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (err) return err; } else { - in = mlx5_vzalloc(sizeof(*in)); + in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; @@ -1610,26 +1629,29 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (is_sqp(init_attr->qp_type)) qp->port = init_attr->port_num; - in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 | - MLX5_QP_PM_MIGRATED << 11); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) - in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn); + MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); else - in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE); + MLX5_SET(qpc, qpc, latency_sensitive, 1); + if (qp->wq_sig) - in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + MLX5_SET(qpc, qpc, wq_signature, 1); if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) - in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + MLX5_SET(qpc, qpc, block_lb_mc, 1); if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER); + MLX5_SET(qpc, qpc, cd_master, 1); if (qp->flags & MLX5_IB_QP_MANAGED_SEND) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND); + MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV); + MLX5_SET(qpc, qpc, cd_slave_receive, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; @@ -1639,71 +1661,68 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); if (rcqe_sz == 128) - in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE; + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); else - in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE; + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { if (scqe_sz == 128) - in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE; + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); else - in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE; + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE); } } if (qp->rq.wqe_cnt) { - in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4); - in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3; + MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } - in->ctx.rq_type_srqn = get_rx_type(qp, init_attr); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) - in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); else - in->ctx.sq_crq_size |= cpu_to_be16(0x8000); + MLX5_SET(qpc, qpc, no_sq, 1); /* Set default resources */ switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: - in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); - in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn); break; case IB_QPT_XRC_INI: - in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); break; default: if (init_attr->srq) { - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn); } else { - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); - in->ctx.rq_type_srqn |= - cpu_to_be32(to_msrq(devr->s1)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } } if (init_attr->send_cq) - in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn); if (init_attr->recv_cq) - in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn); - in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); - if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) { - qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - /* 0xffffff means we ask to work with cqe version 0 */ + /* 0xffffff means we ask to work with cqe version 0 */ + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) MLX5_SET(qpc, qpc, user_index, uidx); - } + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ if (init_attr->qp_type == IB_QPT_UD && (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { - qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); qp->flags |= MLX5_IB_QP_LSO; } @@ -1854,13 +1873,13 @@ static void get_cqs(enum ib_qp_type qp_type, } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u16 operation); + const struct mlx5_modify_raw_qp_param *raw_qp_param, + u8 lag_tx_affinity); static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; - struct mlx5_modify_qp_mbox_in *in; unsigned long flags; int err; @@ -1873,19 +1892,18 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return; - if (qp->state != IB_QPS_RESET) { if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { mlx5_ib_qp_disable_pagefaults(qp); err = mlx5_core_qp_modify(dev->mdev, - MLX5_CMD_OP_2RST_QP, in, 0, - &base->mqp); + MLX5_CMD_OP_2RST_QP, 0, + NULL, &base->mqp); } else { - err = modify_raw_packet_qp(dev, qp, - MLX5_CMD_OP_2RST_QP); + struct mlx5_modify_raw_qp_param raw_qp_param = { + .operation = MLX5_CMD_OP_2RST_QP + }; + + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, 0); } if (err) mlx5_ib_warn(dev, "mlx5_ib: modify QP 0x%06x to RESET failed\n", @@ -1924,8 +1942,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) base->mqp.qpn); } - kfree(in); - if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) @@ -2151,6 +2167,31 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev, return err; } +static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev, + struct mlx5_ib_sq *sq, u8 tx_affinity) +{ + void *in; + void *tisc; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tis_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tis_in, in, bitmask.lag_tx_port_affinity, 1); + + tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx); + MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity); + + err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen); + + kvfree(in); + + return err; +} + static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, const struct ib_ah_attr *ah, struct mlx5_qp_path *path, u8 port, int attr_mask, @@ -2361,8 +2402,9 @@ static int ib_mask_to_mlx5_opt(int ib_mask) return result; } -static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev, - struct mlx5_ib_rq *rq, int new_state) +static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev, + struct mlx5_ib_rq *rq, int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param) { void *in; void *rqc; @@ -2379,7 +2421,17 @@ static int modify_raw_packet_qp_rq(struct mlx5_core_dev *dev, rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(rqc, rqc, state, new_state); - err = mlx5_core_modify_rq(dev, rq->base.mqp.qpn, in, inlen); + if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) { + if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) { + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID); + MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id); + } else + pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n", + dev->ib_dev.name); + } + + err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen); if (err) goto out; @@ -2420,7 +2472,8 @@ out: } static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - u16 operation) + const struct mlx5_modify_raw_qp_param *raw_qp_param, + u8 tx_affinity) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; @@ -2429,7 +2482,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int sq_state; int err; - switch (operation) { + switch (raw_qp_param->operation) { case MLX5_CMD_OP_RST2INIT_QP: rq_state = MLX5_RQC_STATE_RDY; sq_state = MLX5_SQC_STATE_RDY; @@ -2446,21 +2499,31 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: - /* Nothing to do here... */ - return 0; + if (raw_qp_param->set_mask) + return -EINVAL; + else + return 0; default: WARN_ON(1); return -EINVAL; } if (qp->rq.wqe_cnt) { - err = modify_raw_packet_qp_rq(dev->mdev, rq, rq_state); + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); if (err) return err; } - if (qp->sq.wqe_cnt) + if (qp->sq.wqe_cnt) { + if (tx_affinity) { + err = modify_raw_packet_tx_affinity(dev->mdev, sq, + tx_affinity); + if (err) + return err; + } + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + } return 0; } @@ -2511,20 +2574,20 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; - struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; + struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; int sqd_event; int mlx5_st; int err; u16 op; + u8 tx_affinity = 0; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) return -ENOMEM; - context = &in->ctx; err = to_mlx5_st(ibqp->qp_type); if (err < 0) { mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); @@ -2549,6 +2612,23 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } } + if ((cur_state == IB_QPS_RESET) && (new_state == IB_QPS_INIT)) { + if ((ibqp->qp_type == IB_QPT_RC) || + (ibqp->qp_type == IB_QPT_UD && + !(qp->flags & MLX5_IB_QP_SQPN_QP1)) || + (ibqp->qp_type == IB_QPT_UC) || + (ibqp->qp_type == IB_QPT_RAW_PACKET) || + (ibqp->qp_type == IB_QPT_XRC_INI) || + (ibqp->qp_type == IB_QPT_XRC_TGT)) { + if (mlx5_lag_is_active(dev->mdev)) { + tx_affinity = (unsigned int)atomic_add_return(1, + &dev->roce.next_port) % + MLX5_MAX_PORTS + 1; + context->flags |= cpu_to_be32(tx_affinity << 24); + } + } + } + if (is_sqp(ibqp->qp_type)) { context->mtu_msgmax = (IB_MTU_256 << 5) | 8; } else if (ibqp->qp_type == IB_QPT_UD || @@ -2654,8 +2734,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ? attr->port_num : qp->port) - 1; - struct mlx5_ib_port *mibport = &dev->port[port_num]; - + mibport = &dev->port[port_num]; context->qp_counter_set_usr_page |= cpu_to_be32((u32)(mibport->q_cnt_id) << 24); } @@ -2689,13 +2768,21 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, op = optab[mlx5_cur][mlx5_new]; optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - in->optparam = cpu_to_be32(optpar); - if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) - err = modify_raw_packet_qp(dev, qp, op); - else - err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event, + if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) { + struct mlx5_modify_raw_qp_param raw_qp_param = {}; + + raw_qp_param.operation = op; + if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { + raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; + raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; + } + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); + } else { + err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp); + } + if (err) goto out; @@ -2735,7 +2822,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } out: - kfree(in); + kfree(context); return err; } @@ -2968,7 +3055,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, memset(umr, 0, sizeof(*umr)); - if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) /* KLMs take twice the size of MTTs */ ndescs *= 2; @@ -3111,9 +3198,9 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); - if (mr->access_mode == MLX5_ACCESS_MODE_MTT) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) seg->log2_page_size = ilog2(mr->ibmr.page_size); - else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) /* KLMs take twice the size of MTTs */ ndescs *= 2; @@ -3454,7 +3541,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); seg->flags = get_umr_flags(wr->access_flags) | - MLX5_ACCESS_MODE_KLM; + MLX5_MKC_ACCESS_MODE_KLMS; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); @@ -3658,12 +3745,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { - int err = 0; - - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { - err = -ENOMEM; - return err; - } + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_get_send_wqe(qp, *idx); @@ -3679,7 +3762,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; - return err; + return 0; } static void finish_wqe(struct mlx5_ib_qp *qp, @@ -3758,7 +3841,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; + err = -EINVAL; *bad_wr = wr; goto out; } @@ -4320,21 +4403,24 @@ static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev, static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_attr *qp_attr) { - struct mlx5_query_qp_mbox_out *outb; + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); struct mlx5_qp_context *context; int mlx5_state; + u32 *outb; int err = 0; - outb = kzalloc(sizeof(*outb), GFP_KERNEL); + outb = kzalloc(outlen, GFP_KERNEL); if (!outb) return -ENOMEM; - context = &outb->ctx; err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb, - sizeof(*outb)); + outlen); if (err) goto out; + /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */ + context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc); + mlx5_state = be32_to_cpu(context->flags) >> 28; qp->state = to_ib_qp_state(mlx5_state); @@ -4499,6 +4585,28 @@ int mlx5_ib_dealloc_xrcd(struct ib_xrcd *xrcd) return 0; } +static void mlx5_ib_wq_event(struct mlx5_core_qp *core_qp, int type) +{ + struct mlx5_ib_rwq *rwq = to_mibrwq(core_qp); + struct mlx5_ib_dev *dev = to_mdev(rwq->ibwq.device); + struct ib_event event; + + if (rwq->ibwq.event_handler) { + event.device = rwq->ibwq.device; + event.element.wq = &rwq->ibwq; + switch (type) { + case MLX5_EVENT_TYPE_WQ_CATAS_ERROR: + event.event = IB_EVENT_WQ_FATAL; + break; + default: + mlx5_ib_warn(dev, "Unexpected event type %d on WQ %06x\n", type, core_qp->qpn); + return; + } + + rwq->ibwq.event_handler(&event, rwq->ibwq.wq_context); + } +} + static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, struct ib_wq_init_attr *init_attr) { @@ -4536,7 +4644,7 @@ static int create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd, MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma); rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas); mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0); - err = mlx5_core_create_rq(dev->mdev, in, inlen, &rwq->rqn); + err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp); kvfree(in); return err; } @@ -4652,7 +4760,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, return ERR_PTR(-EINVAL); } - rwq->ibwq.wq_num = rwq->rqn; + rwq->ibwq.wq_num = rwq->core_qp.qpn; rwq->ibwq.state = IB_WQS_RESET; if (udata->outlen) { resp.response_length = offsetof(typeof(resp), response_length) + @@ -4662,10 +4770,12 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd, goto err_copy; } + rwq->core_qp.event = mlx5_ib_wq_event; + rwq->ibwq.event_handler = init_attr->event_handler; return &rwq->ibwq; err_copy: - mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); err_user_rq: destroy_user_rq(pd, rwq); err: @@ -4678,7 +4788,7 @@ int mlx5_ib_destroy_wq(struct ib_wq *wq) struct mlx5_ib_dev *dev = to_mdev(wq->device); struct mlx5_ib_rwq *rwq = to_mrwq(wq); - mlx5_core_destroy_rq(dev->mdev, rwq->rqn); + mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp); destroy_user_rq(wq->pd, rwq); kfree(rwq); @@ -4810,7 +4920,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state); MLX5_SET(rqc, rqc, state, wq_state); - err = mlx5_core_modify_rq(dev->mdev, rwq->rqn, in, inlen); + err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen); kvfree(in); if (!err) rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state; diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index ed6ac52355f1..3857dbd9c956 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -38,7 +38,6 @@ #include <rdma/ib_user_verbs.h> #include "mlx5_ib.h" -#include "user.h" /* not supported currently */ static int srq_signature; diff --git a/drivers/infiniband/hw/mlx5/user.h b/drivers/infiniband/hw/mlx5/user.h deleted file mode 100644 index 188dac4301b5..000000000000 --- a/drivers/infiniband/hw/mlx5/user.h +++ /dev/null @@ -1,281 +0,0 @@ -/* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef MLX5_IB_USER_H -#define MLX5_IB_USER_H - -#include <linux/types.h> - -#include "mlx5_ib.h" - -enum { - MLX5_QP_FLAG_SIGNATURE = 1 << 0, - MLX5_QP_FLAG_SCATTER_CQE = 1 << 1, -}; - -enum { - MLX5_SRQ_FLAG_SIGNATURE = 1 << 0, -}; - -enum { - MLX5_WQ_FLAG_SIGNATURE = 1 << 0, -}; - - -/* Increment this value if any changes that break userspace ABI - * compatibility are made. - */ -#define MLX5_IB_UVERBS_ABI_VERSION 1 - -/* Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. - */ - -struct mlx5_ib_alloc_ucontext_req { - __u32 total_num_uuars; - __u32 num_low_latency_uuars; -}; - -struct mlx5_ib_alloc_ucontext_req_v2 { - __u32 total_num_uuars; - __u32 num_low_latency_uuars; - __u32 flags; - __u32 comp_mask; - __u8 max_cqe_version; - __u8 reserved0; - __u16 reserved1; - __u32 reserved2; -}; - -enum mlx5_ib_alloc_ucontext_resp_mask { - MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0, -}; - -enum mlx5_user_cmds_supp_uhw { - MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, -}; - -struct mlx5_ib_alloc_ucontext_resp { - __u32 qp_tab_size; - __u32 bf_reg_size; - __u32 tot_uuars; - __u32 cache_line_size; - __u16 max_sq_desc_sz; - __u16 max_rq_desc_sz; - __u32 max_send_wqebb; - __u32 max_recv_wr; - __u32 max_srq_recv_wr; - __u16 num_ports; - __u16 reserved1; - __u32 comp_mask; - __u32 response_length; - __u8 cqe_version; - __u8 cmds_supp_uhw; - __u16 reserved2; - __u64 hca_core_clock_offset; -}; - -struct mlx5_ib_alloc_pd_resp { - __u32 pdn; -}; - -struct mlx5_ib_tso_caps { - __u32 max_tso; /* Maximum tso payload size in bytes */ - - /* Corresponding bit will be set if qp type from - * 'enum ib_qp_type' is supported, e.g. - * supported_qpts |= 1 << IB_QPT_UD - */ - __u32 supported_qpts; -}; - -struct mlx5_ib_query_device_resp { - __u32 comp_mask; - __u32 response_length; - struct mlx5_ib_tso_caps tso_caps; -}; - -struct mlx5_ib_create_cq { - __u64 buf_addr; - __u64 db_addr; - __u32 cqe_size; - __u32 reserved; /* explicit padding (optional on i386) */ -}; - -struct mlx5_ib_create_cq_resp { - __u32 cqn; - __u32 reserved; -}; - -struct mlx5_ib_resize_cq { - __u64 buf_addr; - __u16 cqe_size; - __u16 reserved0; - __u32 reserved1; -}; - -struct mlx5_ib_create_srq { - __u64 buf_addr; - __u64 db_addr; - __u32 flags; - __u32 reserved0; /* explicit padding (optional on i386) */ - __u32 uidx; - __u32 reserved1; -}; - -struct mlx5_ib_create_srq_resp { - __u32 srqn; - __u32 reserved; -}; - -struct mlx5_ib_create_qp { - __u64 buf_addr; - __u64 db_addr; - __u32 sq_wqe_count; - __u32 rq_wqe_count; - __u32 rq_wqe_shift; - __u32 flags; - __u32 uidx; - __u32 reserved0; - __u64 sq_buf_addr; -}; - -/* RX Hash function flags */ -enum mlx5_rx_hash_function_flags { - MLX5_RX_HASH_FUNC_TOEPLITZ = 1 << 0, -}; - -/* - * RX Hash flags, these flags allows to set which incoming packet's field should - * participates in RX Hash. Each flag represent certain packet's field, - * when the flag is set the field that is represented by the flag will - * participate in RX Hash calculation. - * Note: *IPV4 and *IPV6 flags can't be enabled together on the same QP - * and *TCP and *UDP flags can't be enabled together on the same QP. -*/ -enum mlx5_rx_hash_fields { - MLX5_RX_HASH_SRC_IPV4 = 1 << 0, - MLX5_RX_HASH_DST_IPV4 = 1 << 1, - MLX5_RX_HASH_SRC_IPV6 = 1 << 2, - MLX5_RX_HASH_DST_IPV6 = 1 << 3, - MLX5_RX_HASH_SRC_PORT_TCP = 1 << 4, - MLX5_RX_HASH_DST_PORT_TCP = 1 << 5, - MLX5_RX_HASH_SRC_PORT_UDP = 1 << 6, - MLX5_RX_HASH_DST_PORT_UDP = 1 << 7 -}; - -struct mlx5_ib_create_qp_rss { - __u64 rx_hash_fields_mask; /* enum mlx5_rx_hash_fields */ - __u8 rx_hash_function; /* enum mlx5_rx_hash_function_flags */ - __u8 rx_key_len; /* valid only for Toeplitz */ - __u8 reserved[6]; - __u8 rx_hash_key[128]; /* valid only for Toeplitz */ - __u32 comp_mask; - __u32 reserved1; -}; - -struct mlx5_ib_create_qp_resp { - __u32 uuar_index; -}; - -struct mlx5_ib_alloc_mw { - __u32 comp_mask; - __u8 num_klms; - __u8 reserved1; - __u16 reserved2; -}; - -struct mlx5_ib_create_wq { - __u64 buf_addr; - __u64 db_addr; - __u32 rq_wqe_count; - __u32 rq_wqe_shift; - __u32 user_index; - __u32 flags; - __u32 comp_mask; - __u32 reserved; -}; - -struct mlx5_ib_create_wq_resp { - __u32 response_length; - __u32 reserved; -}; - -struct mlx5_ib_create_rwq_ind_tbl_resp { - __u32 response_length; - __u32 reserved; -}; - -struct mlx5_ib_modify_wq { - __u32 comp_mask; - __u32 reserved; -}; - -static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext, - struct mlx5_ib_create_qp *ucmd, - int inlen, - u32 *user_index) -{ - u8 cqe_version = ucontext->cqe_version; - - if (field_avail(struct mlx5_ib_create_qp, uidx, inlen) && - !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) - return 0; - - if (!!(field_avail(struct mlx5_ib_create_qp, uidx, inlen) != - !!cqe_version)) - return -EINVAL; - - return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); -} - -static inline int get_srq_user_index(struct mlx5_ib_ucontext *ucontext, - struct mlx5_ib_create_srq *ucmd, - int inlen, - u32 *user_index) -{ - u8 cqe_version = ucontext->cqe_version; - - if (field_avail(struct mlx5_ib_create_srq, uidx, inlen) && - !cqe_version && (ucmd->uidx == MLX5_IB_DEFAULT_UIDX)) - return 0; - - if (!!(field_avail(struct mlx5_ib_create_srq, uidx, inlen) != - !!cqe_version)) - return -EINVAL; - - return verify_assign_uidx(cqe_version, ucmd->uidx, user_index); -} -#endif /* MLX5_IB_USER_H */ diff --git a/drivers/infiniband/hw/mthca/mthca_catas.c b/drivers/infiniband/hw/mthca/mthca_catas.c index 712d2a30fbe5..f6474c24f193 100644 --- a/drivers/infiniband/hw/mthca/mthca_catas.c +++ b/drivers/infiniband/hw/mthca/mthca_catas.c @@ -187,7 +187,7 @@ int __init mthca_catas_init(void) { INIT_WORK(&catas_work, catas_reset); - catas_wq = create_singlethread_workqueue("mthca_catas"); + catas_wq = alloc_ordered_workqueue("mthca_catas", WQ_MEM_RECLAIM); if (!catas_wq) return -ENOMEM; diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7c3f2fb44ba5..9139405c4810 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -153,7 +153,8 @@ static void node_desc_override(struct ib_device *dev, mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { mutex_lock(&to_mdev(dev)->cap_mask_mutex); - memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, + IB_DEVICE_NODE_DESC_MAX); mutex_unlock(&to_mdev(dev)->cap_mask_mutex); } } diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c index 6c00d04b8b28..c6fe89d79248 100644 --- a/drivers/infiniband/hw/mthca/mthca_memfree.c +++ b/drivers/infiniband/hw/mthca/mthca_memfree.c @@ -472,7 +472,7 @@ int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, goto out; } - ret = get_user_pages(uaddr & PAGE_MASK, 1, 1, 0, pages, NULL); + ret = get_user_pages(uaddr & PAGE_MASK, 1, FOLL_WRITE, pages, NULL); if (ret < 0) goto out; diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index da2335f7f7c3..358930a41e36 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -46,7 +46,7 @@ #include "mthca_dev.h" #include "mthca_cmd.h" -#include "mthca_user.h" +#include <rdma/mthca-abi.h> #include "mthca_memfree.h" static void init_query_mad(struct ib_smp *mad) @@ -193,7 +193,8 @@ static int mthca_modify_device(struct ib_device *ibdev, if (mask & IB_DEVICE_MODIFY_NODE_DESC) { if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) return -ERESTARTSYS; - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, + IB_DEVICE_NODE_DESC_MAX); mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); } @@ -1138,7 +1139,7 @@ static int mthca_init_node_data(struct mthca_dev *dev) if (err) goto out; - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h index bd9d132f11c7..e7430c9254d3 100644 --- a/drivers/infiniband/hw/nes/nes.h +++ b/drivers/infiniband/hw/nes/nes.h @@ -165,7 +165,7 @@ do { \ #include "nes_hw.h" #include "nes_verbs.h" #include "nes_context.h" -#include "nes_user.h" +#include <rdma/nes-abi.h> #include "nes_cm.h" #include "nes_mgt.h" diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index 7f0aa23aef9d..57db9b332f44 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -2692,12 +2692,12 @@ static struct nes_cm_core *nes_cm_alloc_core(void) nes_debug(NES_DBG_CM, "Init CM Core completed -- cm_core=%p\n", cm_core); nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n"); - cm_core->event_wq = create_singlethread_workqueue("nesewq"); + cm_core->event_wq = alloc_ordered_workqueue("nesewq", 0); if (!cm_core->event_wq) goto out_free_cmcore; cm_core->post_event = nes_cm_post_event; nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n"); - cm_core->disconn_wq = create_singlethread_workqueue("nesdwq"); + cm_core->disconn_wq = alloc_ordered_workqueue("nesdwq", 0); if (!cm_core->disconn_wq) goto out_free_wq; diff --git a/drivers/infiniband/hw/nes/nes_user.h b/drivers/infiniband/hw/nes/nes_user.h deleted file mode 100644 index 529c421bb15c..000000000000 --- a/drivers/infiniband/hw/nes/nes_user.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2006 - 2011 Intel Corporation. All rights reserved. - * Copyright (c) 2005 Topspin Communications. All rights reserved. - * Copyright (c) 2005 Cisco Systems. All rights reserved. - * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - */ - -#ifndef NES_USER_H -#define NES_USER_H - -#include <linux/types.h> - -#define NES_ABI_USERSPACE_VER 2 -#define NES_ABI_KERNEL_VER 2 - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. - */ - -struct nes_alloc_ucontext_req { - __u32 reserved32; - __u8 userspace_ver; - __u8 reserved8[3]; -}; - -struct nes_alloc_ucontext_resp { - __u32 max_pds; /* maximum pds allowed for this user process */ - __u32 max_qps; /* maximum qps allowed for this user process */ - __u32 wq_size; /* size of the WQs (sq+rq) allocated to the mmaped area */ - __u8 virtwq; /* flag to indicate if virtual WQ are to be used or not */ - __u8 kernel_ver; - __u8 reserved[2]; -}; - -struct nes_alloc_pd_resp { - __u32 pd_id; - __u32 mmap_db_index; -}; - -struct nes_create_cq_req { - __u64 user_cq_buffer; - __u32 mcrqf; - __u8 reserved[4]; -}; - -struct nes_create_qp_req { - __u64 user_wqe_buffers; - __u64 user_qp_buffer; -}; - -enum iwnes_memreg_type { - IWNES_MEMREG_TYPE_MEM = 0x0000, - IWNES_MEMREG_TYPE_QP = 0x0001, - IWNES_MEMREG_TYPE_CQ = 0x0002, - IWNES_MEMREG_TYPE_MW = 0x0003, - IWNES_MEMREG_TYPE_FMR = 0x0004, - IWNES_MEMREG_TYPE_FMEM = 0x0005, -}; - -struct nes_mem_reg_req { - __u32 reg_type; /* indicates if id is memory, QP or CQ */ - __u32 reserved; -}; - -struct nes_create_cq_resp { - __u32 cq_id; - __u32 cq_size; - __u32 mmap_db_index; - __u32 reserved; -}; - -struct nes_create_qp_resp { - __u32 qp_id; - __u32 actual_sq_size; - __u32 actual_rq_size; - __u32 mmap_sq_db_index; - __u32 mmap_rq_db_index; - __u32 nes_drv_opt; -}; - -#endif /* NES_USER_H */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h b/drivers/infiniband/hw/ocrdma/ocrdma_abi.h deleted file mode 100644 index 430b1350fe96..000000000000 --- a/drivers/infiniband/hw/ocrdma/ocrdma_abi.h +++ /dev/null @@ -1,149 +0,0 @@ -/* This file is part of the Emulex RoCE Device Driver for - * RoCE (RDMA over Converged Ethernet) adapters. - * Copyright (C) 2012-2015 Emulex. All rights reserved. - * EMULEX and SLI are trademarks of Emulex. - * www.emulex.com - * - * This software is available to you under a choice of one of two licenses. - * You may choose to be licensed under the terms of the GNU General Public - * License (GPL) Version 2, available from the file COPYING in the main - * directory of this source tree, or the BSD license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * - Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * - Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR - * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * Contact Information: - * linux-drivers@emulex.com - * - * Emulex - * 3333 Susan Street - * Costa Mesa, CA 92626 - */ - -#ifndef __OCRDMA_ABI_H__ -#define __OCRDMA_ABI_H__ - -#define OCRDMA_ABI_VERSION 2 -#define OCRDMA_BE_ROCE_ABI_VERSION 1 -/* user kernel communication data structures. */ - -struct ocrdma_alloc_ucontext_resp { - u32 dev_id; - u32 wqe_size; - u32 max_inline_data; - u32 dpp_wqe_size; - u64 ah_tbl_page; - u32 ah_tbl_len; - u32 rqe_size; - u8 fw_ver[32]; - /* for future use/new features in progress */ - u64 rsvd1; - u64 rsvd2; -}; - -struct ocrdma_alloc_pd_ureq { - u64 rsvd1; -}; - -struct ocrdma_alloc_pd_uresp { - u32 id; - u32 dpp_enabled; - u32 dpp_page_addr_hi; - u32 dpp_page_addr_lo; - u64 rsvd1; -}; - -struct ocrdma_create_cq_ureq { - u32 dpp_cq; - u32 rsvd; /* pad */ -}; - -#define MAX_CQ_PAGES 8 -struct ocrdma_create_cq_uresp { - u32 cq_id; - u32 page_size; - u32 num_pages; - u32 max_hw_cqe; - u64 page_addr[MAX_CQ_PAGES]; - u64 db_page_addr; - u32 db_page_size; - u32 phase_change; - /* for future use/new features in progress */ - u64 rsvd1; - u64 rsvd2; -}; - -#define MAX_QP_PAGES 8 -#define MAX_UD_AV_PAGES 8 - -struct ocrdma_create_qp_ureq { - u8 enable_dpp_cq; - u8 rsvd; - u16 dpp_cq_id; - u32 rsvd1; /* pad */ -}; - -struct ocrdma_create_qp_uresp { - u16 qp_id; - u16 sq_dbid; - u16 rq_dbid; - u16 resv0; /* pad */ - u32 sq_page_size; - u32 rq_page_size; - u32 num_sq_pages; - u32 num_rq_pages; - u64 sq_page_addr[MAX_QP_PAGES]; - u64 rq_page_addr[MAX_QP_PAGES]; - u64 db_page_addr; - u32 db_page_size; - u32 dpp_credit; - u32 dpp_offset; - u32 num_wqe_allocated; - u32 num_rqe_allocated; - u32 db_sq_offset; - u32 db_rq_offset; - u32 db_shift; - u64 rsvd[11]; -} __packed; - -struct ocrdma_create_srq_uresp { - u16 rq_dbid; - u16 resv0; /* pad */ - u32 resv1; - - u32 rq_page_size; - u32 num_rq_pages; - - u64 rq_page_addr[MAX_QP_PAGES]; - u64 db_page_addr; - - u32 db_page_size; - u32 num_rqe_allocated; - u32 db_rq_offset; - u32 db_shift; - - u64 rsvd2; - u64 rsvd3; -}; - -#endif /* __OCRDMA_ABI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 16740dcb876b..67fc0b6857e1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_srq = (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; - attr->max_send_sge = ((rsp->max_write_send_sge & + attr->max_send_sge = ((rsp->max_recv_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); - attr->max_recv_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT; + attr->max_recv_sge = (rsp->max_recv_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT; attr->max_srq_sge = (rsp->max_srq_rqe_sge & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET; - attr->max_rdma_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT; + attr->max_rdma_sge = (rsp->max_wr_rd_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT; attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 07d0c6c5b046..896071502739 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -56,7 +56,7 @@ #include "be_roce.h" #include "ocrdma_hw.h" #include "ocrdma_stats.h" -#include "ocrdma_abi.h" +#include <rdma/ocrdma-abi.h> MODULE_VERSION(OCRDMA_ROCE_DRV_VERSION); MODULE_DESCRIPTION(OCRDMA_ROCE_DRV_DESC " " OCRDMA_ROCE_DRV_VERSION); @@ -119,6 +119,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); + BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); dev->ibdev.owner = THIS_MODULE; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 0efc9662c6d8..37df4481bb8f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -554,9 +554,9 @@ enum { OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF << - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF, @@ -612,6 +612,8 @@ enum { OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0, OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF << OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT = 0, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK = 0xFFFF, }; struct ocrdma_mbx_query_config { @@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config { struct ocrdma_mbx_rsp rsp; u32 qp_srq_cq_ird_ord; u32 max_pd_ca_ack_delay; - u32 max_write_send_sge; + u32 max_recv_send_sge; u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; @@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config { u32 max_wqes_rqes_per_q; u32 max_cq_cqes_per_cq; u32 max_srq_rqe_sge; + u32 max_wr_rd_sge; + u32 ird_pgsz_num_pages; }; struct ocrdma_fw_ver_rsp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b1a3d91fe8b9..6af44f8db3d5 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -51,7 +51,7 @@ #include "ocrdma.h" #include "ocrdma_hw.h" #include "ocrdma_verbs.h" -#include "ocrdma_abi.h" +#include <rdma/ocrdma-abi.h> int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) { @@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = dev->attr.max_send_sge; - attr->max_sge_rd = attr->max_sge; + attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; diff --git a/drivers/infiniband/hw/qedr/Kconfig b/drivers/infiniband/hw/qedr/Kconfig new file mode 100644 index 000000000000..7c06d85568d4 --- /dev/null +++ b/drivers/infiniband/hw/qedr/Kconfig @@ -0,0 +1,7 @@ +config INFINIBAND_QEDR + tristate "QLogic RoCE driver" + depends on 64BIT && QEDE + select QED_LL2 + ---help--- + This driver provides low-level InfiniBand over Ethernet + support for QLogic QED host channel adapters (HCAs). diff --git a/drivers/infiniband/hw/qedr/Makefile b/drivers/infiniband/hw/qedr/Makefile new file mode 100644 index 000000000000..ba7067c77f2f --- /dev/null +++ b/drivers/infiniband/hw/qedr/Makefile @@ -0,0 +1,3 @@ +obj-$(CONFIG_INFINIBAND_QEDR) := qedr.o + +qedr-y := main.o verbs.o qedr_cm.o diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c new file mode 100644 index 000000000000..7b74d09a8217 --- /dev/null +++ b/drivers/infiniband/hw/qedr/main.c @@ -0,0 +1,914 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/module.h> +#include <rdma/ib_verbs.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_user_verbs.h> +#include <linux/netdevice.h> +#include <linux/iommu.h> +#include <net/addrconf.h> +#include <linux/qed/qede_roce.h> +#include <linux/qed/qed_chain.h> +#include <linux/qed/qed_if.h> +#include "qedr.h" +#include "verbs.h" +#include <rdma/qedr-abi.h> + +MODULE_DESCRIPTION("QLogic 40G/100G ROCE Driver"); +MODULE_AUTHOR("QLogic Corporation"); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_VERSION(QEDR_MODULE_VERSION); + +#define QEDR_WQ_MULTIPLIER_DFT (3) + +void qedr_ib_dispatch_event(struct qedr_dev *dev, u8 port_num, + enum ib_event_type type) +{ + struct ib_event ibev; + + ibev.device = &dev->ibdev; + ibev.element.port_num = port_num; + ibev.event = type; + + ib_dispatch_event(&ibev); +} + +static enum rdma_link_layer qedr_link_layer(struct ib_device *device, + u8 port_num) +{ + return IB_LINK_LAYER_ETHERNET; +} + +static void qedr_get_dev_fw_str(struct ib_device *ibdev, char *str, + size_t str_len) +{ + struct qedr_dev *qedr = get_qedr_dev(ibdev); + u32 fw_ver = (u32)qedr->attr.fw_ver; + + snprintf(str, str_len, "%d. %d. %d. %d", + (fw_ver >> 24) & 0xFF, (fw_ver >> 16) & 0xFF, + (fw_ver >> 8) & 0xFF, fw_ver & 0xFF); +} + +static struct net_device *qedr_get_netdev(struct ib_device *dev, u8 port_num) +{ + struct qedr_dev *qdev; + + qdev = get_qedr_dev(dev); + dev_hold(qdev->ndev); + + /* The HW vendor's device driver must guarantee + * that this function returns NULL before the net device reaches + * NETDEV_UNREGISTER_FINAL state. + */ + return qdev->ndev; +} + +static int qedr_register_device(struct qedr_dev *dev) +{ + strlcpy(dev->ibdev.name, "qedr%d", IB_DEVICE_NAME_MAX); + + dev->ibdev.node_guid = dev->attr.node_guid; + memcpy(dev->ibdev.node_desc, QEDR_NODE_DESC, sizeof(QEDR_NODE_DESC)); + dev->ibdev.owner = THIS_MODULE; + dev->ibdev.uverbs_abi_ver = QEDR_ABI_VERSION; + + dev->ibdev.uverbs_cmd_mask = QEDR_UVERBS(GET_CONTEXT) | + QEDR_UVERBS(QUERY_DEVICE) | + QEDR_UVERBS(QUERY_PORT) | + QEDR_UVERBS(ALLOC_PD) | + QEDR_UVERBS(DEALLOC_PD) | + QEDR_UVERBS(CREATE_COMP_CHANNEL) | + QEDR_UVERBS(CREATE_CQ) | + QEDR_UVERBS(RESIZE_CQ) | + QEDR_UVERBS(DESTROY_CQ) | + QEDR_UVERBS(REQ_NOTIFY_CQ) | + QEDR_UVERBS(CREATE_QP) | + QEDR_UVERBS(MODIFY_QP) | + QEDR_UVERBS(QUERY_QP) | + QEDR_UVERBS(DESTROY_QP) | + QEDR_UVERBS(REG_MR) | + QEDR_UVERBS(DEREG_MR) | + QEDR_UVERBS(POLL_CQ) | + QEDR_UVERBS(POST_SEND) | + QEDR_UVERBS(POST_RECV); + + dev->ibdev.phys_port_cnt = 1; + dev->ibdev.num_comp_vectors = dev->num_cnq; + dev->ibdev.node_type = RDMA_NODE_IB_CA; + + dev->ibdev.query_device = qedr_query_device; + dev->ibdev.query_port = qedr_query_port; + dev->ibdev.modify_port = qedr_modify_port; + + dev->ibdev.query_gid = qedr_query_gid; + dev->ibdev.add_gid = qedr_add_gid; + dev->ibdev.del_gid = qedr_del_gid; + + dev->ibdev.alloc_ucontext = qedr_alloc_ucontext; + dev->ibdev.dealloc_ucontext = qedr_dealloc_ucontext; + dev->ibdev.mmap = qedr_mmap; + + dev->ibdev.alloc_pd = qedr_alloc_pd; + dev->ibdev.dealloc_pd = qedr_dealloc_pd; + + dev->ibdev.create_cq = qedr_create_cq; + dev->ibdev.destroy_cq = qedr_destroy_cq; + dev->ibdev.resize_cq = qedr_resize_cq; + dev->ibdev.req_notify_cq = qedr_arm_cq; + + dev->ibdev.create_qp = qedr_create_qp; + dev->ibdev.modify_qp = qedr_modify_qp; + dev->ibdev.query_qp = qedr_query_qp; + dev->ibdev.destroy_qp = qedr_destroy_qp; + + dev->ibdev.query_pkey = qedr_query_pkey; + + dev->ibdev.create_ah = qedr_create_ah; + dev->ibdev.destroy_ah = qedr_destroy_ah; + + dev->ibdev.get_dma_mr = qedr_get_dma_mr; + dev->ibdev.dereg_mr = qedr_dereg_mr; + dev->ibdev.reg_user_mr = qedr_reg_user_mr; + dev->ibdev.alloc_mr = qedr_alloc_mr; + dev->ibdev.map_mr_sg = qedr_map_mr_sg; + + dev->ibdev.poll_cq = qedr_poll_cq; + dev->ibdev.post_send = qedr_post_send; + dev->ibdev.post_recv = qedr_post_recv; + + dev->ibdev.process_mad = qedr_process_mad; + dev->ibdev.get_port_immutable = qedr_port_immutable; + dev->ibdev.get_netdev = qedr_get_netdev; + + dev->ibdev.dma_device = &dev->pdev->dev; + + dev->ibdev.get_link_layer = qedr_link_layer; + dev->ibdev.get_dev_fw_str = qedr_get_dev_fw_str; + + return ib_register_device(&dev->ibdev, NULL); +} + +/* This function allocates fast-path status block memory */ +static int qedr_alloc_mem_sb(struct qedr_dev *dev, + struct qed_sb_info *sb_info, u16 sb_id) +{ + struct status_block *sb_virt; + dma_addr_t sb_phys; + int rc; + + sb_virt = dma_alloc_coherent(&dev->pdev->dev, + sizeof(*sb_virt), &sb_phys, GFP_KERNEL); + if (!sb_virt) + return -ENOMEM; + + rc = dev->ops->common->sb_init(dev->cdev, sb_info, + sb_virt, sb_phys, sb_id, + QED_SB_TYPE_CNQ); + if (rc) { + pr_err("Status block initialization failed\n"); + dma_free_coherent(&dev->pdev->dev, sizeof(*sb_virt), + sb_virt, sb_phys); + return rc; + } + + return 0; +} + +static void qedr_free_mem_sb(struct qedr_dev *dev, + struct qed_sb_info *sb_info, int sb_id) +{ + if (sb_info->sb_virt) { + dev->ops->common->sb_release(dev->cdev, sb_info, sb_id); + dma_free_coherent(&dev->pdev->dev, sizeof(*sb_info->sb_virt), + (void *)sb_info->sb_virt, sb_info->sb_phys); + } +} + +static void qedr_free_resources(struct qedr_dev *dev) +{ + int i; + + for (i = 0; i < dev->num_cnq; i++) { + qedr_free_mem_sb(dev, &dev->sb_array[i], dev->sb_start + i); + dev->ops->common->chain_free(dev->cdev, &dev->cnq_array[i].pbl); + } + + kfree(dev->cnq_array); + kfree(dev->sb_array); + kfree(dev->sgid_tbl); +} + +static int qedr_alloc_resources(struct qedr_dev *dev) +{ + struct qedr_cnq *cnq; + __le16 *cons_pi; + u16 n_entries; + int i, rc; + + dev->sgid_tbl = kzalloc(sizeof(union ib_gid) * + QEDR_MAX_SGID, GFP_KERNEL); + if (!dev->sgid_tbl) + return -ENOMEM; + + spin_lock_init(&dev->sgid_lock); + + /* Allocate Status blocks for CNQ */ + dev->sb_array = kcalloc(dev->num_cnq, sizeof(*dev->sb_array), + GFP_KERNEL); + if (!dev->sb_array) { + rc = -ENOMEM; + goto err1; + } + + dev->cnq_array = kcalloc(dev->num_cnq, + sizeof(*dev->cnq_array), GFP_KERNEL); + if (!dev->cnq_array) { + rc = -ENOMEM; + goto err2; + } + + dev->sb_start = dev->ops->rdma_get_start_sb(dev->cdev); + + /* Allocate CNQ PBLs */ + n_entries = min_t(u32, QED_RDMA_MAX_CNQ_SIZE, QEDR_ROCE_MAX_CNQ_SIZE); + for (i = 0; i < dev->num_cnq; i++) { + cnq = &dev->cnq_array[i]; + + rc = qedr_alloc_mem_sb(dev, &dev->sb_array[i], + dev->sb_start + i); + if (rc) + goto err3; + + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_CONSUME, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U16, + n_entries, + sizeof(struct regpair *), + &cnq->pbl); + if (rc) + goto err4; + + cnq->dev = dev; + cnq->sb = &dev->sb_array[i]; + cons_pi = dev->sb_array[i].sb_virt->pi_array; + cnq->hw_cons_ptr = &cons_pi[QED_ROCE_PROTOCOL_INDEX]; + cnq->index = i; + sprintf(cnq->name, "qedr%d@pci:%s", i, pci_name(dev->pdev)); + + DP_DEBUG(dev, QEDR_MSG_INIT, "cnq[%d].cons=%d\n", + i, qed_chain_get_cons_idx(&cnq->pbl)); + } + + return 0; +err4: + qedr_free_mem_sb(dev, &dev->sb_array[i], dev->sb_start + i); +err3: + for (--i; i >= 0; i--) { + dev->ops->common->chain_free(dev->cdev, &dev->cnq_array[i].pbl); + qedr_free_mem_sb(dev, &dev->sb_array[i], dev->sb_start + i); + } + kfree(dev->cnq_array); +err2: + kfree(dev->sb_array); +err1: + kfree(dev->sgid_tbl); + return rc; +} + +/* QEDR sysfs interface */ +static ssize_t show_rev(struct device *device, struct device_attribute *attr, + char *buf) +{ + struct qedr_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor); +} + +static ssize_t show_hca_type(struct device *device, + struct device_attribute *attr, char *buf) +{ + return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET"); +} + +static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); + +static struct device_attribute *qedr_attributes[] = { + &dev_attr_hw_rev, + &dev_attr_hca_type +}; + +static void qedr_remove_sysfiles(struct qedr_dev *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++) + device_remove_file(&dev->ibdev.dev, qedr_attributes[i]); +} + +static void qedr_pci_set_atomic(struct qedr_dev *dev, struct pci_dev *pdev) +{ + struct pci_dev *bridge; + u32 val; + + dev->atomic_cap = IB_ATOMIC_NONE; + + bridge = pdev->bus->self; + if (!bridge) + return; + + /* Check whether we are connected directly or via a switch */ + while (bridge && bridge->bus->parent) { + DP_DEBUG(dev, QEDR_MSG_INIT, + "Device is not connected directly to root. bridge->bus->number=%d primary=%d\n", + bridge->bus->number, bridge->bus->primary); + /* Need to check Atomic Op Routing Supported all the way to + * root complex. + */ + pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val); + if (!(val & PCI_EXP_DEVCAP2_ATOMIC_ROUTE)) { + pcie_capability_clear_word(pdev, + PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_ATOMIC_REQ); + return; + } + bridge = bridge->bus->parent->self; + } + bridge = pdev->bus->self; + + /* according to bridge capability */ + pcie_capability_read_dword(bridge, PCI_EXP_DEVCAP2, &val); + if (val & PCI_EXP_DEVCAP2_ATOMIC_COMP64) { + pcie_capability_set_word(pdev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_ATOMIC_REQ); + dev->atomic_cap = IB_ATOMIC_GLOB; + } else { + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL2, + PCI_EXP_DEVCTL2_ATOMIC_REQ); + } +} + +static const struct qed_rdma_ops *qed_ops; + +#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) + +static irqreturn_t qedr_irq_handler(int irq, void *handle) +{ + u16 hw_comp_cons, sw_comp_cons; + struct qedr_cnq *cnq = handle; + struct regpair *cq_handle; + struct qedr_cq *cq; + + qed_sb_ack(cnq->sb, IGU_INT_DISABLE, 0); + + qed_sb_update_sb_idx(cnq->sb); + + hw_comp_cons = le16_to_cpu(*cnq->hw_cons_ptr); + sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl); + + /* Align protocol-index and chain reads */ + rmb(); + + while (sw_comp_cons != hw_comp_cons) { + cq_handle = (struct regpair *)qed_chain_consume(&cnq->pbl); + cq = (struct qedr_cq *)(uintptr_t)HILO_U64(cq_handle->hi, + cq_handle->lo); + + if (cq == NULL) { + DP_ERR(cnq->dev, + "Received NULL CQ cq_handle->hi=%d cq_handle->lo=%d sw_comp_cons=%d hw_comp_cons=%d\n", + cq_handle->hi, cq_handle->lo, sw_comp_cons, + hw_comp_cons); + + break; + } + + if (cq->sig != QEDR_CQ_MAGIC_NUMBER) { + DP_ERR(cnq->dev, + "Problem with cq signature, cq_handle->hi=%d ch_handle->lo=%d cq=%p\n", + cq_handle->hi, cq_handle->lo, cq); + break; + } + + cq->arm_flags = 0; + + if (cq->ibcq.comp_handler) + (*cq->ibcq.comp_handler) + (&cq->ibcq, cq->ibcq.cq_context); + + sw_comp_cons = qed_chain_get_cons_idx(&cnq->pbl); + + cnq->n_comp++; + + } + + qed_ops->rdma_cnq_prod_update(cnq->dev->rdma_ctx, cnq->index, + sw_comp_cons); + + qed_sb_ack(cnq->sb, IGU_INT_ENABLE, 1); + + return IRQ_HANDLED; +} + +static void qedr_sync_free_irqs(struct qedr_dev *dev) +{ + u32 vector; + int i; + + for (i = 0; i < dev->int_info.used_cnt; i++) { + if (dev->int_info.msix_cnt) { + vector = dev->int_info.msix[i * dev->num_hwfns].vector; + synchronize_irq(vector); + free_irq(vector, &dev->cnq_array[i]); + } + } + + dev->int_info.used_cnt = 0; +} + +static int qedr_req_msix_irqs(struct qedr_dev *dev) +{ + int i, rc = 0; + + if (dev->num_cnq > dev->int_info.msix_cnt) { + DP_ERR(dev, + "Interrupt mismatch: %d CNQ queues > %d MSI-x vectors\n", + dev->num_cnq, dev->int_info.msix_cnt); + return -EINVAL; + } + + for (i = 0; i < dev->num_cnq; i++) { + rc = request_irq(dev->int_info.msix[i * dev->num_hwfns].vector, + qedr_irq_handler, 0, dev->cnq_array[i].name, + &dev->cnq_array[i]); + if (rc) { + DP_ERR(dev, "Request cnq %d irq failed\n", i); + qedr_sync_free_irqs(dev); + } else { + DP_DEBUG(dev, QEDR_MSG_INIT, + "Requested cnq irq for %s [entry %d]. Cookie is at %p\n", + dev->cnq_array[i].name, i, + &dev->cnq_array[i]); + dev->int_info.used_cnt++; + } + } + + return rc; +} + +static int qedr_setup_irqs(struct qedr_dev *dev) +{ + int rc; + + DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_setup_irqs\n"); + + /* Learn Interrupt configuration */ + rc = dev->ops->rdma_set_rdma_int(dev->cdev, dev->num_cnq); + if (rc < 0) + return rc; + + rc = dev->ops->rdma_get_rdma_int(dev->cdev, &dev->int_info); + if (rc) { + DP_DEBUG(dev, QEDR_MSG_INIT, "get_rdma_int failed\n"); + return rc; + } + + if (dev->int_info.msix_cnt) { + DP_DEBUG(dev, QEDR_MSG_INIT, "rdma msix_cnt = %d\n", + dev->int_info.msix_cnt); + rc = qedr_req_msix_irqs(dev); + if (rc) + return rc; + } + + DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_setup_irqs succeeded\n"); + + return 0; +} + +static int qedr_set_device_attr(struct qedr_dev *dev) +{ + struct qed_rdma_device *qed_attr; + struct qedr_device_attr *attr; + u32 page_size; + + /* Part 1 - query core capabilities */ + qed_attr = dev->ops->rdma_query_device(dev->rdma_ctx); + + /* Part 2 - check capabilities */ + page_size = ~dev->attr.page_size_caps + 1; + if (page_size > PAGE_SIZE) { + DP_ERR(dev, + "Kernel PAGE_SIZE is %ld which is smaller than minimum page size (%d) required by qedr\n", + PAGE_SIZE, page_size); + return -ENODEV; + } + + /* Part 3 - copy and update capabilities */ + attr = &dev->attr; + attr->vendor_id = qed_attr->vendor_id; + attr->vendor_part_id = qed_attr->vendor_part_id; + attr->hw_ver = qed_attr->hw_ver; + attr->fw_ver = qed_attr->fw_ver; + attr->node_guid = qed_attr->node_guid; + attr->sys_image_guid = qed_attr->sys_image_guid; + attr->max_cnq = qed_attr->max_cnq; + attr->max_sge = qed_attr->max_sge; + attr->max_inline = qed_attr->max_inline; + attr->max_sqe = min_t(u32, qed_attr->max_wqe, QEDR_MAX_SQE); + attr->max_rqe = min_t(u32, qed_attr->max_wqe, QEDR_MAX_RQE); + attr->max_qp_resp_rd_atomic_resc = qed_attr->max_qp_resp_rd_atomic_resc; + attr->max_qp_req_rd_atomic_resc = qed_attr->max_qp_req_rd_atomic_resc; + attr->max_dev_resp_rd_atomic_resc = + qed_attr->max_dev_resp_rd_atomic_resc; + attr->max_cq = qed_attr->max_cq; + attr->max_qp = qed_attr->max_qp; + attr->max_mr = qed_attr->max_mr; + attr->max_mr_size = qed_attr->max_mr_size; + attr->max_cqe = min_t(u64, qed_attr->max_cqe, QEDR_MAX_CQES); + attr->max_mw = qed_attr->max_mw; + attr->max_fmr = qed_attr->max_fmr; + attr->max_mr_mw_fmr_pbl = qed_attr->max_mr_mw_fmr_pbl; + attr->max_mr_mw_fmr_size = qed_attr->max_mr_mw_fmr_size; + attr->max_pd = qed_attr->max_pd; + attr->max_ah = qed_attr->max_ah; + attr->max_pkey = qed_attr->max_pkey; + attr->max_srq = qed_attr->max_srq; + attr->max_srq_wr = qed_attr->max_srq_wr; + attr->dev_caps = qed_attr->dev_caps; + attr->page_size_caps = qed_attr->page_size_caps; + attr->dev_ack_delay = qed_attr->dev_ack_delay; + attr->reserved_lkey = qed_attr->reserved_lkey; + attr->bad_pkey_counter = qed_attr->bad_pkey_counter; + attr->max_stats_queues = qed_attr->max_stats_queues; + + return 0; +} + +void qedr_unaffiliated_event(void *context, + u8 event_code) +{ + pr_err("unaffiliated event not implemented yet\n"); +} + +void qedr_affiliated_event(void *context, u8 e_code, void *fw_handle) +{ +#define EVENT_TYPE_NOT_DEFINED 0 +#define EVENT_TYPE_CQ 1 +#define EVENT_TYPE_QP 2 + struct qedr_dev *dev = (struct qedr_dev *)context; + union event_ring_data *data = fw_handle; + u64 roce_handle64 = ((u64)data->roce_handle.hi << 32) + + data->roce_handle.lo; + u8 event_type = EVENT_TYPE_NOT_DEFINED; + struct ib_event event; + struct ib_cq *ibcq; + struct ib_qp *ibqp; + struct qedr_cq *cq; + struct qedr_qp *qp; + + switch (e_code) { + case ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR: + event.event = IB_EVENT_CQ_ERR; + event_type = EVENT_TYPE_CQ; + break; + case ROCE_ASYNC_EVENT_SQ_DRAINED: + event.event = IB_EVENT_SQ_DRAINED; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR: + event.event = IB_EVENT_QP_FATAL; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR: + event.event = IB_EVENT_QP_REQ_ERR; + event_type = EVENT_TYPE_QP; + break; + case ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR: + event.event = IB_EVENT_QP_ACCESS_ERR; + event_type = EVENT_TYPE_QP; + break; + default: + DP_ERR(dev, "unsupported event %d on handle=%llx\n", e_code, + roce_handle64); + } + + switch (event_type) { + case EVENT_TYPE_CQ: + cq = (struct qedr_cq *)(uintptr_t)roce_handle64; + if (cq) { + ibcq = &cq->ibcq; + if (ibcq->event_handler) { + event.device = ibcq->device; + event.element.cq = ibcq; + ibcq->event_handler(&event, ibcq->cq_context); + } + } else { + WARN(1, + "Error: CQ event with NULL pointer ibcq. Handle=%llx\n", + roce_handle64); + } + DP_ERR(dev, "CQ event %d on hanlde %p\n", e_code, cq); + break; + case EVENT_TYPE_QP: + qp = (struct qedr_qp *)(uintptr_t)roce_handle64; + if (qp) { + ibqp = &qp->ibqp; + if (ibqp->event_handler) { + event.device = ibqp->device; + event.element.qp = ibqp; + ibqp->event_handler(&event, ibqp->qp_context); + } + } else { + WARN(1, + "Error: QP event with NULL pointer ibqp. Handle=%llx\n", + roce_handle64); + } + DP_ERR(dev, "QP event %d on hanlde %p\n", e_code, qp); + break; + default: + break; + } +} + +static int qedr_init_hw(struct qedr_dev *dev) +{ + struct qed_rdma_add_user_out_params out_params; + struct qed_rdma_start_in_params *in_params; + struct qed_rdma_cnq_params *cur_pbl; + struct qed_rdma_events events; + dma_addr_t p_phys_table; + u32 page_cnt; + int rc = 0; + int i; + + in_params = kzalloc(sizeof(*in_params), GFP_KERNEL); + if (!in_params) { + rc = -ENOMEM; + goto out; + } + + in_params->desired_cnq = dev->num_cnq; + for (i = 0; i < dev->num_cnq; i++) { + cur_pbl = &in_params->cnq_pbl_list[i]; + + page_cnt = qed_chain_get_page_cnt(&dev->cnq_array[i].pbl); + cur_pbl->num_pbl_pages = page_cnt; + + p_phys_table = qed_chain_get_pbl_phys(&dev->cnq_array[i].pbl); + cur_pbl->pbl_ptr = (u64)p_phys_table; + } + + events.affiliated_event = qedr_affiliated_event; + events.unaffiliated_event = qedr_unaffiliated_event; + events.context = dev; + + in_params->events = &events; + in_params->cq_mode = QED_RDMA_CQ_MODE_32_BITS; + in_params->max_mtu = dev->ndev->mtu; + ether_addr_copy(&in_params->mac_addr[0], dev->ndev->dev_addr); + + rc = dev->ops->rdma_init(dev->cdev, in_params); + if (rc) + goto out; + + rc = dev->ops->rdma_add_user(dev->rdma_ctx, &out_params); + if (rc) + goto out; + + dev->db_addr = (void *)(uintptr_t)out_params.dpi_addr; + dev->db_phys_addr = out_params.dpi_phys_addr; + dev->db_size = out_params.dpi_size; + dev->dpi = out_params.dpi; + + rc = qedr_set_device_attr(dev); +out: + kfree(in_params); + if (rc) + DP_ERR(dev, "Init HW Failed rc = %d\n", rc); + + return rc; +} + +void qedr_stop_hw(struct qedr_dev *dev) +{ + dev->ops->rdma_remove_user(dev->rdma_ctx, dev->dpi); + dev->ops->rdma_stop(dev->rdma_ctx); +} + +static struct qedr_dev *qedr_add(struct qed_dev *cdev, struct pci_dev *pdev, + struct net_device *ndev) +{ + struct qed_dev_rdma_info dev_info; + struct qedr_dev *dev; + int rc = 0, i; + + dev = (struct qedr_dev *)ib_alloc_device(sizeof(*dev)); + if (!dev) { + pr_err("Unable to allocate ib device\n"); + return NULL; + } + + DP_DEBUG(dev, QEDR_MSG_INIT, "qedr add device called\n"); + + dev->pdev = pdev; + dev->ndev = ndev; + dev->cdev = cdev; + + qed_ops = qed_get_rdma_ops(); + if (!qed_ops) { + DP_ERR(dev, "Failed to get qed roce operations\n"); + goto init_err; + } + + dev->ops = qed_ops; + rc = qed_ops->fill_dev_info(cdev, &dev_info); + if (rc) + goto init_err; + + dev->num_hwfns = dev_info.common.num_hwfns; + dev->rdma_ctx = dev->ops->rdma_get_rdma_ctx(cdev); + + dev->num_cnq = dev->ops->rdma_get_min_cnq_msix(cdev); + if (!dev->num_cnq) { + DP_ERR(dev, "not enough CNQ resources.\n"); + goto init_err; + } + + dev->wq_multiplier = QEDR_WQ_MULTIPLIER_DFT; + + qedr_pci_set_atomic(dev, pdev); + + rc = qedr_alloc_resources(dev); + if (rc) + goto init_err; + + rc = qedr_init_hw(dev); + if (rc) + goto alloc_err; + + rc = qedr_setup_irqs(dev); + if (rc) + goto irq_err; + + rc = qedr_register_device(dev); + if (rc) { + DP_ERR(dev, "Unable to allocate register device\n"); + goto reg_err; + } + + for (i = 0; i < ARRAY_SIZE(qedr_attributes); i++) + if (device_create_file(&dev->ibdev.dev, qedr_attributes[i])) + goto sysfs_err; + + DP_DEBUG(dev, QEDR_MSG_INIT, "qedr driver loaded successfully\n"); + return dev; + +sysfs_err: + ib_unregister_device(&dev->ibdev); +reg_err: + qedr_sync_free_irqs(dev); +irq_err: + qedr_stop_hw(dev); +alloc_err: + qedr_free_resources(dev); +init_err: + ib_dealloc_device(&dev->ibdev); + DP_ERR(dev, "qedr driver load failed rc=%d\n", rc); + + return NULL; +} + +static void qedr_remove(struct qedr_dev *dev) +{ + /* First unregister with stack to stop all the active traffic + * of the registered clients. + */ + qedr_remove_sysfiles(dev); + ib_unregister_device(&dev->ibdev); + + qedr_stop_hw(dev); + qedr_sync_free_irqs(dev); + qedr_free_resources(dev); + ib_dealloc_device(&dev->ibdev); +} + +static int qedr_close(struct qedr_dev *dev) +{ + qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ERR); + + return 0; +} + +static void qedr_shutdown(struct qedr_dev *dev) +{ + qedr_close(dev); + qedr_remove(dev); +} + +static void qedr_mac_address_change(struct qedr_dev *dev) +{ + union ib_gid *sgid = &dev->sgid_tbl[0]; + u8 guid[8], mac_addr[6]; + int rc; + + /* Update SGID */ + ether_addr_copy(&mac_addr[0], dev->ndev->dev_addr); + guid[0] = mac_addr[0] ^ 2; + guid[1] = mac_addr[1]; + guid[2] = mac_addr[2]; + guid[3] = 0xff; + guid[4] = 0xfe; + guid[5] = mac_addr[3]; + guid[6] = mac_addr[4]; + guid[7] = mac_addr[5]; + sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); + memcpy(&sgid->raw[8], guid, sizeof(guid)); + + /* Update LL2 */ + rc = dev->ops->roce_ll2_set_mac_filter(dev->cdev, + dev->gsi_ll2_mac_address, + dev->ndev->dev_addr); + + ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr); + + qedr_ib_dispatch_event(dev, 1, IB_EVENT_GID_CHANGE); + + if (rc) + DP_ERR(dev, "Error updating mac filter\n"); +} + +/* event handling via NIC driver ensures that all the NIC specific + * initialization done before RoCE driver notifies + * event to stack. + */ +static void qedr_notify(struct qedr_dev *dev, enum qede_roce_event event) +{ + switch (event) { + case QEDE_UP: + qedr_ib_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE); + break; + case QEDE_DOWN: + qedr_close(dev); + break; + case QEDE_CLOSE: + qedr_shutdown(dev); + break; + case QEDE_CHANGE_ADDR: + qedr_mac_address_change(dev); + break; + default: + pr_err("Event not supported\n"); + } +} + +static struct qedr_driver qedr_drv = { + .name = "qedr_driver", + .add = qedr_add, + .remove = qedr_remove, + .notify = qedr_notify, +}; + +static int __init qedr_init_module(void) +{ + return qede_roce_register_driver(&qedr_drv); +} + +static void __exit qedr_exit_module(void) +{ + qede_roce_unregister_driver(&qedr_drv); +} + +module_init(qedr_init_module); +module_exit(qedr_exit_module); diff --git a/drivers/infiniband/hw/qedr/qedr.h b/drivers/infiniband/hw/qedr/qedr.h new file mode 100644 index 000000000000..620badd7d4fb --- /dev/null +++ b/drivers/infiniband/hw/qedr/qedr.h @@ -0,0 +1,495 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __QEDR_H__ +#define __QEDR_H__ + +#include <linux/pci.h> +#include <rdma/ib_addr.h> +#include <linux/qed/qed_if.h> +#include <linux/qed/qed_chain.h> +#include <linux/qed/qed_roce_if.h> +#include <linux/qed/qede_roce.h> +#include "qedr_hsi.h" + +#define QEDR_MODULE_VERSION "8.10.10.0" +#define QEDR_NODE_DESC "QLogic 579xx RoCE HCA" +#define DP_NAME(dev) ((dev)->ibdev.name) + +#define DP_DEBUG(dev, module, fmt, ...) \ + pr_debug("(%s) " module ": " fmt, \ + DP_NAME(dev) ? DP_NAME(dev) : "", ## __VA_ARGS__) + +#define QEDR_MSG_INIT "INIT" +#define QEDR_MSG_MISC "MISC" +#define QEDR_MSG_CQ " CQ" +#define QEDR_MSG_MR " MR" +#define QEDR_MSG_RQ " RQ" +#define QEDR_MSG_SQ " SQ" +#define QEDR_MSG_QP " QP" +#define QEDR_MSG_GSI " GSI" + +#define QEDR_CQ_MAGIC_NUMBER (0x11223344) + +struct qedr_dev; + +struct qedr_cnq { + struct qedr_dev *dev; + struct qed_chain pbl; + struct qed_sb_info *sb; + char name[32]; + u64 n_comp; + __le16 *hw_cons_ptr; + u8 index; +}; + +#define QEDR_MAX_SGID 128 + +struct qedr_device_attr { + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 fw_ver; + u64 node_guid; + u64 sys_image_guid; + u8 max_cnq; + u8 max_sge; + u16 max_inline; + u32 max_sqe; + u32 max_rqe; + u8 max_qp_resp_rd_atomic_resc; + u8 max_qp_req_rd_atomic_resc; + u64 max_dev_resp_rd_atomic_resc; + u32 max_cq; + u32 max_qp; + u32 max_mr; + u64 max_mr_size; + u32 max_cqe; + u32 max_mw; + u32 max_fmr; + u32 max_mr_mw_fmr_pbl; + u64 max_mr_mw_fmr_size; + u32 max_pd; + u32 max_ah; + u8 max_pkey; + u32 max_srq; + u32 max_srq_wr; + u8 max_srq_sge; + u8 max_stats_queues; + u32 dev_caps; + + u64 page_size_caps; + u8 dev_ack_delay; + u32 reserved_lkey; + u32 bad_pkey_counter; + struct qed_rdma_events events; +}; + +struct qedr_dev { + struct ib_device ibdev; + struct qed_dev *cdev; + struct pci_dev *pdev; + struct net_device *ndev; + + enum ib_atomic_cap atomic_cap; + + void *rdma_ctx; + struct qedr_device_attr attr; + + const struct qed_rdma_ops *ops; + struct qed_int_info int_info; + + struct qed_sb_info *sb_array; + struct qedr_cnq *cnq_array; + int num_cnq; + int sb_start; + + void __iomem *db_addr; + u64 db_phys_addr; + u32 db_size; + u16 dpi; + + union ib_gid *sgid_tbl; + + /* Lock for sgid table */ + spinlock_t sgid_lock; + + u64 guid; + + u32 dp_module; + u8 dp_level; + u8 num_hwfns; + uint wq_multiplier; + u8 gsi_ll2_mac_address[ETH_ALEN]; + int gsi_qp_created; + struct qedr_cq *gsi_sqcq; + struct qedr_cq *gsi_rqcq; + struct qedr_qp *gsi_qp; +}; + +#define QEDR_MAX_SQ_PBL (0x8000) +#define QEDR_MAX_SQ_PBL_ENTRIES (0x10000 / sizeof(void *)) +#define QEDR_SQE_ELEMENT_SIZE (sizeof(struct rdma_sq_sge)) +#define QEDR_MAX_SQE_ELEMENTS_PER_SQE (ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE / \ + QEDR_SQE_ELEMENT_SIZE) +#define QEDR_MAX_SQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \ + QEDR_SQE_ELEMENT_SIZE) +#define QEDR_MAX_SQE ((QEDR_MAX_SQ_PBL_ENTRIES) *\ + (RDMA_RING_PAGE_SIZE) / \ + (QEDR_SQE_ELEMENT_SIZE) /\ + (QEDR_MAX_SQE_ELEMENTS_PER_SQE)) +/* RQ */ +#define QEDR_MAX_RQ_PBL (0x2000) +#define QEDR_MAX_RQ_PBL_ENTRIES (0x10000 / sizeof(void *)) +#define QEDR_RQE_ELEMENT_SIZE (sizeof(struct rdma_rq_sge)) +#define QEDR_MAX_RQE_ELEMENTS_PER_RQE (RDMA_MAX_SGE_PER_RQ_WQE) +#define QEDR_MAX_RQE_ELEMENTS_PER_PAGE ((RDMA_RING_PAGE_SIZE) / \ + QEDR_RQE_ELEMENT_SIZE) +#define QEDR_MAX_RQE ((QEDR_MAX_RQ_PBL_ENTRIES) *\ + (RDMA_RING_PAGE_SIZE) / \ + (QEDR_RQE_ELEMENT_SIZE) /\ + (QEDR_MAX_RQE_ELEMENTS_PER_RQE)) + +#define QEDR_CQE_SIZE (sizeof(union rdma_cqe)) +#define QEDR_MAX_CQE_PBL_SIZE (512 * 1024) +#define QEDR_MAX_CQE_PBL_ENTRIES (((QEDR_MAX_CQE_PBL_SIZE) / \ + sizeof(u64)) - 1) +#define QEDR_MAX_CQES ((u32)((QEDR_MAX_CQE_PBL_ENTRIES) * \ + (QED_CHAIN_PAGE_SIZE) / QEDR_CQE_SIZE)) + +#define QEDR_ROCE_MAX_CNQ_SIZE (0x4000) + +#define QEDR_MAX_PORT (1) + +#define QEDR_UVERBS(CMD_NAME) (1ull << IB_USER_VERBS_CMD_##CMD_NAME) + +#define QEDR_ROCE_PKEY_MAX 1 +#define QEDR_ROCE_PKEY_TABLE_LEN 1 +#define QEDR_ROCE_PKEY_DEFAULT 0xffff + +struct qedr_pbl { + struct list_head list_entry; + void *va; + dma_addr_t pa; +}; + +struct qedr_ucontext { + struct ib_ucontext ibucontext; + struct qedr_dev *dev; + struct qedr_pd *pd; + u64 dpi_addr; + u64 dpi_phys_addr; + u32 dpi_size; + u16 dpi; + + struct list_head mm_head; + + /* Lock to protect mm list */ + struct mutex mm_list_lock; +}; + +union db_prod64 { + struct rdma_pwm_val32_data data; + u64 raw; +}; + +enum qedr_cq_type { + QEDR_CQ_TYPE_GSI, + QEDR_CQ_TYPE_KERNEL, + QEDR_CQ_TYPE_USER, +}; + +struct qedr_pbl_info { + u32 num_pbls; + u32 num_pbes; + u32 pbl_size; + u32 pbe_size; + bool two_layered; +}; + +struct qedr_userq { + struct ib_umem *umem; + struct qedr_pbl_info pbl_info; + struct qedr_pbl *pbl_tbl; + u64 buf_addr; + size_t buf_len; +}; + +struct qedr_cq { + struct ib_cq ibcq; + + enum qedr_cq_type cq_type; + u32 sig; + + u16 icid; + + /* Lock to protect completion handler */ + spinlock_t comp_handler_lock; + + /* Lock to protect multiplem CQ's */ + spinlock_t cq_lock; + u8 arm_flags; + struct qed_chain pbl; + + void __iomem *db_addr; + union db_prod64 db; + + u8 pbl_toggle; + union rdma_cqe *latest_cqe; + union rdma_cqe *toggle_cqe; + + u32 cq_cons; + + struct qedr_userq q; +}; + +struct qedr_pd { + struct ib_pd ibpd; + u32 pd_id; + struct qedr_ucontext *uctx; +}; + +struct qedr_mm { + struct { + u64 phy_addr; + unsigned long len; + } key; + struct list_head entry; +}; + +union db_prod32 { + struct rdma_pwm_val16_data data; + u32 raw; +}; + +struct qedr_qp_hwq_info { + /* WQE Elements */ + struct qed_chain pbl; + u64 p_phys_addr_tbl; + u32 max_sges; + + /* WQE */ + u16 prod; + u16 cons; + u16 wqe_cons; + u16 gsi_cons; + u16 max_wr; + + /* DB */ + void __iomem *db; + union db_prod32 db_data; +}; + +#define QEDR_INC_SW_IDX(p_info, index) \ + do { \ + p_info->index = (p_info->index + 1) & \ + qed_chain_get_capacity(p_info->pbl) \ + } while (0) + +enum qedr_qp_err_bitmap { + QEDR_QP_ERR_SQ_FULL = 1, + QEDR_QP_ERR_RQ_FULL = 2, + QEDR_QP_ERR_BAD_SR = 4, + QEDR_QP_ERR_BAD_RR = 8, + QEDR_QP_ERR_SQ_PBL_FULL = 16, + QEDR_QP_ERR_RQ_PBL_FULL = 32, +}; + +struct qedr_qp { + struct ib_qp ibqp; /* must be first */ + struct qedr_dev *dev; + + struct qedr_qp_hwq_info sq; + struct qedr_qp_hwq_info rq; + + u32 max_inline_data; + + /* Lock for QP's */ + spinlock_t q_lock; + struct qedr_cq *sq_cq; + struct qedr_cq *rq_cq; + struct qedr_srq *srq; + enum qed_roce_qp_state state; + u32 id; + struct qedr_pd *pd; + enum ib_qp_type qp_type; + struct qed_rdma_qp *qed_qp; + u32 qp_id; + u16 icid; + u16 mtu; + int sgid_idx; + u32 rq_psn; + u32 sq_psn; + u32 qkey; + u32 dest_qp_num; + + /* Relevant to qps created from kernel space only (ULPs) */ + u8 prev_wqe_size; + u16 wqe_cons; + u32 err_bitmap; + bool signaled; + + /* SQ shadow */ + struct { + u64 wr_id; + enum ib_wc_opcode opcode; + u32 bytes_len; + u8 wqe_size; + bool signaled; + dma_addr_t icrc_mapping; + u32 *icrc; + struct qedr_mr *mr; + } *wqe_wr_id; + + /* RQ shadow */ + struct { + u64 wr_id; + struct ib_sge sg_list[RDMA_MAX_SGE_PER_RQ_WQE]; + u8 wqe_size; + + u8 smac[ETH_ALEN]; + u16 vlan_id; + int rc; + } *rqe_wr_id; + + /* Relevant to qps created from user space only (applications) */ + struct qedr_userq usq; + struct qedr_userq urq; +}; + +struct qedr_ah { + struct ib_ah ibah; + struct ib_ah_attr attr; +}; + +enum qedr_mr_type { + QEDR_MR_USER, + QEDR_MR_KERNEL, + QEDR_MR_DMA, + QEDR_MR_FRMR, +}; + +struct mr_info { + struct qedr_pbl *pbl_table; + struct qedr_pbl_info pbl_info; + struct list_head free_pbl_list; + struct list_head inuse_pbl_list; + u32 completed; + u32 completed_handled; +}; + +struct qedr_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + + struct qed_rdma_register_tid_in_params hw_mr; + enum qedr_mr_type type; + + struct qedr_dev *dev; + struct mr_info info; + + u64 *pages; + u32 npages; +}; + +#define SET_FIELD2(value, name, flag) ((value) |= ((flag) << (name ## _SHIFT))) + +#define QEDR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK << \ + RDMA_CQE_RESPONDER_IMM_FLG_SHIFT) +#define QEDR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK << \ + RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT) +#define QEDR_RESP_RDMA_IMM (QEDR_RESP_IMM | QEDR_RESP_RDMA) + +static inline void qedr_inc_sw_cons(struct qedr_qp_hwq_info *info) +{ + info->cons = (info->cons + 1) % info->max_wr; + info->wqe_cons++; +} + +static inline void qedr_inc_sw_prod(struct qedr_qp_hwq_info *info) +{ + info->prod = (info->prod + 1) % info->max_wr; +} + +static inline int qedr_get_dmac(struct qedr_dev *dev, + struct ib_ah_attr *ah_attr, u8 *mac_addr) +{ + union ib_gid zero_sgid = { { 0 } }; + struct in6_addr in6; + + if (!memcmp(&ah_attr->grh.dgid, &zero_sgid, sizeof(union ib_gid))) { + DP_ERR(dev, "Local port GID not supported\n"); + eth_zero_addr(mac_addr); + return -EINVAL; + } + + memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6)); + ether_addr_copy(mac_addr, ah_attr->dmac); + + return 0; +} + +static inline +struct qedr_ucontext *get_qedr_ucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct qedr_ucontext, ibucontext); +} + +static inline struct qedr_dev *get_qedr_dev(struct ib_device *ibdev) +{ + return container_of(ibdev, struct qedr_dev, ibdev); +} + +static inline struct qedr_pd *get_qedr_pd(struct ib_pd *ibpd) +{ + return container_of(ibpd, struct qedr_pd, ibpd); +} + +static inline struct qedr_cq *get_qedr_cq(struct ib_cq *ibcq) +{ + return container_of(ibcq, struct qedr_cq, ibcq); +} + +static inline struct qedr_qp *get_qedr_qp(struct ib_qp *ibqp) +{ + return container_of(ibqp, struct qedr_qp, ibqp); +} + +static inline struct qedr_ah *get_qedr_ah(struct ib_ah *ibah) +{ + return container_of(ibah, struct qedr_ah, ibah); +} + +static inline struct qedr_mr *get_qedr_mr(struct ib_mr *ibmr) +{ + return container_of(ibmr, struct qedr_mr, ibmr); +} +#endif diff --git a/drivers/infiniband/hw/qedr/qedr_cm.c b/drivers/infiniband/hw/qedr/qedr_cm.c new file mode 100644 index 000000000000..63890ebb72bd --- /dev/null +++ b/drivers/infiniband/hw/qedr/qedr_cm.c @@ -0,0 +1,622 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/dma-mapping.h> +#include <linux/crc32.h> +#include <linux/iommu.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/udp.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/iw_cm.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> + +#include "qedr_hsi.h" +#include <linux/qed/qed_if.h> +#include <linux/qed/qed_roce_if.h> +#include "qedr.h" +#include "qedr_hsi.h" +#include "verbs.h" +#include <rdma/qedr-abi.h> +#include "qedr_hsi.h" +#include "qedr_cm.h" + +void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info) +{ + info->gsi_cons = (info->gsi_cons + 1) % info->max_wr; +} + +void qedr_store_gsi_qp_cq(struct qedr_dev *dev, struct qedr_qp *qp, + struct ib_qp_init_attr *attrs) +{ + dev->gsi_qp_created = 1; + dev->gsi_sqcq = get_qedr_cq(attrs->send_cq); + dev->gsi_rqcq = get_qedr_cq(attrs->recv_cq); + dev->gsi_qp = qp; +} + +void qedr_ll2_tx_cb(void *_qdev, struct qed_roce_ll2_packet *pkt) +{ + struct qedr_dev *dev = (struct qedr_dev *)_qdev; + struct qedr_cq *cq = dev->gsi_sqcq; + struct qedr_qp *qp = dev->gsi_qp; + unsigned long flags; + + DP_DEBUG(dev, QEDR_MSG_GSI, + "LL2 TX CB: gsi_sqcq=%p, gsi_rqcq=%p, gsi_cons=%d, ibcq_comp=%s\n", + dev->gsi_sqcq, dev->gsi_rqcq, qp->sq.gsi_cons, + cq->ibcq.comp_handler ? "Yes" : "No"); + + dma_free_coherent(&dev->pdev->dev, pkt->header.len, pkt->header.vaddr, + pkt->header.baddr); + kfree(pkt); + + spin_lock_irqsave(&qp->q_lock, flags); + qedr_inc_sw_gsi_cons(&qp->sq); + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (cq->ibcq.comp_handler) { + spin_lock_irqsave(&cq->comp_handler_lock, flags); + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + spin_unlock_irqrestore(&cq->comp_handler_lock, flags); + } +} + +void qedr_ll2_rx_cb(void *_dev, struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_rx_params *params) +{ + struct qedr_dev *dev = (struct qedr_dev *)_dev; + struct qedr_cq *cq = dev->gsi_rqcq; + struct qedr_qp *qp = dev->gsi_qp; + unsigned long flags; + + spin_lock_irqsave(&qp->q_lock, flags); + + qp->rqe_wr_id[qp->rq.gsi_cons].rc = params->rc; + qp->rqe_wr_id[qp->rq.gsi_cons].vlan_id = params->vlan_id; + qp->rqe_wr_id[qp->rq.gsi_cons].sg_list[0].length = pkt->payload[0].len; + ether_addr_copy(qp->rqe_wr_id[qp->rq.gsi_cons].smac, params->smac); + + qedr_inc_sw_gsi_cons(&qp->rq); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (cq->ibcq.comp_handler) { + spin_lock_irqsave(&cq->comp_handler_lock, flags); + (*cq->ibcq.comp_handler) (&cq->ibcq, cq->ibcq.cq_context); + spin_unlock_irqrestore(&cq->comp_handler_lock, flags); + } +} + +static void qedr_destroy_gsi_cq(struct qedr_dev *dev, + struct ib_qp_init_attr *attrs) +{ + struct qed_rdma_destroy_cq_in_params iparams; + struct qed_rdma_destroy_cq_out_params oparams; + struct qedr_cq *cq; + + cq = get_qedr_cq(attrs->send_cq); + iparams.icid = cq->icid; + dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + dev->ops->common->chain_free(dev->cdev, &cq->pbl); + + cq = get_qedr_cq(attrs->recv_cq); + /* if a dedicated recv_cq was used, delete it too */ + if (iparams.icid != cq->icid) { + iparams.icid = cq->icid; + dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + dev->ops->common->chain_free(dev->cdev, &cq->pbl); + } +} + +static inline int qedr_check_gsi_qp_attrs(struct qedr_dev *dev, + struct ib_qp_init_attr *attrs) +{ + if (attrs->cap.max_recv_sge > QEDR_GSI_MAX_RECV_SGE) { + DP_ERR(dev, + " create gsi qp: failed. max_recv_sge is larger the max %d>%d\n", + attrs->cap.max_recv_sge, QEDR_GSI_MAX_RECV_SGE); + return -EINVAL; + } + + if (attrs->cap.max_recv_wr > QEDR_GSI_MAX_RECV_WR) { + DP_ERR(dev, + " create gsi qp: failed. max_recv_wr is too large %d>%d\n", + attrs->cap.max_recv_wr, QEDR_GSI_MAX_RECV_WR); + return -EINVAL; + } + + if (attrs->cap.max_send_wr > QEDR_GSI_MAX_SEND_WR) { + DP_ERR(dev, + " create gsi qp: failed. max_send_wr is too large %d>%d\n", + attrs->cap.max_send_wr, QEDR_GSI_MAX_SEND_WR); + return -EINVAL; + } + + return 0; +} + +struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev, + struct ib_qp_init_attr *attrs, + struct qedr_qp *qp) +{ + struct qed_roce_ll2_params ll2_params; + int rc; + + rc = qedr_check_gsi_qp_attrs(dev, attrs); + if (rc) + return ERR_PTR(rc); + + /* configure and start LL2 */ + memset(&ll2_params, 0, sizeof(ll2_params)); + ll2_params.max_tx_buffers = attrs->cap.max_send_wr; + ll2_params.max_rx_buffers = attrs->cap.max_recv_wr; + ll2_params.cbs.tx_cb = qedr_ll2_tx_cb; + ll2_params.cbs.rx_cb = qedr_ll2_rx_cb; + ll2_params.cb_cookie = (void *)dev; + ll2_params.mtu = dev->ndev->mtu; + ether_addr_copy(ll2_params.mac_address, dev->ndev->dev_addr); + rc = dev->ops->roce_ll2_start(dev->cdev, &ll2_params); + if (rc) { + DP_ERR(dev, "create gsi qp: failed on ll2 start. rc=%d\n", rc); + return ERR_PTR(rc); + } + + /* create QP */ + qp->ibqp.qp_num = 1; + qp->rq.max_wr = attrs->cap.max_recv_wr; + qp->sq.max_wr = attrs->cap.max_send_wr; + + qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id), + GFP_KERNEL); + if (!qp->rqe_wr_id) + goto err; + qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id), + GFP_KERNEL); + if (!qp->wqe_wr_id) + goto err; + + qedr_store_gsi_qp_cq(dev, qp, attrs); + ether_addr_copy(dev->gsi_ll2_mac_address, dev->ndev->dev_addr); + + /* the GSI CQ is handled by the driver so remove it from the FW */ + qedr_destroy_gsi_cq(dev, attrs); + dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI; + dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI; + + DP_DEBUG(dev, QEDR_MSG_GSI, "created GSI QP %p\n", qp); + + return &qp->ibqp; + +err: + kfree(qp->rqe_wr_id); + + rc = dev->ops->roce_ll2_stop(dev->cdev); + if (rc) + DP_ERR(dev, "create gsi qp: failed destroy on create\n"); + + return ERR_PTR(-ENOMEM); +} + +int qedr_destroy_gsi_qp(struct qedr_dev *dev) +{ + int rc; + + rc = dev->ops->roce_ll2_stop(dev->cdev); + if (rc) + DP_ERR(dev, "destroy gsi qp: failed (rc=%d)\n", rc); + else + DP_DEBUG(dev, QEDR_MSG_GSI, "destroy gsi qp: success\n"); + + return rc; +} + +#define QEDR_MAX_UD_HEADER_SIZE (100) +#define QEDR_GSI_QPN (1) +static inline int qedr_gsi_build_header(struct qedr_dev *dev, + struct qedr_qp *qp, + struct ib_send_wr *swr, + struct ib_ud_header *udh, + int *roce_mode) +{ + bool has_vlan = false, has_grh_ipv6 = true; + struct ib_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr; + struct ib_global_route *grh = &ah_attr->grh; + union ib_gid sgid; + int send_size = 0; + u16 vlan_id = 0; + u16 ether_type; + struct ib_gid_attr sgid_attr; + int rc; + int ip_ver = 0; + + bool has_udp = false; + int i; + + send_size = 0; + for (i = 0; i < swr->num_sge; ++i) + send_size += swr->sg_list[i].length; + + rc = ib_get_cached_gid(qp->ibqp.device, ah_attr->port_num, + grh->sgid_index, &sgid, &sgid_attr); + if (rc) { + DP_ERR(dev, + "gsi post send: failed to get cached GID (port=%d, ix=%d)\n", + ah_attr->port_num, grh->sgid_index); + return rc; + } + + vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev); + if (vlan_id < VLAN_CFI_MASK) + has_vlan = true; + if (sgid_attr.ndev) + dev_put(sgid_attr.ndev); + + if (!memcmp(&sgid, &zgid, sizeof(sgid))) { + DP_ERR(dev, "gsi post send: GID not found GID index %d\n", + ah_attr->grh.sgid_index); + return -ENOENT; + } + + has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP); + if (!has_udp) { + /* RoCE v1 */ + ether_type = ETH_P_ROCE; + *roce_mode = ROCE_V1; + } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) { + /* RoCE v2 IPv4 */ + ip_ver = 4; + ether_type = ETH_P_IP; + has_grh_ipv6 = false; + *roce_mode = ROCE_V2_IPV4; + } else { + /* RoCE v2 IPv6 */ + ip_ver = 6; + ether_type = ETH_P_IPV6; + *roce_mode = ROCE_V2_IPV6; + } + + rc = ib_ud_header_init(send_size, false, true, has_vlan, + has_grh_ipv6, ip_ver, has_udp, 0, udh); + if (rc) { + DP_ERR(dev, "gsi post send: failed to init header\n"); + return rc; + } + + /* ENET + VLAN headers */ + ether_addr_copy(udh->eth.dmac_h, ah_attr->dmac); + ether_addr_copy(udh->eth.smac_h, dev->ndev->dev_addr); + if (has_vlan) { + udh->eth.type = htons(ETH_P_8021Q); + udh->vlan.tag = htons(vlan_id); + udh->vlan.type = htons(ether_type); + } else { + udh->eth.type = htons(ether_type); + } + + /* BTH */ + udh->bth.solicited_event = !!(swr->send_flags & IB_SEND_SOLICITED); + udh->bth.pkey = QEDR_ROCE_PKEY_DEFAULT; + udh->bth.destination_qpn = htonl(ud_wr(swr)->remote_qpn); + udh->bth.psn = htonl((qp->sq_psn++) & ((1 << 24) - 1)); + udh->bth.opcode = IB_OPCODE_UD_SEND_ONLY; + + /* DETH */ + udh->deth.qkey = htonl(0x80010000); + udh->deth.source_qpn = htonl(QEDR_GSI_QPN); + + if (has_grh_ipv6) { + /* GRH / IPv6 header */ + udh->grh.traffic_class = grh->traffic_class; + udh->grh.flow_label = grh->flow_label; + udh->grh.hop_limit = grh->hop_limit; + udh->grh.destination_gid = grh->dgid; + memcpy(&udh->grh.source_gid.raw, &sgid.raw, + sizeof(udh->grh.source_gid.raw)); + } else { + /* IPv4 header */ + u32 ipv4_addr; + + udh->ip4.protocol = IPPROTO_UDP; + udh->ip4.tos = htonl(ah_attr->grh.flow_label); + udh->ip4.frag_off = htons(IP_DF); + udh->ip4.ttl = ah_attr->grh.hop_limit; + + ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw); + udh->ip4.saddr = ipv4_addr; + ipv4_addr = qedr_get_ipv4_from_gid(ah_attr->grh.dgid.raw); + udh->ip4.daddr = ipv4_addr; + /* note: checksum is calculated by the device */ + } + + /* UDP */ + if (has_udp) { + udh->udp.sport = htons(QEDR_ROCE_V2_UDP_SPORT); + udh->udp.dport = htons(ROCE_V2_UDP_DPORT); + udh->udp.csum = 0; + /* UDP length is untouched hence is zero */ + } + return 0; +} + +static inline int qedr_gsi_build_packet(struct qedr_dev *dev, + struct qedr_qp *qp, + struct ib_send_wr *swr, + struct qed_roce_ll2_packet **p_packet) +{ + u8 ud_header_buffer[QEDR_MAX_UD_HEADER_SIZE]; + struct qed_roce_ll2_packet *packet; + struct pci_dev *pdev = dev->pdev; + int roce_mode, header_size; + struct ib_ud_header udh; + int i, rc; + + *p_packet = NULL; + + rc = qedr_gsi_build_header(dev, qp, swr, &udh, &roce_mode); + if (rc) + return rc; + + header_size = ib_ud_header_pack(&udh, &ud_header_buffer); + + packet = kzalloc(sizeof(*packet), GFP_ATOMIC); + if (!packet) + return -ENOMEM; + + packet->header.vaddr = dma_alloc_coherent(&pdev->dev, header_size, + &packet->header.baddr, + GFP_ATOMIC); + if (!packet->header.vaddr) { + kfree(packet); + return -ENOMEM; + } + + if (ether_addr_equal(udh.eth.smac_h, udh.eth.dmac_h)) + packet->tx_dest = QED_ROCE_LL2_TX_DEST_NW; + else + packet->tx_dest = QED_ROCE_LL2_TX_DEST_LB; + + packet->roce_mode = roce_mode; + memcpy(packet->header.vaddr, ud_header_buffer, header_size); + packet->header.len = header_size; + packet->n_seg = swr->num_sge; + for (i = 0; i < packet->n_seg; i++) { + packet->payload[i].baddr = swr->sg_list[i].addr; + packet->payload[i].len = swr->sg_list[i].length; + } + + *p_packet = packet; + + return 0; +} + +int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct qed_roce_ll2_packet *pkt = NULL; + struct qedr_qp *qp = get_qedr_qp(ibqp); + struct qed_roce_ll2_tx_params params; + struct qedr_dev *dev = qp->dev; + unsigned long flags; + int rc; + + if (qp->state != QED_ROCE_QP_STATE_RTS) { + *bad_wr = wr; + DP_ERR(dev, + "gsi post recv: failed to post rx buffer. state is %d and not QED_ROCE_QP_STATE_RTS\n", + qp->state); + return -EINVAL; + } + + if (wr->num_sge > RDMA_MAX_SGE_PER_SQ_WQE) { + DP_ERR(dev, "gsi post send: num_sge is too large (%d>%d)\n", + wr->num_sge, RDMA_MAX_SGE_PER_SQ_WQE); + rc = -EINVAL; + goto err; + } + + if (wr->opcode != IB_WR_SEND) { + DP_ERR(dev, + "gsi post send: failed due to unsupported opcode %d\n", + wr->opcode); + rc = -EINVAL; + goto err; + } + + memset(¶ms, 0, sizeof(params)); + + spin_lock_irqsave(&qp->q_lock, flags); + + rc = qedr_gsi_build_packet(dev, qp, wr, &pkt); + if (rc) { + spin_unlock_irqrestore(&qp->q_lock, flags); + goto err; + } + + rc = dev->ops->roce_ll2_tx(dev->cdev, pkt, ¶ms); + if (!rc) { + qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; + qedr_inc_sw_prod(&qp->sq); + DP_DEBUG(qp->dev, QEDR_MSG_GSI, + "gsi post send: opcode=%d, in_irq=%ld, irqs_disabled=%d, wr_id=%llx\n", + wr->opcode, in_irq(), irqs_disabled(), wr->wr_id); + } else { + if (rc == QED_ROCE_TX_HEAD_FAILURE) { + /* TX failed while posting header - release resources */ + dma_free_coherent(&dev->pdev->dev, pkt->header.len, + pkt->header.vaddr, pkt->header.baddr); + kfree(pkt); + } else if (rc == QED_ROCE_TX_FRAG_FAILURE) { + /* NTD since TX failed while posting a fragment. We will + * release the resources on TX callback + */ + } + + DP_ERR(dev, "gsi post send: failed to transmit (rc=%d)\n", rc); + rc = -EAGAIN; + *bad_wr = wr; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + if (wr->next) { + DP_ERR(dev, + "gsi post send: failed second WR. Only one WR may be passed at a time\n"); + *bad_wr = wr->next; + rc = -EINVAL; + } + + return rc; + +err: + *bad_wr = wr; + return rc; +} + +int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qedr_dev *dev = get_qedr_dev(ibqp->device); + struct qedr_qp *qp = get_qedr_qp(ibqp); + struct qed_roce_ll2_buffer buf; + unsigned long flags; + int status = 0; + int rc; + + if ((qp->state != QED_ROCE_QP_STATE_RTR) && + (qp->state != QED_ROCE_QP_STATE_RTS)) { + *bad_wr = wr; + DP_ERR(dev, + "gsi post recv: failed to post rx buffer. state is %d and not QED_ROCE_QP_STATE_RTR/S\n", + qp->state); + return -EINVAL; + } + + memset(&buf, 0, sizeof(buf)); + + spin_lock_irqsave(&qp->q_lock, flags); + + while (wr) { + if (wr->num_sge > QEDR_GSI_MAX_RECV_SGE) { + DP_ERR(dev, + "gsi post recv: failed to post rx buffer. too many sges %d>%d\n", + wr->num_sge, QEDR_GSI_MAX_RECV_SGE); + goto err; + } + + buf.baddr = wr->sg_list[0].addr; + buf.len = wr->sg_list[0].length; + + rc = dev->ops->roce_ll2_post_rx_buffer(dev->cdev, &buf, 0, 1); + if (rc) { + DP_ERR(dev, + "gsi post recv: failed to post rx buffer (rc=%d)\n", + rc); + goto err; + } + + memset(&qp->rqe_wr_id[qp->rq.prod], 0, + sizeof(qp->rqe_wr_id[qp->rq.prod])); + qp->rqe_wr_id[qp->rq.prod].sg_list[0] = wr->sg_list[0]; + qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; + + qedr_inc_sw_prod(&qp->rq); + + wr = wr->next; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + return status; +err: + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + return -ENOMEM; +} + +int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct qedr_dev *dev = get_qedr_dev(ibcq->device); + struct qedr_cq *cq = get_qedr_cq(ibcq); + struct qedr_qp *qp = dev->gsi_qp; + unsigned long flags; + int i = 0; + + spin_lock_irqsave(&cq->cq_lock, flags); + + while (i < num_entries && qp->rq.cons != qp->rq.gsi_cons) { + memset(&wc[i], 0, sizeof(*wc)); + + wc[i].qp = &qp->ibqp; + wc[i].wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + wc[i].opcode = IB_WC_RECV; + wc[i].pkey_index = 0; + wc[i].status = (qp->rqe_wr_id[qp->rq.cons].rc) ? + IB_WC_GENERAL_ERR : IB_WC_SUCCESS; + /* 0 - currently only one recv sg is supported */ + wc[i].byte_len = qp->rqe_wr_id[qp->rq.cons].sg_list[0].length; + wc[i].wc_flags |= IB_WC_GRH | IB_WC_IP_CSUM_OK; + ether_addr_copy(wc[i].smac, qp->rqe_wr_id[qp->rq.cons].smac); + wc[i].wc_flags |= IB_WC_WITH_SMAC; + if (qp->rqe_wr_id[qp->rq.cons].vlan_id) { + wc[i].wc_flags |= IB_WC_WITH_VLAN; + wc[i].vlan_id = qp->rqe_wr_id[qp->rq.cons].vlan_id; + } + + qedr_inc_sw_cons(&qp->rq); + i++; + } + + while (i < num_entries && qp->sq.cons != qp->sq.gsi_cons) { + memset(&wc[i], 0, sizeof(*wc)); + + wc[i].qp = &qp->ibqp; + wc[i].wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; + wc[i].opcode = IB_WC_SEND; + wc[i].status = IB_WC_SUCCESS; + + qedr_inc_sw_cons(&qp->sq); + i++; + } + + spin_unlock_irqrestore(&cq->cq_lock, flags); + + DP_DEBUG(dev, QEDR_MSG_GSI, + "gsi poll_cq: requested entries=%d, actual=%d, qp->rq.cons=%d, qp->rq.gsi_cons=%x, qp->sq.cons=%d, qp->sq.gsi_cons=%d, qp_num=%d\n", + num_entries, i, qp->rq.cons, qp->rq.gsi_cons, qp->sq.cons, + qp->sq.gsi_cons, qp->ibqp.qp_num); + + return i; +} diff --git a/drivers/infiniband/hw/cxgb4/user.h b/drivers/infiniband/hw/qedr/qedr_cm.h index 295f422b9a3a..9ba6e15cd93f 100644 --- a/drivers/infiniband/hw/cxgb4/user.h +++ b/drivers/infiniband/hw/qedr/qedr_cm.h @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2009-2010 Chelsio, Inc. All rights reserved. +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -17,7 +17,7 @@ * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials + * disclaimer in the documentation and /or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, @@ -29,52 +29,33 @@ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __C4IW_USER_H__ -#define __C4IW_USER_H__ +#ifndef LINUX_QEDR_CM_H_ +#define LINUX_QEDR_CM_H_ -#define C4IW_UVERBS_ABI_VERSION 3 - -/* - * Make sure that all structs defined in this file remain laid out so - * that they pack the same way on 32-bit and 64-bit architectures (to - * avoid incompatibility between 32-bit userspace and 64-bit kernels). - * In particular do not use pointer types -- pass pointers in __u64 - * instead. - */ -struct c4iw_create_cq_resp { - __u64 key; - __u64 gts_key; - __u64 memsize; - __u32 cqid; - __u32 size; - __u32 qid_mask; - __u32 reserved; /* explicit padding (optional for i386) */ -}; +#define QEDR_GSI_MAX_RECV_WR (4096) +#define QEDR_GSI_MAX_SEND_WR (4096) +#define QEDR_GSI_MAX_RECV_SGE (1) /* LL2 FW limitation */ -enum { - C4IW_QPF_ONCHIP = (1<<0) -}; +#define ETH_P_ROCE (0x8915) +#define QEDR_ROCE_V2_UDP_SPORT (0000) -struct c4iw_create_qp_resp { - __u64 ma_sync_key; - __u64 sq_key; - __u64 rq_key; - __u64 sq_db_gts_key; - __u64 rq_db_gts_key; - __u64 sq_memsize; - __u64 rq_memsize; - __u32 sqid; - __u32 rqid; - __u32 sq_size; - __u32 rq_size; - __u32 qid_mask; - __u32 flags; -}; +static inline u32 qedr_get_ipv4_from_gid(u8 *gid) +{ + return *(u32 *)(void *)&gid[12]; +} -struct c4iw_alloc_ucontext_resp { - __u64 status_page_key; - __u32 status_page_size; - __u32 reserved; /* explicit padding (optional for i386) */ -}; +/* RDMA CM */ +int qedr_gsi_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc); +int qedr_gsi_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr); +int qedr_gsi_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr); +struct ib_qp *qedr_create_gsi_qp(struct qedr_dev *dev, + struct ib_qp_init_attr *attrs, + struct qedr_qp *qp); +void qedr_store_gsi_qp_cq(struct qedr_dev *dev, + struct qedr_qp *qp, struct ib_qp_init_attr *attrs); +int qedr_destroy_gsi_qp(struct qedr_dev *dev); +void qedr_inc_sw_gsi_cons(struct qedr_qp_hwq_info *info); #endif diff --git a/drivers/infiniband/hw/qedr/qedr_hsi.h b/drivers/infiniband/hw/qedr/qedr_hsi.h new file mode 100644 index 000000000000..66d27521373f --- /dev/null +++ b/drivers/infiniband/hw/qedr/qedr_hsi.h @@ -0,0 +1,56 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __QED_HSI_ROCE__ +#define __QED_HSI_ROCE__ + +#include <linux/qed/common_hsi.h> +#include <linux/qed/roce_common.h> +#include "qedr_hsi_rdma.h" + +/* Affiliated asynchronous events / errors enumeration */ +enum roce_async_events_type { + ROCE_ASYNC_EVENT_NONE = 0, + ROCE_ASYNC_EVENT_COMM_EST = 1, + ROCE_ASYNC_EVENT_SQ_DRAINED, + ROCE_ASYNC_EVENT_SRQ_LIMIT, + ROCE_ASYNC_EVENT_LAST_WQE_REACHED, + ROCE_ASYNC_EVENT_CQ_ERR, + ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR, + ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR, + ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR, + ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR, + ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR, + ROCE_ASYNC_EVENT_SRQ_EMPTY, + MAX_ROCE_ASYNC_EVENTS_TYPE +}; + +#endif /* __QED_HSI_ROCE__ */ diff --git a/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h new file mode 100644 index 000000000000..5c98d2055cad --- /dev/null +++ b/drivers/infiniband/hw/qedr/qedr_hsi_rdma.h @@ -0,0 +1,748 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __QED_HSI_RDMA__ +#define __QED_HSI_RDMA__ + +#include <linux/qed/rdma_common.h> + +/* rdma completion notification queue element */ +struct rdma_cnqe { + struct regpair cq_handle; +}; + +struct rdma_cqe_responder { + struct regpair srq_wr_id; + struct regpair qp_handle; + __le32 imm_data_or_inv_r_Key; + __le32 length; + __le32 imm_data_hi; + __le16 rq_cons; + u8 flags; +#define RDMA_CQE_RESPONDER_TOGGLE_BIT_MASK 0x1 +#define RDMA_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0 +#define RDMA_CQE_RESPONDER_TYPE_MASK 0x3 +#define RDMA_CQE_RESPONDER_TYPE_SHIFT 1 +#define RDMA_CQE_RESPONDER_INV_FLG_MASK 0x1 +#define RDMA_CQE_RESPONDER_INV_FLG_SHIFT 3 +#define RDMA_CQE_RESPONDER_IMM_FLG_MASK 0x1 +#define RDMA_CQE_RESPONDER_IMM_FLG_SHIFT 4 +#define RDMA_CQE_RESPONDER_RDMA_FLG_MASK 0x1 +#define RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT 5 +#define RDMA_CQE_RESPONDER_RESERVED2_MASK 0x3 +#define RDMA_CQE_RESPONDER_RESERVED2_SHIFT 6 + u8 status; +}; + +struct rdma_cqe_requester { + __le16 sq_cons; + __le16 reserved0; + __le32 reserved1; + struct regpair qp_handle; + struct regpair reserved2; + __le32 reserved3; + __le16 reserved4; + u8 flags; +#define RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK 0x1 +#define RDMA_CQE_REQUESTER_TOGGLE_BIT_SHIFT 0 +#define RDMA_CQE_REQUESTER_TYPE_MASK 0x3 +#define RDMA_CQE_REQUESTER_TYPE_SHIFT 1 +#define RDMA_CQE_REQUESTER_RESERVED5_MASK 0x1F +#define RDMA_CQE_REQUESTER_RESERVED5_SHIFT 3 + u8 status; +}; + +struct rdma_cqe_common { + struct regpair reserved0; + struct regpair qp_handle; + __le16 reserved1[7]; + u8 flags; +#define RDMA_CQE_COMMON_TOGGLE_BIT_MASK 0x1 +#define RDMA_CQE_COMMON_TOGGLE_BIT_SHIFT 0 +#define RDMA_CQE_COMMON_TYPE_MASK 0x3 +#define RDMA_CQE_COMMON_TYPE_SHIFT 1 +#define RDMA_CQE_COMMON_RESERVED2_MASK 0x1F +#define RDMA_CQE_COMMON_RESERVED2_SHIFT 3 + u8 status; +}; + +/* rdma completion queue element */ +union rdma_cqe { + struct rdma_cqe_responder resp; + struct rdma_cqe_requester req; + struct rdma_cqe_common cmn; +}; + +/* * CQE requester status enumeration */ +enum rdma_cqe_requester_status_enum { + RDMA_CQE_REQ_STS_OK, + RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR, + RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR, + RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR, + RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR, + RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR, + RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR, + RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR, + RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR, + RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR, + RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR, + RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR, + MAX_RDMA_CQE_REQUESTER_STATUS_ENUM +}; + +/* CQE responder status enumeration */ +enum rdma_cqe_responder_status_enum { + RDMA_CQE_RESP_STS_OK, + RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR, + RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR, + RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR, + RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR, + RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR, + RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR, + RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR, + MAX_RDMA_CQE_RESPONDER_STATUS_ENUM +}; + +/* CQE type enumeration */ +enum rdma_cqe_type { + RDMA_CQE_TYPE_REQUESTER, + RDMA_CQE_TYPE_RESPONDER_RQ, + RDMA_CQE_TYPE_RESPONDER_SRQ, + RDMA_CQE_TYPE_INVALID, + MAX_RDMA_CQE_TYPE +}; + +struct rdma_sq_sge { + __le32 length; + struct regpair addr; + __le32 l_key; +}; + +struct rdma_rq_sge { + struct regpair addr; + __le32 length; + __le32 flags; +#define RDMA_RQ_SGE_L_KEY_MASK 0x3FFFFFF +#define RDMA_RQ_SGE_L_KEY_SHIFT 0 +#define RDMA_RQ_SGE_NUM_SGES_MASK 0x7 +#define RDMA_RQ_SGE_NUM_SGES_SHIFT 26 +#define RDMA_RQ_SGE_RESERVED0_MASK 0x7 +#define RDMA_RQ_SGE_RESERVED0_SHIFT 29 +}; + +struct rdma_srq_sge { + struct regpair addr; + __le32 length; + __le32 l_key; +}; + +/* Rdma doorbell data for SQ and RQ */ +struct rdma_pwm_val16_data { + __le16 icid; + __le16 value; +}; + +union rdma_pwm_val16_data_union { + struct rdma_pwm_val16_data as_struct; + __le32 as_dword; +}; + +/* Rdma doorbell data for CQ */ +struct rdma_pwm_val32_data { + __le16 icid; + u8 agg_flags; + u8 params; +#define RDMA_PWM_VAL32_DATA_AGG_CMD_MASK 0x3 +#define RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT 0 +#define RDMA_PWM_VAL32_DATA_BYPASS_EN_MASK 0x1 +#define RDMA_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2 +#define RDMA_PWM_VAL32_DATA_RESERVED_MASK 0x1F +#define RDMA_PWM_VAL32_DATA_RESERVED_SHIFT 3 + __le32 value; +}; + +/* DIF Block size options */ +enum rdma_dif_block_size { + RDMA_DIF_BLOCK_512 = 0, + RDMA_DIF_BLOCK_4096 = 1, + MAX_RDMA_DIF_BLOCK_SIZE +}; + +/* DIF CRC initial value */ +enum rdma_dif_crc_seed { + RDMA_DIF_CRC_SEED_0000 = 0, + RDMA_DIF_CRC_SEED_FFFF = 1, + MAX_RDMA_DIF_CRC_SEED +}; + +/* RDMA DIF Error Result Structure */ +struct rdma_dif_error_result { + __le32 error_intervals; + __le32 dif_error_1st_interval; + u8 flags; +#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_CRC_MASK 0x1 +#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_CRC_SHIFT 0 +#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_APP_TAG_MASK 0x1 +#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_APP_TAG_SHIFT 1 +#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_REF_TAG_MASK 0x1 +#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_REF_TAG_SHIFT 2 +#define RDMA_DIF_ERROR_RESULT_RESERVED0_MASK 0xF +#define RDMA_DIF_ERROR_RESULT_RESERVED0_SHIFT 3 +#define RDMA_DIF_ERROR_RESULT_TOGGLE_BIT_MASK 0x1 +#define RDMA_DIF_ERROR_RESULT_TOGGLE_BIT_SHIFT 7 + u8 reserved1[55]; +}; + +/* DIF IO direction */ +enum rdma_dif_io_direction_flg { + RDMA_DIF_DIR_RX = 0, + RDMA_DIF_DIR_TX = 1, + MAX_RDMA_DIF_IO_DIRECTION_FLG +}; + +/* RDMA DIF Runt Result Structure */ +struct rdma_dif_runt_result { + __le16 guard_tag; + __le16 reserved[3]; +}; + +/* Memory window type enumeration */ +enum rdma_mw_type { + RDMA_MW_TYPE_1, + RDMA_MW_TYPE_2A, + MAX_RDMA_MW_TYPE +}; + +struct rdma_sq_atomic_wqe { + __le32 reserved1; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define RDMA_SQ_ATOMIC_WQE_COMP_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_COMP_FLG_SHIFT 0 +#define RDMA_SQ_ATOMIC_WQE_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_ATOMIC_WQE_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_ATOMIC_WQE_SE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_SE_FLG_SHIFT 3 +#define RDMA_SQ_ATOMIC_WQE_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_ATOMIC_WQE_DIF_ON_HOST_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_DIF_ON_HOST_FLG_SHIFT 5 +#define RDMA_SQ_ATOMIC_WQE_RESERVED0_MASK 0x3 +#define RDMA_SQ_ATOMIC_WQE_RESERVED0_SHIFT 6 + u8 wqe_size; + u8 prev_wqe_size; + struct regpair remote_va; + __le32 r_key; + __le32 reserved2; + struct regpair cmp_data; + struct regpair swap_data; +}; + +/* First element (16 bytes) of atomic wqe */ +struct rdma_sq_atomic_wqe_1st { + __le32 reserved1; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define RDMA_SQ_ATOMIC_WQE_1ST_COMP_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_1ST_COMP_FLG_SHIFT 0 +#define RDMA_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_ATOMIC_WQE_1ST_SE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_1ST_SE_FLG_SHIFT 3 +#define RDMA_SQ_ATOMIC_WQE_1ST_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_ATOMIC_WQE_1ST_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_ATOMIC_WQE_1ST_RESERVED0_MASK 0x7 +#define RDMA_SQ_ATOMIC_WQE_1ST_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + +/* Second element (16 bytes) of atomic wqe */ +struct rdma_sq_atomic_wqe_2nd { + struct regpair remote_va; + __le32 r_key; + __le32 reserved2; +}; + +/* Third element (16 bytes) of atomic wqe */ +struct rdma_sq_atomic_wqe_3rd { + struct regpair cmp_data; + struct regpair swap_data; +}; + +struct rdma_sq_bind_wqe { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define RDMA_SQ_BIND_WQE_COMP_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_COMP_FLG_SHIFT 0 +#define RDMA_SQ_BIND_WQE_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_BIND_WQE_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_BIND_WQE_SE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_SE_FLG_SHIFT 3 +#define RDMA_SQ_BIND_WQE_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_BIND_WQE_RESERVED0_MASK 0x7 +#define RDMA_SQ_BIND_WQE_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; + u8 bind_ctrl; +#define RDMA_SQ_BIND_WQE_ZERO_BASED_MASK 0x1 +#define RDMA_SQ_BIND_WQE_ZERO_BASED_SHIFT 0 +#define RDMA_SQ_BIND_WQE_MW_TYPE_MASK 0x1 +#define RDMA_SQ_BIND_WQE_MW_TYPE_SHIFT 1 +#define RDMA_SQ_BIND_WQE_RESERVED1_MASK 0x3F +#define RDMA_SQ_BIND_WQE_RESERVED1_SHIFT 2 + u8 access_ctrl; +#define RDMA_SQ_BIND_WQE_REMOTE_READ_MASK 0x1 +#define RDMA_SQ_BIND_WQE_REMOTE_READ_SHIFT 0 +#define RDMA_SQ_BIND_WQE_REMOTE_WRITE_MASK 0x1 +#define RDMA_SQ_BIND_WQE_REMOTE_WRITE_SHIFT 1 +#define RDMA_SQ_BIND_WQE_ENABLE_ATOMIC_MASK 0x1 +#define RDMA_SQ_BIND_WQE_ENABLE_ATOMIC_SHIFT 2 +#define RDMA_SQ_BIND_WQE_LOCAL_READ_MASK 0x1 +#define RDMA_SQ_BIND_WQE_LOCAL_READ_SHIFT 3 +#define RDMA_SQ_BIND_WQE_LOCAL_WRITE_MASK 0x1 +#define RDMA_SQ_BIND_WQE_LOCAL_WRITE_SHIFT 4 +#define RDMA_SQ_BIND_WQE_RESERVED2_MASK 0x7 +#define RDMA_SQ_BIND_WQE_RESERVED2_SHIFT 5 + u8 reserved3; + u8 length_hi; + __le32 length_lo; + __le32 parent_l_key; + __le32 reserved4; +}; + +/* First element (16 bytes) of bind wqe */ +struct rdma_sq_bind_wqe_1st { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define RDMA_SQ_BIND_WQE_1ST_COMP_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_1ST_COMP_FLG_SHIFT 0 +#define RDMA_SQ_BIND_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_BIND_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_BIND_WQE_1ST_SE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_1ST_SE_FLG_SHIFT 3 +#define RDMA_SQ_BIND_WQE_1ST_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_BIND_WQE_1ST_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_BIND_WQE_1ST_RESERVED0_MASK 0x7 +#define RDMA_SQ_BIND_WQE_1ST_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + +/* Second element (16 bytes) of bind wqe */ +struct rdma_sq_bind_wqe_2nd { + u8 bind_ctrl; +#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_MASK 0x1 +#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_SHIFT 0 +#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_MASK 0x1 +#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_SHIFT 1 +#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_MASK 0x3F +#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_SHIFT 2 + u8 access_ctrl; +#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_MASK 0x1 +#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_SHIFT 0 +#define RDMA_SQ_BIND_WQE_2ND_REMOTE_WRITE_MASK 0x1 +#define RDMA_SQ_BIND_WQE_2ND_REMOTE_WRITE_SHIFT 1 +#define RDMA_SQ_BIND_WQE_2ND_ENABLE_ATOMIC_MASK 0x1 +#define RDMA_SQ_BIND_WQE_2ND_ENABLE_ATOMIC_SHIFT 2 +#define RDMA_SQ_BIND_WQE_2ND_LOCAL_READ_MASK 0x1 +#define RDMA_SQ_BIND_WQE_2ND_LOCAL_READ_SHIFT 3 +#define RDMA_SQ_BIND_WQE_2ND_LOCAL_WRITE_MASK 0x1 +#define RDMA_SQ_BIND_WQE_2ND_LOCAL_WRITE_SHIFT 4 +#define RDMA_SQ_BIND_WQE_2ND_RESERVED2_MASK 0x7 +#define RDMA_SQ_BIND_WQE_2ND_RESERVED2_SHIFT 5 + u8 reserved3; + u8 length_hi; + __le32 length_lo; + __le32 parent_l_key; + __le32 reserved4; +}; + +/* Structure with only the SQ WQE common + * fields. Size is of one SQ element (16B) + */ +struct rdma_sq_common_wqe { + __le32 reserved1[3]; + u8 req_type; + u8 flags; +#define RDMA_SQ_COMMON_WQE_COMP_FLG_MASK 0x1 +#define RDMA_SQ_COMMON_WQE_COMP_FLG_SHIFT 0 +#define RDMA_SQ_COMMON_WQE_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_COMMON_WQE_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_COMMON_WQE_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_COMMON_WQE_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_COMMON_WQE_SE_FLG_MASK 0x1 +#define RDMA_SQ_COMMON_WQE_SE_FLG_SHIFT 3 +#define RDMA_SQ_COMMON_WQE_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_COMMON_WQE_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_COMMON_WQE_RESERVED0_MASK 0x7 +#define RDMA_SQ_COMMON_WQE_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + +struct rdma_sq_fmr_wqe { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define RDMA_SQ_FMR_WQE_COMP_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_COMP_FLG_SHIFT 0 +#define RDMA_SQ_FMR_WQE_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_FMR_WQE_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_FMR_WQE_SE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_SE_FLG_SHIFT 3 +#define RDMA_SQ_FMR_WQE_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_FMR_WQE_DIF_ON_HOST_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_ON_HOST_FLG_SHIFT 5 +#define RDMA_SQ_FMR_WQE_RESERVED0_MASK 0x3 +#define RDMA_SQ_FMR_WQE_RESERVED0_SHIFT 6 + u8 wqe_size; + u8 prev_wqe_size; + u8 fmr_ctrl; +#define RDMA_SQ_FMR_WQE_PAGE_SIZE_LOG_MASK 0x1F +#define RDMA_SQ_FMR_WQE_PAGE_SIZE_LOG_SHIFT 0 +#define RDMA_SQ_FMR_WQE_ZERO_BASED_MASK 0x1 +#define RDMA_SQ_FMR_WQE_ZERO_BASED_SHIFT 5 +#define RDMA_SQ_FMR_WQE_BIND_EN_MASK 0x1 +#define RDMA_SQ_FMR_WQE_BIND_EN_SHIFT 6 +#define RDMA_SQ_FMR_WQE_RESERVED1_MASK 0x1 +#define RDMA_SQ_FMR_WQE_RESERVED1_SHIFT 7 + u8 access_ctrl; +#define RDMA_SQ_FMR_WQE_REMOTE_READ_MASK 0x1 +#define RDMA_SQ_FMR_WQE_REMOTE_READ_SHIFT 0 +#define RDMA_SQ_FMR_WQE_REMOTE_WRITE_MASK 0x1 +#define RDMA_SQ_FMR_WQE_REMOTE_WRITE_SHIFT 1 +#define RDMA_SQ_FMR_WQE_ENABLE_ATOMIC_MASK 0x1 +#define RDMA_SQ_FMR_WQE_ENABLE_ATOMIC_SHIFT 2 +#define RDMA_SQ_FMR_WQE_LOCAL_READ_MASK 0x1 +#define RDMA_SQ_FMR_WQE_LOCAL_READ_SHIFT 3 +#define RDMA_SQ_FMR_WQE_LOCAL_WRITE_MASK 0x1 +#define RDMA_SQ_FMR_WQE_LOCAL_WRITE_SHIFT 4 +#define RDMA_SQ_FMR_WQE_RESERVED2_MASK 0x7 +#define RDMA_SQ_FMR_WQE_RESERVED2_SHIFT 5 + u8 reserved3; + u8 length_hi; + __le32 length_lo; + struct regpair pbl_addr; + __le32 dif_base_ref_tag; + __le16 dif_app_tag; + __le16 dif_app_tag_mask; + __le16 dif_runt_crc_value; + __le16 dif_flags; +#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_SHIFT 0 +#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_SHIFT 1 +#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_SHIFT 2 +#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_SHIFT 3 +#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_SHIFT 4 +#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_SHIFT 5 +#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_MASK 0x1 +#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_SHIFT 6 +#define RDMA_SQ_FMR_WQE_RESERVED4_MASK 0x1FF +#define RDMA_SQ_FMR_WQE_RESERVED4_SHIFT 7 + __le32 Reserved5; +}; + +/* First element (16 bytes) of fmr wqe */ +struct rdma_sq_fmr_wqe_1st { + struct regpair addr; + __le32 l_key; + u8 req_type; + u8 flags; +#define RDMA_SQ_FMR_WQE_1ST_COMP_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_1ST_COMP_FLG_SHIFT 0 +#define RDMA_SQ_FMR_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_FMR_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_FMR_WQE_1ST_SE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_1ST_SE_FLG_SHIFT 3 +#define RDMA_SQ_FMR_WQE_1ST_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_1ST_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_FMR_WQE_1ST_DIF_ON_HOST_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5 +#define RDMA_SQ_FMR_WQE_1ST_RESERVED0_MASK 0x3 +#define RDMA_SQ_FMR_WQE_1ST_RESERVED0_SHIFT 6 + u8 wqe_size; + u8 prev_wqe_size; +}; + +/* Second element (16 bytes) of fmr wqe */ +struct rdma_sq_fmr_wqe_2nd { + u8 fmr_ctrl; +#define RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_MASK 0x1F +#define RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_SHIFT 0 +#define RDMA_SQ_FMR_WQE_2ND_ZERO_BASED_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_ZERO_BASED_SHIFT 5 +#define RDMA_SQ_FMR_WQE_2ND_BIND_EN_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_BIND_EN_SHIFT 6 +#define RDMA_SQ_FMR_WQE_2ND_RESERVED1_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_RESERVED1_SHIFT 7 + u8 access_ctrl; +#define RDMA_SQ_FMR_WQE_2ND_REMOTE_READ_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_REMOTE_READ_SHIFT 0 +#define RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE_SHIFT 1 +#define RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC_SHIFT 2 +#define RDMA_SQ_FMR_WQE_2ND_LOCAL_READ_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_LOCAL_READ_SHIFT 3 +#define RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE_MASK 0x1 +#define RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE_SHIFT 4 +#define RDMA_SQ_FMR_WQE_2ND_RESERVED2_MASK 0x7 +#define RDMA_SQ_FMR_WQE_2ND_RESERVED2_SHIFT 5 + u8 reserved3; + u8 length_hi; + __le32 length_lo; + struct regpair pbl_addr; +}; + +/* Third element (16 bytes) of fmr wqe */ +struct rdma_sq_fmr_wqe_3rd { + __le32 dif_base_ref_tag; + __le16 dif_app_tag; + __le16 dif_app_tag_mask; + __le16 dif_runt_crc_value; + __le16 dif_flags; +#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_SHIFT 0 +#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_MASK 0x1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_SHIFT 1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_SHIFT 2 +#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_MASK 0x1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_SHIFT 3 +#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_SHIFT 4 +#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_MASK 0x1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_SHIFT 5 +#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_MASK 0x1 +#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_SHIFT 6 +#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_MASK 0x1FF +#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_SHIFT 7 + __le32 Reserved5; +}; + +struct rdma_sq_local_inv_wqe { + struct regpair reserved; + __le32 inv_l_key; + u8 req_type; + u8 flags; +#define RDMA_SQ_LOCAL_INV_WQE_COMP_FLG_MASK 0x1 +#define RDMA_SQ_LOCAL_INV_WQE_COMP_FLG_SHIFT 0 +#define RDMA_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_LOCAL_INV_WQE_SE_FLG_MASK 0x1 +#define RDMA_SQ_LOCAL_INV_WQE_SE_FLG_SHIFT 3 +#define RDMA_SQ_LOCAL_INV_WQE_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_LOCAL_INV_WQE_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_LOCAL_INV_WQE_DIF_ON_HOST_FLG_MASK 0x1 +#define RDMA_SQ_LOCAL_INV_WQE_DIF_ON_HOST_FLG_SHIFT 5 +#define RDMA_SQ_LOCAL_INV_WQE_RESERVED0_MASK 0x3 +#define RDMA_SQ_LOCAL_INV_WQE_RESERVED0_SHIFT 6 + u8 wqe_size; + u8 prev_wqe_size; +}; + +struct rdma_sq_rdma_wqe { + __le32 imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define RDMA_SQ_RDMA_WQE_COMP_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_COMP_FLG_SHIFT 0 +#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_RDMA_WQE_SE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_SE_FLG_SHIFT 3 +#define RDMA_SQ_RDMA_WQE_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_SHIFT 5 +#define RDMA_SQ_RDMA_WQE_RESERVED0_MASK 0x3 +#define RDMA_SQ_RDMA_WQE_RESERVED0_SHIFT 6 + u8 wqe_size; + u8 prev_wqe_size; + struct regpair remote_va; + __le32 r_key; + u8 dif_flags; +#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_SHIFT 0 +#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_SHIFT 1 +#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_SHIFT 2 +#define RDMA_SQ_RDMA_WQE_RESERVED1_MASK 0x1F +#define RDMA_SQ_RDMA_WQE_RESERVED1_SHIFT 3 + u8 reserved2[3]; +}; + +/* First element (16 bytes) of rdma wqe */ +struct rdma_sq_rdma_wqe_1st { + __le32 imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define RDMA_SQ_RDMA_WQE_1ST_COMP_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_COMP_FLG_SHIFT 0 +#define RDMA_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_RDMA_WQE_1ST_SE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_SE_FLG_SHIFT 3 +#define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5 +#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_MASK 0x3 +#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT 6 + u8 wqe_size; + u8 prev_wqe_size; +}; + +/* Second element (16 bytes) of rdma wqe */ +struct rdma_sq_rdma_wqe_2nd { + struct regpair remote_va; + __le32 r_key; + u8 dif_flags; +#define RDMA_SQ_RDMA_WQE_2ND_DIF_BLOCK_SIZE_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_2ND_DIF_BLOCK_SIZE_SHIFT 0 +#define RDMA_SQ_RDMA_WQE_2ND_DIF_FIRST_SEGMENT_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_2ND_DIF_FIRST_SEGMENT_FLG_SHIFT 1 +#define RDMA_SQ_RDMA_WQE_2ND_DIF_LAST_SEGMENT_FLG_MASK 0x1 +#define RDMA_SQ_RDMA_WQE_2ND_DIF_LAST_SEGMENT_FLG_SHIFT 2 +#define RDMA_SQ_RDMA_WQE_2ND_RESERVED1_MASK 0x1F +#define RDMA_SQ_RDMA_WQE_2ND_RESERVED1_SHIFT 3 + u8 reserved2[3]; +}; + +/* SQ WQE req type enumeration */ +enum rdma_sq_req_type { + RDMA_SQ_REQ_TYPE_SEND, + RDMA_SQ_REQ_TYPE_SEND_WITH_IMM, + RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE, + RDMA_SQ_REQ_TYPE_RDMA_WR, + RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM, + RDMA_SQ_REQ_TYPE_RDMA_RD, + RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP, + RDMA_SQ_REQ_TYPE_ATOMIC_ADD, + RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE, + RDMA_SQ_REQ_TYPE_FAST_MR, + RDMA_SQ_REQ_TYPE_BIND, + RDMA_SQ_REQ_TYPE_INVALID, + MAX_RDMA_SQ_REQ_TYPE +}; + +struct rdma_sq_send_wqe { + __le32 inv_key_or_imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define RDMA_SQ_SEND_WQE_COMP_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_COMP_FLG_SHIFT 0 +#define RDMA_SQ_SEND_WQE_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_SEND_WQE_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_SEND_WQE_SE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_SE_FLG_SHIFT 3 +#define RDMA_SQ_SEND_WQE_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_SEND_WQE_DIF_ON_HOST_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_DIF_ON_HOST_FLG_SHIFT 5 +#define RDMA_SQ_SEND_WQE_RESERVED0_MASK 0x3 +#define RDMA_SQ_SEND_WQE_RESERVED0_SHIFT 6 + u8 wqe_size; + u8 prev_wqe_size; + __le32 reserved1[4]; +}; + +struct rdma_sq_send_wqe_1st { + __le32 inv_key_or_imm_data; + __le32 length; + __le32 xrc_srq; + u8 req_type; + u8 flags; +#define RDMA_SQ_SEND_WQE_1ST_COMP_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_1ST_COMP_FLG_SHIFT 0 +#define RDMA_SQ_SEND_WQE_1ST_RD_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_1ST_RD_FENCE_FLG_SHIFT 1 +#define RDMA_SQ_SEND_WQE_1ST_INV_FENCE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_1ST_INV_FENCE_FLG_SHIFT 2 +#define RDMA_SQ_SEND_WQE_1ST_SE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_1ST_SE_FLG_SHIFT 3 +#define RDMA_SQ_SEND_WQE_1ST_INLINE_FLG_MASK 0x1 +#define RDMA_SQ_SEND_WQE_1ST_INLINE_FLG_SHIFT 4 +#define RDMA_SQ_SEND_WQE_1ST_RESERVED0_MASK 0x7 +#define RDMA_SQ_SEND_WQE_1ST_RESERVED0_SHIFT 5 + u8 wqe_size; + u8 prev_wqe_size; +}; + +struct rdma_sq_send_wqe_2st { + __le32 reserved1[4]; +}; + +#endif /* __QED_HSI_RDMA__ */ diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c new file mode 100644 index 000000000000..a61514296767 --- /dev/null +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -0,0 +1,3547 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include <linux/dma-mapping.h> +#include <linux/crc32.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <net/udp.h> +#include <linux/iommu.h> + +#include <rdma/ib_verbs.h> +#include <rdma/ib_user_verbs.h> +#include <rdma/iw_cm.h> +#include <rdma/ib_umem.h> +#include <rdma/ib_addr.h> +#include <rdma/ib_cache.h> + +#include "qedr_hsi.h" +#include <linux/qed/qed_if.h> +#include "qedr.h" +#include "verbs.h" +#include <rdma/qedr-abi.h> +#include "qedr_cm.h" + +#define DB_ADDR_SHIFT(addr) ((addr) << DB_PWM_ADDR_OFFSET_SHIFT) + +int qedr_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey) +{ + if (index > QEDR_ROCE_PKEY_TABLE_LEN) + return -EINVAL; + + *pkey = QEDR_ROCE_PKEY_DEFAULT; + return 0; +} + +int qedr_query_gid(struct ib_device *ibdev, u8 port, int index, + union ib_gid *sgid) +{ + struct qedr_dev *dev = get_qedr_dev(ibdev); + int rc = 0; + + if (!rdma_cap_roce_gid_table(ibdev, port)) + return -ENODEV; + + rc = ib_get_cached_gid(ibdev, port, index, sgid, NULL); + if (rc == -EAGAIN) { + memcpy(sgid, &zgid, sizeof(*sgid)); + return 0; + } + + DP_DEBUG(dev, QEDR_MSG_INIT, "query gid: index=%d %llx:%llx\n", index, + sgid->global.interface_id, sgid->global.subnet_prefix); + + return rc; +} + +int qedr_add_gid(struct ib_device *device, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr, void **context) +{ + if (!rdma_cap_roce_gid_table(device, port_num)) + return -EINVAL; + + if (port_num > QEDR_MAX_PORT) + return -EINVAL; + + if (!context) + return -EINVAL; + + return 0; +} + +int qedr_del_gid(struct ib_device *device, u8 port_num, + unsigned int index, void **context) +{ + if (!rdma_cap_roce_gid_table(device, port_num)) + return -EINVAL; + + if (port_num > QEDR_MAX_PORT) + return -EINVAL; + + if (!context) + return -EINVAL; + + return 0; +} + +int qedr_query_device(struct ib_device *ibdev, + struct ib_device_attr *attr, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibdev); + struct qedr_device_attr *qattr = &dev->attr; + + if (!dev->rdma_ctx) { + DP_ERR(dev, + "qedr_query_device called with invalid params rdma_ctx=%p\n", + dev->rdma_ctx); + return -EINVAL; + } + + memset(attr, 0, sizeof(*attr)); + + attr->fw_ver = qattr->fw_ver; + attr->sys_image_guid = qattr->sys_image_guid; + attr->max_mr_size = qattr->max_mr_size; + attr->page_size_cap = qattr->page_size_caps; + attr->vendor_id = qattr->vendor_id; + attr->vendor_part_id = qattr->vendor_part_id; + attr->hw_ver = qattr->hw_ver; + attr->max_qp = qattr->max_qp; + attr->max_qp_wr = max_t(u32, qattr->max_sqe, qattr->max_rqe); + attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD | + IB_DEVICE_RC_RNR_NAK_GEN | + IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; + + attr->max_sge = qattr->max_sge; + attr->max_sge_rd = qattr->max_sge; + attr->max_cq = qattr->max_cq; + attr->max_cqe = qattr->max_cqe; + attr->max_mr = qattr->max_mr; + attr->max_mw = qattr->max_mw; + attr->max_pd = qattr->max_pd; + attr->atomic_cap = dev->atomic_cap; + attr->max_fmr = qattr->max_fmr; + attr->max_map_per_fmr = 16; + attr->max_qp_init_rd_atom = + 1 << (fls(qattr->max_qp_req_rd_atomic_resc) - 1); + attr->max_qp_rd_atom = + min(1 << (fls(qattr->max_qp_resp_rd_atomic_resc) - 1), + attr->max_qp_init_rd_atom); + + attr->max_srq = qattr->max_srq; + attr->max_srq_sge = qattr->max_srq_sge; + attr->max_srq_wr = qattr->max_srq_wr; + + attr->local_ca_ack_delay = qattr->dev_ack_delay; + attr->max_fast_reg_page_list_len = qattr->max_mr / 8; + attr->max_pkeys = QEDR_ROCE_PKEY_MAX; + attr->max_ah = qattr->max_ah; + + return 0; +} + +#define QEDR_SPEED_SDR (1) +#define QEDR_SPEED_DDR (2) +#define QEDR_SPEED_QDR (4) +#define QEDR_SPEED_FDR10 (8) +#define QEDR_SPEED_FDR (16) +#define QEDR_SPEED_EDR (32) + +static inline void get_link_speed_and_width(int speed, u8 *ib_speed, + u8 *ib_width) +{ + switch (speed) { + case 1000: + *ib_speed = QEDR_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + break; + case 10000: + *ib_speed = QEDR_SPEED_QDR; + *ib_width = IB_WIDTH_1X; + break; + + case 20000: + *ib_speed = QEDR_SPEED_DDR; + *ib_width = IB_WIDTH_4X; + break; + + case 25000: + *ib_speed = QEDR_SPEED_EDR; + *ib_width = IB_WIDTH_1X; + break; + + case 40000: + *ib_speed = QEDR_SPEED_QDR; + *ib_width = IB_WIDTH_4X; + break; + + case 50000: + *ib_speed = QEDR_SPEED_QDR; + *ib_width = IB_WIDTH_4X; + break; + + case 100000: + *ib_speed = QEDR_SPEED_EDR; + *ib_width = IB_WIDTH_4X; + break; + + default: + /* Unsupported */ + *ib_speed = QEDR_SPEED_SDR; + *ib_width = IB_WIDTH_1X; + } +} + +int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr) +{ + struct qedr_dev *dev; + struct qed_rdma_port *rdma_port; + + dev = get_qedr_dev(ibdev); + if (port > 1) { + DP_ERR(dev, "invalid_port=0x%x\n", port); + return -EINVAL; + } + + if (!dev->rdma_ctx) { + DP_ERR(dev, "rdma_ctx is NULL\n"); + return -EINVAL; + } + + rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx); + memset(attr, 0, sizeof(*attr)); + + if (rdma_port->port_state == QED_RDMA_PORT_UP) { + attr->state = IB_PORT_ACTIVE; + attr->phys_state = 5; + } else { + attr->state = IB_PORT_DOWN; + attr->phys_state = 3; + } + attr->max_mtu = IB_MTU_4096; + attr->active_mtu = iboe_get_mtu(dev->ndev->mtu); + attr->lid = 0; + attr->lmc = 0; + attr->sm_lid = 0; + attr->sm_sl = 0; + attr->port_cap_flags = IB_PORT_IP_BASED_GIDS; + attr->gid_tbl_len = QEDR_MAX_SGID; + attr->pkey_tbl_len = QEDR_ROCE_PKEY_TABLE_LEN; + attr->bad_pkey_cntr = rdma_port->pkey_bad_counter; + attr->qkey_viol_cntr = 0; + get_link_speed_and_width(rdma_port->link_speed, + &attr->active_speed, &attr->active_width); + attr->max_msg_sz = rdma_port->max_msg_size; + attr->max_vl_num = 4; + + return 0; +} + +int qedr_modify_port(struct ib_device *ibdev, u8 port, int mask, + struct ib_port_modify *props) +{ + struct qedr_dev *dev; + + dev = get_qedr_dev(ibdev); + if (port > 1) { + DP_ERR(dev, "invalid_port=0x%x\n", port); + return -EINVAL; + } + + return 0; +} + +static int qedr_add_mmap(struct qedr_ucontext *uctx, u64 phy_addr, + unsigned long len) +{ + struct qedr_mm *mm; + + mm = kzalloc(sizeof(*mm), GFP_KERNEL); + if (!mm) + return -ENOMEM; + + mm->key.phy_addr = phy_addr; + /* This function might be called with a length which is not a multiple + * of PAGE_SIZE, while the mapping is PAGE_SIZE grained and the kernel + * forces this granularity by increasing the requested size if needed. + * When qedr_mmap is called, it will search the list with the updated + * length as a key. To prevent search failures, the length is rounded up + * in advance to PAGE_SIZE. + */ + mm->key.len = roundup(len, PAGE_SIZE); + INIT_LIST_HEAD(&mm->entry); + + mutex_lock(&uctx->mm_list_lock); + list_add(&mm->entry, &uctx->mm_head); + mutex_unlock(&uctx->mm_list_lock); + + DP_DEBUG(uctx->dev, QEDR_MSG_MISC, + "added (addr=0x%llx,len=0x%lx) for ctx=%p\n", + (unsigned long long)mm->key.phy_addr, + (unsigned long)mm->key.len, uctx); + + return 0; +} + +static bool qedr_search_mmap(struct qedr_ucontext *uctx, u64 phy_addr, + unsigned long len) +{ + bool found = false; + struct qedr_mm *mm; + + mutex_lock(&uctx->mm_list_lock); + list_for_each_entry(mm, &uctx->mm_head, entry) { + if (len != mm->key.len || phy_addr != mm->key.phy_addr) + continue; + + found = true; + break; + } + mutex_unlock(&uctx->mm_list_lock); + DP_DEBUG(uctx->dev, QEDR_MSG_MISC, + "searched for (addr=0x%llx,len=0x%lx) for ctx=%p, result=%d\n", + mm->key.phy_addr, mm->key.len, uctx, found); + + return found; +} + +struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + int rc; + struct qedr_ucontext *ctx; + struct qedr_alloc_ucontext_resp uresp; + struct qedr_dev *dev = get_qedr_dev(ibdev); + struct qed_rdma_add_user_out_params oparams; + + if (!udata) + return ERR_PTR(-EFAULT); + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return ERR_PTR(-ENOMEM); + + rc = dev->ops->rdma_add_user(dev->rdma_ctx, &oparams); + if (rc) { + DP_ERR(dev, + "failed to allocate a DPI for a new RoCE application, rc=%d. To overcome this consider to increase the number of DPIs, increase the doorbell BAR size or just close unnecessary RoCE applications. In order to increase the number of DPIs consult the qedr readme\n", + rc); + goto err; + } + + ctx->dpi = oparams.dpi; + ctx->dpi_addr = oparams.dpi_addr; + ctx->dpi_phys_addr = oparams.dpi_phys_addr; + ctx->dpi_size = oparams.dpi_size; + INIT_LIST_HEAD(&ctx->mm_head); + mutex_init(&ctx->mm_list_lock); + + memset(&uresp, 0, sizeof(uresp)); + + uresp.db_pa = ctx->dpi_phys_addr; + uresp.db_size = ctx->dpi_size; + uresp.max_send_wr = dev->attr.max_sqe; + uresp.max_recv_wr = dev->attr.max_rqe; + uresp.max_srq_wr = dev->attr.max_srq_wr; + uresp.sges_per_send_wr = QEDR_MAX_SQE_ELEMENTS_PER_SQE; + uresp.sges_per_recv_wr = QEDR_MAX_RQE_ELEMENTS_PER_RQE; + uresp.sges_per_srq_wr = dev->attr.max_srq_sge; + uresp.max_cqes = QEDR_MAX_CQES; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + goto err; + + ctx->dev = dev; + + rc = qedr_add_mmap(ctx, ctx->dpi_phys_addr, ctx->dpi_size); + if (rc) + goto err; + + DP_DEBUG(dev, QEDR_MSG_INIT, "Allocating user context %p\n", + &ctx->ibucontext); + return &ctx->ibucontext; + +err: + kfree(ctx); + return ERR_PTR(rc); +} + +int qedr_dealloc_ucontext(struct ib_ucontext *ibctx) +{ + struct qedr_ucontext *uctx = get_qedr_ucontext(ibctx); + struct qedr_mm *mm, *tmp; + int status = 0; + + DP_DEBUG(uctx->dev, QEDR_MSG_INIT, "Deallocating user context %p\n", + uctx); + uctx->dev->ops->rdma_remove_user(uctx->dev->rdma_ctx, uctx->dpi); + + list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) { + DP_DEBUG(uctx->dev, QEDR_MSG_MISC, + "deleted (addr=0x%llx,len=0x%lx) for ctx=%p\n", + mm->key.phy_addr, mm->key.len, uctx); + list_del(&mm->entry); + kfree(mm); + } + + kfree(uctx); + return status; +} + +int qedr_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) +{ + struct qedr_ucontext *ucontext = get_qedr_ucontext(context); + struct qedr_dev *dev = get_qedr_dev(context->device); + unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT; + u64 unmapped_db = dev->db_phys_addr; + unsigned long len = (vma->vm_end - vma->vm_start); + int rc = 0; + bool found; + + DP_DEBUG(dev, QEDR_MSG_INIT, + "qedr_mmap called vm_page=0x%lx vm_pgoff=0x%lx unmapped_db=0x%llx db_size=%x, len=%lx\n", + vm_page, vma->vm_pgoff, unmapped_db, dev->db_size, len); + if (vma->vm_start & (PAGE_SIZE - 1)) { + DP_ERR(dev, "Vma_start not page aligned = %ld\n", + vma->vm_start); + return -EINVAL; + } + + found = qedr_search_mmap(ucontext, vm_page, len); + if (!found) { + DP_ERR(dev, "Vma_pgoff not found in mapped array = %ld\n", + vma->vm_pgoff); + return -EINVAL; + } + + DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); + + if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db + + dev->db_size))) { + DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping doorbell bar\n"); + if (vma->vm_flags & VM_READ) { + DP_ERR(dev, "Trying to map doorbell bar for read\n"); + return -EPERM; + } + + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + + rc = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + PAGE_SIZE, vma->vm_page_prot); + } else { + DP_DEBUG(dev, QEDR_MSG_INIT, "Mapping chains\n"); + rc = remap_pfn_range(vma, vma->vm_start, + vma->vm_pgoff, len, vma->vm_page_prot); + } + DP_DEBUG(dev, QEDR_MSG_INIT, "qedr_mmap return code: %d\n", rc); + return rc; +} + +struct ib_pd *qedr_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibdev); + struct qedr_ucontext *uctx = NULL; + struct qedr_alloc_pd_uresp uresp; + struct qedr_pd *pd; + u16 pd_id; + int rc; + + DP_DEBUG(dev, QEDR_MSG_INIT, "Function called from: %s\n", + (udata && context) ? "User Lib" : "Kernel"); + + if (!dev->rdma_ctx) { + DP_ERR(dev, "invlaid RDMA context\n"); + return ERR_PTR(-EINVAL); + } + + pd = kzalloc(sizeof(*pd), GFP_KERNEL); + if (!pd) + return ERR_PTR(-ENOMEM); + + dev->ops->rdma_alloc_pd(dev->rdma_ctx, &pd_id); + + uresp.pd_id = pd_id; + pd->pd_id = pd_id; + + if (udata && context) { + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + DP_ERR(dev, "copy error pd_id=0x%x.\n", pd_id); + uctx = get_qedr_ucontext(context); + uctx->pd = pd; + pd->uctx = uctx; + } + + return &pd->ibpd; +} + +int qedr_dealloc_pd(struct ib_pd *ibpd) +{ + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_pd *pd = get_qedr_pd(ibpd); + + if (!pd) + pr_err("Invalid PD received in dealloc_pd\n"); + + DP_DEBUG(dev, QEDR_MSG_INIT, "Deallocating PD %d\n", pd->pd_id); + dev->ops->rdma_dealloc_pd(dev->rdma_ctx, pd->pd_id); + + kfree(pd); + + return 0; +} + +static void qedr_free_pbl(struct qedr_dev *dev, + struct qedr_pbl_info *pbl_info, struct qedr_pbl *pbl) +{ + struct pci_dev *pdev = dev->pdev; + int i; + + for (i = 0; i < pbl_info->num_pbls; i++) { + if (!pbl[i].va) + continue; + dma_free_coherent(&pdev->dev, pbl_info->pbl_size, + pbl[i].va, pbl[i].pa); + } + + kfree(pbl); +} + +#define MIN_FW_PBL_PAGE_SIZE (4 * 1024) +#define MAX_FW_PBL_PAGE_SIZE (64 * 1024) + +#define NUM_PBES_ON_PAGE(_page_size) (_page_size / sizeof(u64)) +#define MAX_PBES_ON_PAGE NUM_PBES_ON_PAGE(MAX_FW_PBL_PAGE_SIZE) +#define MAX_PBES_TWO_LAYER (MAX_PBES_ON_PAGE * MAX_PBES_ON_PAGE) + +static struct qedr_pbl *qedr_alloc_pbl_tbl(struct qedr_dev *dev, + struct qedr_pbl_info *pbl_info, + gfp_t flags) +{ + struct pci_dev *pdev = dev->pdev; + struct qedr_pbl *pbl_table; + dma_addr_t *pbl_main_tbl; + dma_addr_t pa; + void *va; + int i; + + pbl_table = kcalloc(pbl_info->num_pbls, sizeof(*pbl_table), flags); + if (!pbl_table) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < pbl_info->num_pbls; i++) { + va = dma_alloc_coherent(&pdev->dev, pbl_info->pbl_size, + &pa, flags); + if (!va) + goto err; + + memset(va, 0, pbl_info->pbl_size); + pbl_table[i].va = va; + pbl_table[i].pa = pa; + } + + /* Two-Layer PBLs, if we have more than one pbl we need to initialize + * the first one with physical pointers to all of the rest + */ + pbl_main_tbl = (dma_addr_t *)pbl_table[0].va; + for (i = 0; i < pbl_info->num_pbls - 1; i++) + pbl_main_tbl[i] = pbl_table[i + 1].pa; + + return pbl_table; + +err: + for (i--; i >= 0; i--) + dma_free_coherent(&pdev->dev, pbl_info->pbl_size, + pbl_table[i].va, pbl_table[i].pa); + + qedr_free_pbl(dev, pbl_info, pbl_table); + + return ERR_PTR(-ENOMEM); +} + +static int qedr_prepare_pbl_tbl(struct qedr_dev *dev, + struct qedr_pbl_info *pbl_info, + u32 num_pbes, int two_layer_capable) +{ + u32 pbl_capacity; + u32 pbl_size; + u32 num_pbls; + + if ((num_pbes > MAX_PBES_ON_PAGE) && two_layer_capable) { + if (num_pbes > MAX_PBES_TWO_LAYER) { + DP_ERR(dev, "prepare pbl table: too many pages %d\n", + num_pbes); + return -EINVAL; + } + + /* calculate required pbl page size */ + pbl_size = MIN_FW_PBL_PAGE_SIZE; + pbl_capacity = NUM_PBES_ON_PAGE(pbl_size) * + NUM_PBES_ON_PAGE(pbl_size); + + while (pbl_capacity < num_pbes) { + pbl_size *= 2; + pbl_capacity = pbl_size / sizeof(u64); + pbl_capacity = pbl_capacity * pbl_capacity; + } + + num_pbls = DIV_ROUND_UP(num_pbes, NUM_PBES_ON_PAGE(pbl_size)); + num_pbls++; /* One for the layer0 ( points to the pbls) */ + pbl_info->two_layered = true; + } else { + /* One layered PBL */ + num_pbls = 1; + pbl_size = max_t(u32, MIN_FW_PBL_PAGE_SIZE, + roundup_pow_of_two((num_pbes * sizeof(u64)))); + pbl_info->two_layered = false; + } + + pbl_info->num_pbls = num_pbls; + pbl_info->pbl_size = pbl_size; + pbl_info->num_pbes = num_pbes; + + DP_DEBUG(dev, QEDR_MSG_MR, + "prepare pbl table: num_pbes=%d, num_pbls=%d, pbl_size=%d\n", + pbl_info->num_pbes, pbl_info->num_pbls, pbl_info->pbl_size); + + return 0; +} + +static void qedr_populate_pbls(struct qedr_dev *dev, struct ib_umem *umem, + struct qedr_pbl *pbl, + struct qedr_pbl_info *pbl_info) +{ + int shift, pg_cnt, pages, pbe_cnt, total_num_pbes = 0; + struct qedr_pbl *pbl_tbl; + struct scatterlist *sg; + struct regpair *pbe; + int entry; + u32 addr; + + if (!pbl_info->num_pbes) + return; + + /* If we have a two layered pbl, the first pbl points to the rest + * of the pbls and the first entry lays on the second pbl in the table + */ + if (pbl_info->two_layered) + pbl_tbl = &pbl[1]; + else + pbl_tbl = pbl; + + pbe = (struct regpair *)pbl_tbl->va; + if (!pbe) { + DP_ERR(dev, "cannot populate PBL due to a NULL PBE\n"); + return; + } + + pbe_cnt = 0; + + shift = ilog2(umem->page_size); + + for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { + pages = sg_dma_len(sg) >> shift; + for (pg_cnt = 0; pg_cnt < pages; pg_cnt++) { + /* store the page address in pbe */ + pbe->lo = cpu_to_le32(sg_dma_address(sg) + + umem->page_size * pg_cnt); + addr = upper_32_bits(sg_dma_address(sg) + + umem->page_size * pg_cnt); + pbe->hi = cpu_to_le32(addr); + pbe_cnt++; + total_num_pbes++; + pbe++; + + if (total_num_pbes == pbl_info->num_pbes) + return; + + /* If the given pbl is full storing the pbes, + * move to next pbl. + */ + if (pbe_cnt == (pbl_info->pbl_size / sizeof(u64))) { + pbl_tbl++; + pbe = (struct regpair *)pbl_tbl->va; + pbe_cnt = 0; + } + } + } +} + +static int qedr_copy_cq_uresp(struct qedr_dev *dev, + struct qedr_cq *cq, struct ib_udata *udata) +{ + struct qedr_create_cq_uresp uresp; + int rc; + + memset(&uresp, 0, sizeof(uresp)); + + uresp.db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + uresp.icid = cq->icid; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + DP_ERR(dev, "copy error cqid=0x%x.\n", cq->icid); + + return rc; +} + +static void consume_cqe(struct qedr_cq *cq) +{ + if (cq->latest_cqe == cq->toggle_cqe) + cq->pbl_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK; + + cq->latest_cqe = qed_chain_consume(&cq->pbl); +} + +static inline int qedr_align_cq_entries(int entries) +{ + u64 size, aligned_size; + + /* We allocate an extra entry that we don't report to the FW. */ + size = (entries + 1) * QEDR_CQE_SIZE; + aligned_size = ALIGN(size, PAGE_SIZE); + + return aligned_size / QEDR_CQE_SIZE; +} + +static inline int qedr_init_user_queue(struct ib_ucontext *ib_ctx, + struct qedr_dev *dev, + struct qedr_userq *q, + u64 buf_addr, size_t buf_len, + int access, int dmasync) +{ + int page_cnt; + int rc; + + q->buf_addr = buf_addr; + q->buf_len = buf_len; + q->umem = ib_umem_get(ib_ctx, q->buf_addr, q->buf_len, access, dmasync); + if (IS_ERR(q->umem)) { + DP_ERR(dev, "create user queue: failed ib_umem_get, got %ld\n", + PTR_ERR(q->umem)); + return PTR_ERR(q->umem); + } + + page_cnt = ib_umem_page_count(q->umem); + rc = qedr_prepare_pbl_tbl(dev, &q->pbl_info, page_cnt, 0); + if (rc) + goto err0; + + q->pbl_tbl = qedr_alloc_pbl_tbl(dev, &q->pbl_info, GFP_KERNEL); + if (IS_ERR_OR_NULL(q->pbl_tbl)) + goto err0; + + qedr_populate_pbls(dev, q->umem, q->pbl_tbl, &q->pbl_info); + + return 0; + +err0: + ib_umem_release(q->umem); + + return rc; +} + +static inline void qedr_init_cq_params(struct qedr_cq *cq, + struct qedr_ucontext *ctx, + struct qedr_dev *dev, int vector, + int chain_entries, int page_cnt, + u64 pbl_ptr, + struct qed_rdma_create_cq_in_params + *params) +{ + memset(params, 0, sizeof(*params)); + params->cq_handle_hi = upper_32_bits((uintptr_t)cq); + params->cq_handle_lo = lower_32_bits((uintptr_t)cq); + params->cnq_id = vector; + params->cq_size = chain_entries - 1; + params->dpi = (ctx) ? ctx->dpi : dev->dpi; + params->pbl_num_pages = page_cnt; + params->pbl_ptr = pbl_ptr; + params->pbl_two_level = 0; +} + +static void doorbell_cq(struct qedr_cq *cq, u32 cons, u8 flags) +{ + /* Flush data before signalling doorbell */ + wmb(); + cq->db.data.agg_flags = flags; + cq->db.data.value = cpu_to_le32(cons); + writeq(cq->db.raw, cq->db_addr); + + /* Make sure write would stick */ + mmiowb(); +} + +int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) +{ + struct qedr_cq *cq = get_qedr_cq(ibcq); + unsigned long sflags; + + if (cq->cq_type == QEDR_CQ_TYPE_GSI) + return 0; + + spin_lock_irqsave(&cq->cq_lock, sflags); + + cq->arm_flags = 0; + + if (flags & IB_CQ_SOLICITED) + cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD; + + if (flags & IB_CQ_NEXT_COMP) + cq->arm_flags |= DQ_UCM_ROCE_CQ_ARM_CF_CMD; + + doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags); + + spin_unlock_irqrestore(&cq->cq_lock, sflags); + + return 0; +} + +struct ib_cq *qedr_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, struct ib_udata *udata) +{ + struct qedr_ucontext *ctx = get_qedr_ucontext(ib_ctx); + struct qed_rdma_destroy_cq_out_params destroy_oparams; + struct qed_rdma_destroy_cq_in_params destroy_iparams; + struct qedr_dev *dev = get_qedr_dev(ibdev); + struct qed_rdma_create_cq_in_params params; + struct qedr_create_cq_ureq ureq; + int vector = attr->comp_vector; + int entries = attr->cqe; + struct qedr_cq *cq; + int chain_entries; + int page_cnt; + u64 pbl_ptr; + u16 icid; + int rc; + + DP_DEBUG(dev, QEDR_MSG_INIT, + "create_cq: called from %s. entries=%d, vector=%d\n", + udata ? "User Lib" : "Kernel", entries, vector); + + if (entries > QEDR_MAX_CQES) { + DP_ERR(dev, + "create cq: the number of entries %d is too high. Must be equal or below %d.\n", + entries, QEDR_MAX_CQES); + return ERR_PTR(-EINVAL); + } + + chain_entries = qedr_align_cq_entries(entries); + chain_entries = min_t(int, chain_entries, QEDR_MAX_CQES); + + cq = kzalloc(sizeof(*cq), GFP_KERNEL); + if (!cq) + return ERR_PTR(-ENOMEM); + + if (udata) { + memset(&ureq, 0, sizeof(ureq)); + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + DP_ERR(dev, + "create cq: problem copying data from user space\n"); + goto err0; + } + + if (!ureq.len) { + DP_ERR(dev, + "create cq: cannot create a cq with 0 entries\n"); + goto err0; + } + + cq->cq_type = QEDR_CQ_TYPE_USER; + + rc = qedr_init_user_queue(ib_ctx, dev, &cq->q, ureq.addr, + ureq.len, IB_ACCESS_LOCAL_WRITE, 1); + if (rc) + goto err0; + + pbl_ptr = cq->q.pbl_tbl->pa; + page_cnt = cq->q.pbl_info.num_pbes; + } else { + cq->cq_type = QEDR_CQ_TYPE_KERNEL; + + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_CONSUME, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + chain_entries, + sizeof(union rdma_cqe), + &cq->pbl); + if (rc) + goto err1; + + page_cnt = qed_chain_get_page_cnt(&cq->pbl); + pbl_ptr = qed_chain_get_pbl_phys(&cq->pbl); + } + + qedr_init_cq_params(cq, ctx, dev, vector, chain_entries, page_cnt, + pbl_ptr, ¶ms); + + rc = dev->ops->rdma_create_cq(dev->rdma_ctx, ¶ms, &icid); + if (rc) + goto err2; + + cq->icid = icid; + cq->sig = QEDR_CQ_MAGIC_NUMBER; + spin_lock_init(&cq->cq_lock); + + if (ib_ctx) { + rc = qedr_copy_cq_uresp(dev, cq, udata); + if (rc) + goto err3; + } else { + /* Generate doorbell address. */ + cq->db_addr = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT); + cq->db.data.icid = cq->icid; + cq->db.data.params = DB_AGG_CMD_SET << + RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT; + + /* point to the very last element, passing it we will toggle */ + cq->toggle_cqe = qed_chain_get_last_elem(&cq->pbl); + cq->pbl_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK; + cq->latest_cqe = NULL; + consume_cqe(cq); + cq->cq_cons = qed_chain_get_cons_idx_u32(&cq->pbl); + } + + DP_DEBUG(dev, QEDR_MSG_CQ, + "create cq: icid=0x%0x, addr=%p, size(entries)=0x%0x\n", + cq->icid, cq, params.cq_size); + + return &cq->ibcq; + +err3: + destroy_iparams.icid = cq->icid; + dev->ops->rdma_destroy_cq(dev->rdma_ctx, &destroy_iparams, + &destroy_oparams); +err2: + if (udata) + qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); + else + dev->ops->common->chain_free(dev->cdev, &cq->pbl); +err1: + if (udata) + ib_umem_release(cq->q.umem); +err0: + kfree(cq); + return ERR_PTR(-EINVAL); +} + +int qedr_resize_cq(struct ib_cq *ibcq, int new_cnt, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibcq->device); + struct qedr_cq *cq = get_qedr_cq(ibcq); + + DP_ERR(dev, "cq %p RESIZE NOT SUPPORTED\n", cq); + + return 0; +} + +int qedr_destroy_cq(struct ib_cq *ibcq) +{ + struct qedr_dev *dev = get_qedr_dev(ibcq->device); + struct qed_rdma_destroy_cq_out_params oparams; + struct qed_rdma_destroy_cq_in_params iparams; + struct qedr_cq *cq = get_qedr_cq(ibcq); + + DP_DEBUG(dev, QEDR_MSG_CQ, "destroy cq: cq_id %d", cq->icid); + + /* GSIs CQs are handled by driver, so they don't exist in the FW */ + if (cq->cq_type != QEDR_CQ_TYPE_GSI) { + iparams.icid = cq->icid; + dev->ops->rdma_destroy_cq(dev->rdma_ctx, &iparams, &oparams); + dev->ops->common->chain_free(dev->cdev, &cq->pbl); + } + + if (ibcq->uobject && ibcq->uobject->context) { + qedr_free_pbl(dev, &cq->q.pbl_info, cq->q.pbl_tbl); + ib_umem_release(cq->q.umem); + } + + kfree(cq); + + return 0; +} + +static inline int get_gid_info_from_table(struct ib_qp *ibqp, + struct ib_qp_attr *attr, + int attr_mask, + struct qed_rdma_modify_qp_in_params + *qp_params) +{ + enum rdma_network_type nw_type; + struct ib_gid_attr gid_attr; + union ib_gid gid; + u32 ipv4_addr; + int rc = 0; + int i; + + rc = ib_get_cached_gid(ibqp->device, attr->ah_attr.port_num, + attr->ah_attr.grh.sgid_index, &gid, &gid_attr); + if (rc) + return rc; + + if (!memcmp(&gid, &zgid, sizeof(gid))) + return -ENOENT; + + if (gid_attr.ndev) { + qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev); + + dev_put(gid_attr.ndev); + nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid); + switch (nw_type) { + case RDMA_NETWORK_IPV6: + memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + sizeof(qp_params->sgid)); + memcpy(&qp_params->dgid.bytes[0], + &attr->ah_attr.grh.dgid, + sizeof(qp_params->dgid)); + qp_params->roce_mode = ROCE_V2_IPV6; + SET_FIELD(qp_params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); + break; + case RDMA_NETWORK_IB: + memcpy(&qp_params->sgid.bytes[0], &gid.raw[0], + sizeof(qp_params->sgid)); + memcpy(&qp_params->dgid.bytes[0], + &attr->ah_attr.grh.dgid, + sizeof(qp_params->dgid)); + qp_params->roce_mode = ROCE_V1; + break; + case RDMA_NETWORK_IPV4: + memset(&qp_params->sgid, 0, sizeof(qp_params->sgid)); + memset(&qp_params->dgid, 0, sizeof(qp_params->dgid)); + ipv4_addr = qedr_get_ipv4_from_gid(gid.raw); + qp_params->sgid.ipv4_addr = ipv4_addr; + ipv4_addr = + qedr_get_ipv4_from_gid(attr->ah_attr.grh.dgid.raw); + qp_params->dgid.ipv4_addr = ipv4_addr; + SET_FIELD(qp_params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1); + qp_params->roce_mode = ROCE_V2_IPV4; + break; + } + } + + for (i = 0; i < 4; i++) { + qp_params->sgid.dwords[i] = ntohl(qp_params->sgid.dwords[i]); + qp_params->dgid.dwords[i] = ntohl(qp_params->dgid.dwords[i]); + } + + if (qp_params->vlan_id >= VLAN_CFI_MASK) + qp_params->vlan_id = 0; + + return 0; +} + +static void qedr_cleanup_user_sq(struct qedr_dev *dev, struct qedr_qp *qp) +{ + qedr_free_pbl(dev, &qp->usq.pbl_info, qp->usq.pbl_tbl); + ib_umem_release(qp->usq.umem); +} + +static void qedr_cleanup_user_rq(struct qedr_dev *dev, struct qedr_qp *qp) +{ + qedr_free_pbl(dev, &qp->urq.pbl_info, qp->urq.pbl_tbl); + ib_umem_release(qp->urq.umem); +} + +static void qedr_cleanup_kernel_sq(struct qedr_dev *dev, struct qedr_qp *qp) +{ + dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl); + kfree(qp->wqe_wr_id); +} + +static void qedr_cleanup_kernel_rq(struct qedr_dev *dev, struct qedr_qp *qp) +{ + dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl); + kfree(qp->rqe_wr_id); +} + +static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev, + struct ib_qp_init_attr *attrs) +{ + struct qedr_device_attr *qattr = &dev->attr; + + /* QP0... attrs->qp_type == IB_QPT_GSI */ + if (attrs->qp_type != IB_QPT_RC && attrs->qp_type != IB_QPT_GSI) { + DP_DEBUG(dev, QEDR_MSG_QP, + "create qp: unsupported qp type=0x%x requested\n", + attrs->qp_type); + return -EINVAL; + } + + if (attrs->cap.max_send_wr > qattr->max_sqe) { + DP_ERR(dev, + "create qp: cannot create a SQ with %d elements (max_send_wr=0x%x)\n", + attrs->cap.max_send_wr, qattr->max_sqe); + return -EINVAL; + } + + if (attrs->cap.max_inline_data > qattr->max_inline) { + DP_ERR(dev, + "create qp: unsupported inline data size=0x%x requested (max_inline=0x%x)\n", + attrs->cap.max_inline_data, qattr->max_inline); + return -EINVAL; + } + + if (attrs->cap.max_send_sge > qattr->max_sge) { + DP_ERR(dev, + "create qp: unsupported send_sge=0x%x requested (max_send_sge=0x%x)\n", + attrs->cap.max_send_sge, qattr->max_sge); + return -EINVAL; + } + + if (attrs->cap.max_recv_sge > qattr->max_sge) { + DP_ERR(dev, + "create qp: unsupported recv_sge=0x%x requested (max_recv_sge=0x%x)\n", + attrs->cap.max_recv_sge, qattr->max_sge); + return -EINVAL; + } + + /* Unprivileged user space cannot create special QP */ + if (ibpd->uobject && attrs->qp_type == IB_QPT_GSI) { + DP_ERR(dev, + "create qp: userspace can't create special QPs of type=0x%x\n", + attrs->qp_type); + return -EINVAL; + } + + return 0; +} + +static void qedr_copy_rq_uresp(struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) +{ + uresp->rq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + uresp->rq_icid = qp->icid; +} + +static void qedr_copy_sq_uresp(struct qedr_create_qp_uresp *uresp, + struct qedr_qp *qp) +{ + uresp->sq_db_offset = DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + uresp->sq_icid = qp->icid + 1; +} + +static int qedr_copy_qp_uresp(struct qedr_dev *dev, + struct qedr_qp *qp, struct ib_udata *udata) +{ + struct qedr_create_qp_uresp uresp; + int rc; + + memset(&uresp, 0, sizeof(uresp)); + qedr_copy_sq_uresp(&uresp, qp); + qedr_copy_rq_uresp(&uresp, qp); + + uresp.atomic_supported = dev->atomic_cap != IB_ATOMIC_NONE; + uresp.qp_id = qp->qp_id; + + rc = ib_copy_to_udata(udata, &uresp, sizeof(uresp)); + if (rc) + DP_ERR(dev, + "create qp: failed a copy to user space with qp icid=0x%x.\n", + qp->icid); + + return rc; +} + +static void qedr_set_qp_init_params(struct qedr_dev *dev, + struct qedr_qp *qp, + struct qedr_pd *pd, + struct ib_qp_init_attr *attrs) +{ + qp->pd = pd; + + spin_lock_init(&qp->q_lock); + + qp->qp_type = attrs->qp_type; + qp->max_inline_data = attrs->cap.max_inline_data; + qp->sq.max_sges = attrs->cap.max_send_sge; + qp->state = QED_ROCE_QP_STATE_RESET; + qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; + qp->sq_cq = get_qedr_cq(attrs->send_cq); + qp->rq_cq = get_qedr_cq(attrs->recv_cq); + qp->dev = dev; + + DP_DEBUG(dev, QEDR_MSG_QP, + "QP params:\tpd = %d, qp_type = %d, max_inline_data = %d, state = %d, signaled = %d, use_srq=%d\n", + pd->pd_id, qp->qp_type, qp->max_inline_data, + qp->state, qp->signaled, (attrs->srq) ? 1 : 0); + DP_DEBUG(dev, QEDR_MSG_QP, + "SQ params:\tsq_max_sges = %d, sq_cq_id = %d\n", + qp->sq.max_sges, qp->sq_cq->icid); + qp->rq.max_sges = attrs->cap.max_recv_sge; + DP_DEBUG(dev, QEDR_MSG_QP, + "RQ params:\trq_max_sges = %d, rq_cq_id = %d\n", + qp->rq.max_sges, qp->rq_cq->icid); +} + +static inline void +qedr_init_qp_user_params(struct qed_rdma_create_qp_in_params *params, + struct qedr_create_qp_ureq *ureq) +{ + /* QP handle to be written in CQE */ + params->qp_handle_lo = ureq->qp_handle_lo; + params->qp_handle_hi = ureq->qp_handle_hi; +} + +static inline void +qedr_init_qp_kernel_doorbell_sq(struct qedr_dev *dev, struct qedr_qp *qp) +{ + qp->sq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD); + qp->sq.db_data.data.icid = qp->icid + 1; +} + +static inline void +qedr_init_qp_kernel_doorbell_rq(struct qedr_dev *dev, struct qedr_qp *qp) +{ + qp->rq.db = dev->db_addr + + DB_ADDR_SHIFT(DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD); + qp->rq.db_data.data.icid = qp->icid; +} + +static inline int +qedr_init_qp_kernel_params_rq(struct qedr_dev *dev, + struct qedr_qp *qp, struct ib_qp_init_attr *attrs) +{ + /* Allocate driver internal RQ array */ + qp->rqe_wr_id = kcalloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id), + GFP_KERNEL); + if (!qp->rqe_wr_id) + return -ENOMEM; + + DP_DEBUG(dev, QEDR_MSG_QP, "RQ max_wr set to %d.\n", qp->rq.max_wr); + + return 0; +} + +static inline int +qedr_init_qp_kernel_params_sq(struct qedr_dev *dev, + struct qedr_qp *qp, + struct ib_qp_init_attr *attrs, + struct qed_rdma_create_qp_in_params *params) +{ + u32 temp_max_wr; + + /* Allocate driver internal SQ array */ + temp_max_wr = attrs->cap.max_send_wr * dev->wq_multiplier; + temp_max_wr = min_t(u32, temp_max_wr, dev->attr.max_sqe); + + /* temp_max_wr < attr->max_sqe < u16 so the casting is safe */ + qp->sq.max_wr = (u16)temp_max_wr; + qp->wqe_wr_id = kcalloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id), + GFP_KERNEL); + if (!qp->wqe_wr_id) + return -ENOMEM; + + DP_DEBUG(dev, QEDR_MSG_QP, "SQ max_wr set to %d.\n", qp->sq.max_wr); + + /* QP handle to be written in CQE */ + params->qp_handle_lo = lower_32_bits((uintptr_t)qp); + params->qp_handle_hi = upper_32_bits((uintptr_t)qp); + + return 0; +} + +static inline int qedr_init_qp_kernel_sq(struct qedr_dev *dev, + struct qedr_qp *qp, + struct ib_qp_init_attr *attrs) +{ + u32 n_sq_elems, n_sq_entries; + int rc; + + /* A single work request may take up to QEDR_MAX_SQ_WQE_SIZE elements in + * the ring. The ring should allow at least a single WR, even if the + * user requested none, due to allocation issues. + */ + n_sq_entries = attrs->cap.max_send_wr; + n_sq_entries = min_t(u32, n_sq_entries, dev->attr.max_sqe); + n_sq_entries = max_t(u32, n_sq_entries, 1); + n_sq_elems = n_sq_entries * QEDR_MAX_SQE_ELEMENTS_PER_SQE; + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + n_sq_elems, + QEDR_SQE_ELEMENT_SIZE, + &qp->sq.pbl); + if (rc) { + DP_ERR(dev, "failed to allocate QP %p SQ\n", qp); + return rc; + } + + DP_DEBUG(dev, QEDR_MSG_SQ, + "SQ Pbl base addr = %llx max_send_wr=%d max_wr=%d capacity=%d, rc=%d\n", + qed_chain_get_pbl_phys(&qp->sq.pbl), attrs->cap.max_send_wr, + n_sq_entries, qed_chain_get_capacity(&qp->sq.pbl), rc); + return 0; +} + +static inline int qedr_init_qp_kernel_rq(struct qedr_dev *dev, + struct qedr_qp *qp, + struct ib_qp_init_attr *attrs) +{ + u32 n_rq_elems, n_rq_entries; + int rc; + + /* A single work request may take up to QEDR_MAX_RQ_WQE_SIZE elements in + * the ring. There ring should allow at least a single WR, even if the + * user requested none, due to allocation issues. + */ + n_rq_entries = max_t(u32, attrs->cap.max_recv_wr, 1); + n_rq_elems = n_rq_entries * QEDR_MAX_RQE_ELEMENTS_PER_RQE; + rc = dev->ops->common->chain_alloc(dev->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U32, + n_rq_elems, + QEDR_RQE_ELEMENT_SIZE, + &qp->rq.pbl); + + if (rc) { + DP_ERR(dev, "failed to allocate memory for QP %p RQ\n", qp); + return -ENOMEM; + } + + DP_DEBUG(dev, QEDR_MSG_RQ, + "RQ Pbl base addr = %llx max_recv_wr=%d max_wr=%d capacity=%d, rc=%d\n", + qed_chain_get_pbl_phys(&qp->rq.pbl), attrs->cap.max_recv_wr, + n_rq_entries, qed_chain_get_capacity(&qp->rq.pbl), rc); + + /* n_rq_entries < u16 so the casting is safe */ + qp->rq.max_wr = (u16)n_rq_entries; + + return 0; +} + +static inline void +qedr_init_qp_in_params_sq(struct qedr_dev *dev, + struct qedr_pd *pd, + struct qedr_qp *qp, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata, + struct qed_rdma_create_qp_in_params *params) +{ + /* QP handle to be written in an async event */ + params->qp_handle_async_lo = lower_32_bits((uintptr_t)qp); + params->qp_handle_async_hi = upper_32_bits((uintptr_t)qp); + + params->signal_all = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR); + params->fmr_and_reserved_lkey = !udata; + params->pd = pd->pd_id; + params->dpi = pd->uctx ? pd->uctx->dpi : dev->dpi; + params->sq_cq_id = get_qedr_cq(attrs->send_cq)->icid; + params->max_sq_sges = 0; + params->stats_queue = 0; + + if (udata) { + params->sq_num_pages = qp->usq.pbl_info.num_pbes; + params->sq_pbl_ptr = qp->usq.pbl_tbl->pa; + } else { + params->sq_num_pages = qed_chain_get_page_cnt(&qp->sq.pbl); + params->sq_pbl_ptr = qed_chain_get_pbl_phys(&qp->sq.pbl); + } +} + +static inline void +qedr_init_qp_in_params_rq(struct qedr_qp *qp, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata, + struct qed_rdma_create_qp_in_params *params) +{ + params->rq_cq_id = get_qedr_cq(attrs->recv_cq)->icid; + params->srq_id = 0; + params->use_srq = false; + + if (udata) { + params->rq_num_pages = qp->urq.pbl_info.num_pbes; + params->rq_pbl_ptr = qp->urq.pbl_tbl->pa; + } else { + params->rq_num_pages = qed_chain_get_page_cnt(&qp->rq.pbl); + params->rq_pbl_ptr = qed_chain_get_pbl_phys(&qp->rq.pbl); + } +} + +static inline void qedr_qp_user_print(struct qedr_dev *dev, struct qedr_qp *qp) +{ + DP_DEBUG(dev, QEDR_MSG_QP, + "create qp: successfully created user QP. qp=%p, sq_addr=0x%llx, sq_len=%zd, rq_addr=0x%llx, rq_len=%zd\n", + qp, qp->usq.buf_addr, qp->usq.buf_len, qp->urq.buf_addr, + qp->urq.buf_len); +} + +static inline int qedr_init_user_qp(struct ib_ucontext *ib_ctx, + struct qedr_dev *dev, + struct qedr_qp *qp, + struct qedr_create_qp_ureq *ureq) +{ + int rc; + + /* SQ - read access only (0), dma sync not required (0) */ + rc = qedr_init_user_queue(ib_ctx, dev, &qp->usq, ureq->sq_addr, + ureq->sq_len, 0, 0); + if (rc) + return rc; + + /* RQ - read access only (0), dma sync not required (0) */ + rc = qedr_init_user_queue(ib_ctx, dev, &qp->urq, ureq->rq_addr, + ureq->rq_len, 0, 0); + + if (rc) + qedr_cleanup_user_sq(dev, qp); + return rc; +} + +static inline int +qedr_init_kernel_qp(struct qedr_dev *dev, + struct qedr_qp *qp, + struct ib_qp_init_attr *attrs, + struct qed_rdma_create_qp_in_params *params) +{ + int rc; + + rc = qedr_init_qp_kernel_sq(dev, qp, attrs); + if (rc) { + DP_ERR(dev, "failed to init kernel QP %p SQ\n", qp); + return rc; + } + + rc = qedr_init_qp_kernel_params_sq(dev, qp, attrs, params); + if (rc) { + dev->ops->common->chain_free(dev->cdev, &qp->sq.pbl); + DP_ERR(dev, "failed to init kernel QP %p SQ params\n", qp); + return rc; + } + + rc = qedr_init_qp_kernel_rq(dev, qp, attrs); + if (rc) { + qedr_cleanup_kernel_sq(dev, qp); + DP_ERR(dev, "failed to init kernel QP %p RQ\n", qp); + return rc; + } + + rc = qedr_init_qp_kernel_params_rq(dev, qp, attrs); + if (rc) { + DP_ERR(dev, "failed to init kernel QP %p RQ params\n", qp); + qedr_cleanup_kernel_sq(dev, qp); + dev->ops->common->chain_free(dev->cdev, &qp->rq.pbl); + return rc; + } + + return rc; +} + +struct ib_qp *qedr_create_qp(struct ib_pd *ibpd, + struct ib_qp_init_attr *attrs, + struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qed_rdma_create_qp_out_params out_params; + struct qed_rdma_create_qp_in_params in_params; + struct qedr_pd *pd = get_qedr_pd(ibpd); + struct ib_ucontext *ib_ctx = NULL; + struct qedr_ucontext *ctx = NULL; + struct qedr_create_qp_ureq ureq; + struct qedr_qp *qp; + int rc = 0; + + DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n", + udata ? "user library" : "kernel", pd); + + rc = qedr_check_qp_attrs(ibpd, dev, attrs); + if (rc) + return ERR_PTR(rc); + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) + return ERR_PTR(-ENOMEM); + + if (attrs->srq) + return ERR_PTR(-EINVAL); + + DP_DEBUG(dev, QEDR_MSG_QP, + "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n", + get_qedr_cq(attrs->send_cq), + get_qedr_cq(attrs->send_cq)->icid, + get_qedr_cq(attrs->recv_cq), + get_qedr_cq(attrs->recv_cq)->icid); + + qedr_set_qp_init_params(dev, qp, pd, attrs); + + if (attrs->qp_type == IB_QPT_GSI) { + if (udata) { + DP_ERR(dev, + "create qp: unexpected udata when creating GSI QP\n"); + goto err0; + } + return qedr_create_gsi_qp(dev, attrs, qp); + } + + memset(&in_params, 0, sizeof(in_params)); + + if (udata) { + if (!(udata && ibpd->uobject && ibpd->uobject->context)) + goto err0; + + ib_ctx = ibpd->uobject->context; + ctx = get_qedr_ucontext(ib_ctx); + + memset(&ureq, 0, sizeof(ureq)); + if (ib_copy_from_udata(&ureq, udata, sizeof(ureq))) { + DP_ERR(dev, + "create qp: problem copying data from user space\n"); + goto err0; + } + + rc = qedr_init_user_qp(ib_ctx, dev, qp, &ureq); + if (rc) + goto err0; + + qedr_init_qp_user_params(&in_params, &ureq); + } else { + rc = qedr_init_kernel_qp(dev, qp, attrs, &in_params); + if (rc) + goto err0; + } + + qedr_init_qp_in_params_sq(dev, pd, qp, attrs, udata, &in_params); + qedr_init_qp_in_params_rq(qp, attrs, udata, &in_params); + + qp->qed_qp = dev->ops->rdma_create_qp(dev->rdma_ctx, + &in_params, &out_params); + + if (!qp->qed_qp) + goto err1; + + qp->qp_id = out_params.qp_id; + qp->icid = out_params.icid; + qp->ibqp.qp_num = qp->qp_id; + + if (udata) { + rc = qedr_copy_qp_uresp(dev, qp, udata); + if (rc) + goto err2; + + qedr_qp_user_print(dev, qp); + } else { + qedr_init_qp_kernel_doorbell_sq(dev, qp); + qedr_init_qp_kernel_doorbell_rq(dev, qp); + } + + DP_DEBUG(dev, QEDR_MSG_QP, "created %s space QP %p\n", + udata ? "user" : "kernel", qp); + + return &qp->ibqp; + +err2: + rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); + if (rc) + DP_ERR(dev, "create qp: fatal fault. rc=%d", rc); +err1: + if (udata) { + qedr_cleanup_user_sq(dev, qp); + qedr_cleanup_user_rq(dev, qp); + } else { + qedr_cleanup_kernel_sq(dev, qp); + qedr_cleanup_kernel_rq(dev, qp); + } + +err0: + kfree(qp); + + return ERR_PTR(-EFAULT); +} + +enum ib_qp_state qedr_get_ibqp_state(enum qed_roce_qp_state qp_state) +{ + switch (qp_state) { + case QED_ROCE_QP_STATE_RESET: + return IB_QPS_RESET; + case QED_ROCE_QP_STATE_INIT: + return IB_QPS_INIT; + case QED_ROCE_QP_STATE_RTR: + return IB_QPS_RTR; + case QED_ROCE_QP_STATE_RTS: + return IB_QPS_RTS; + case QED_ROCE_QP_STATE_SQD: + return IB_QPS_SQD; + case QED_ROCE_QP_STATE_ERR: + return IB_QPS_ERR; + case QED_ROCE_QP_STATE_SQE: + return IB_QPS_SQE; + } + return IB_QPS_ERR; +} + +enum qed_roce_qp_state qedr_get_state_from_ibqp(enum ib_qp_state qp_state) +{ + switch (qp_state) { + case IB_QPS_RESET: + return QED_ROCE_QP_STATE_RESET; + case IB_QPS_INIT: + return QED_ROCE_QP_STATE_INIT; + case IB_QPS_RTR: + return QED_ROCE_QP_STATE_RTR; + case IB_QPS_RTS: + return QED_ROCE_QP_STATE_RTS; + case IB_QPS_SQD: + return QED_ROCE_QP_STATE_SQD; + case IB_QPS_ERR: + return QED_ROCE_QP_STATE_ERR; + default: + return QED_ROCE_QP_STATE_ERR; + } +} + +static void qedr_reset_qp_hwq_info(struct qedr_qp_hwq_info *qph) +{ + qed_chain_reset(&qph->pbl); + qph->prod = 0; + qph->cons = 0; + qph->wqe_cons = 0; + qph->db_data.data.value = cpu_to_le16(0); +} + +static int qedr_update_qp_state(struct qedr_dev *dev, + struct qedr_qp *qp, + enum qed_roce_qp_state new_state) +{ + int status = 0; + + if (new_state == qp->state) + return 1; + + switch (qp->state) { + case QED_ROCE_QP_STATE_RESET: + switch (new_state) { + case QED_ROCE_QP_STATE_INIT: + qp->prev_wqe_size = 0; + qedr_reset_qp_hwq_info(&qp->sq); + qedr_reset_qp_hwq_info(&qp->rq); + break; + default: + status = -EINVAL; + break; + }; + break; + case QED_ROCE_QP_STATE_INIT: + switch (new_state) { + case QED_ROCE_QP_STATE_RTR: + /* Update doorbell (in case post_recv was + * done before move to RTR) + */ + wmb(); + writel(qp->rq.db_data.raw, qp->rq.db); + /* Make sure write takes effect */ + mmiowb(); + break; + case QED_ROCE_QP_STATE_ERR: + break; + default: + /* Invalid state change. */ + status = -EINVAL; + break; + }; + break; + case QED_ROCE_QP_STATE_RTR: + /* RTR->XXX */ + switch (new_state) { + case QED_ROCE_QP_STATE_RTS: + break; + case QED_ROCE_QP_STATE_ERR: + break; + default: + /* Invalid state change. */ + status = -EINVAL; + break; + }; + break; + case QED_ROCE_QP_STATE_RTS: + /* RTS->XXX */ + switch (new_state) { + case QED_ROCE_QP_STATE_SQD: + break; + case QED_ROCE_QP_STATE_ERR: + break; + default: + /* Invalid state change. */ + status = -EINVAL; + break; + }; + break; + case QED_ROCE_QP_STATE_SQD: + /* SQD->XXX */ + switch (new_state) { + case QED_ROCE_QP_STATE_RTS: + case QED_ROCE_QP_STATE_ERR: + break; + default: + /* Invalid state change. */ + status = -EINVAL; + break; + }; + break; + case QED_ROCE_QP_STATE_ERR: + /* ERR->XXX */ + switch (new_state) { + case QED_ROCE_QP_STATE_RESET: + break; + default: + status = -EINVAL; + break; + }; + break; + default: + status = -EINVAL; + break; + }; + + return status; +} + +int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + struct qedr_qp *qp = get_qedr_qp(ibqp); + struct qed_rdma_modify_qp_in_params qp_params = { 0 }; + struct qedr_dev *dev = get_qedr_dev(&qp->dev->ibdev); + enum ib_qp_state old_qp_state, new_qp_state; + int rc = 0; + + DP_DEBUG(dev, QEDR_MSG_QP, + "modify qp: qp %p attr_mask=0x%x, state=%d", qp, attr_mask, + attr->qp_state); + + old_qp_state = qedr_get_ibqp_state(qp->state); + if (attr_mask & IB_QP_STATE) + new_qp_state = attr->qp_state; + else + new_qp_state = old_qp_state; + + if (!ib_modify_qp_is_ok + (old_qp_state, new_qp_state, ibqp->qp_type, attr_mask, + IB_LINK_LAYER_ETHERNET)) { + DP_ERR(dev, + "modify qp: invalid attribute mask=0x%x specified for\n" + "qpn=0x%x of type=0x%x old_qp_state=0x%x, new_qp_state=0x%x\n", + attr_mask, qp->qp_id, ibqp->qp_type, old_qp_state, + new_qp_state); + rc = -EINVAL; + goto err; + } + + /* Translate the masks... */ + if (attr_mask & IB_QP_STATE) { + SET_FIELD(qp_params.modify_flags, + QED_RDMA_MODIFY_QP_VALID_NEW_STATE, 1); + qp_params.new_state = qedr_get_state_from_ibqp(attr->qp_state); + } + + if (attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) + qp_params.sqd_async = true; + + if (attr_mask & IB_QP_PKEY_INDEX) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_PKEY, 1); + if (attr->pkey_index >= QEDR_ROCE_PKEY_TABLE_LEN) { + rc = -EINVAL; + goto err; + } + + qp_params.pkey = QEDR_ROCE_PKEY_DEFAULT; + } + + if (attr_mask & IB_QP_QKEY) + qp->qkey = attr->qkey; + + if (attr_mask & IB_QP_ACCESS_FLAGS) { + SET_FIELD(qp_params.modify_flags, + QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN, 1); + qp_params.incoming_rdma_read_en = attr->qp_access_flags & + IB_ACCESS_REMOTE_READ; + qp_params.incoming_rdma_write_en = attr->qp_access_flags & + IB_ACCESS_REMOTE_WRITE; + qp_params.incoming_atomic_en = attr->qp_access_flags & + IB_ACCESS_REMOTE_ATOMIC; + } + + if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) { + if (attr_mask & IB_QP_PATH_MTU) { + if (attr->path_mtu < IB_MTU_256 || + attr->path_mtu > IB_MTU_4096) { + pr_err("error: Only MTU sizes of 256, 512, 1024, 2048 and 4096 are supported by RoCE\n"); + rc = -EINVAL; + goto err; + } + qp->mtu = min(ib_mtu_enum_to_int(attr->path_mtu), + ib_mtu_enum_to_int(iboe_get_mtu + (dev->ndev->mtu))); + } + + if (!qp->mtu) { + qp->mtu = + ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); + pr_err("Fixing zeroed MTU to qp->mtu = %d\n", qp->mtu); + } + + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR, 1); + + qp_params.traffic_class_tos = attr->ah_attr.grh.traffic_class; + qp_params.flow_label = attr->ah_attr.grh.flow_label; + qp_params.hop_limit_ttl = attr->ah_attr.grh.hop_limit; + + qp->sgid_idx = attr->ah_attr.grh.sgid_index; + + rc = get_gid_info_from_table(ibqp, attr, attr_mask, &qp_params); + if (rc) { + DP_ERR(dev, + "modify qp: problems with GID index %d (rc=%d)\n", + attr->ah_attr.grh.sgid_index, rc); + return rc; + } + + rc = qedr_get_dmac(dev, &attr->ah_attr, + qp_params.remote_mac_addr); + if (rc) + return rc; + + qp_params.use_local_mac = true; + ether_addr_copy(qp_params.local_mac_addr, dev->ndev->dev_addr); + + DP_DEBUG(dev, QEDR_MSG_QP, "dgid=%x:%x:%x:%x\n", + qp_params.dgid.dwords[0], qp_params.dgid.dwords[1], + qp_params.dgid.dwords[2], qp_params.dgid.dwords[3]); + DP_DEBUG(dev, QEDR_MSG_QP, "sgid=%x:%x:%x:%x\n", + qp_params.sgid.dwords[0], qp_params.sgid.dwords[1], + qp_params.sgid.dwords[2], qp_params.sgid.dwords[3]); + DP_DEBUG(dev, QEDR_MSG_QP, "remote_mac=[%pM]\n", + qp_params.remote_mac_addr); +; + + qp_params.mtu = qp->mtu; + qp_params.lb_indication = false; + } + + if (!qp_params.mtu) { + /* Stay with current MTU */ + if (qp->mtu) + qp_params.mtu = qp->mtu; + else + qp_params.mtu = + ib_mtu_enum_to_int(iboe_get_mtu(dev->ndev->mtu)); + } + + if (attr_mask & IB_QP_TIMEOUT) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT, 1); + + qp_params.ack_timeout = attr->timeout; + if (attr->timeout) { + u32 temp; + + temp = 4096 * (1UL << attr->timeout) / 1000 / 1000; + /* FW requires [msec] */ + qp_params.ack_timeout = temp; + } else { + /* Infinite */ + qp_params.ack_timeout = 0; + } + } + if (attr_mask & IB_QP_RETRY_CNT) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_RETRY_CNT, 1); + qp_params.retry_cnt = attr->retry_cnt; + } + + if (attr_mask & IB_QP_RNR_RETRY) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT, 1); + qp_params.rnr_retry_cnt = attr->rnr_retry; + } + + if (attr_mask & IB_QP_RQ_PSN) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_RQ_PSN, 1); + qp_params.rq_psn = attr->rq_psn; + qp->rq_psn = attr->rq_psn; + } + + if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC) { + if (attr->max_rd_atomic > dev->attr.max_qp_req_rd_atomic_resc) { + rc = -EINVAL; + DP_ERR(dev, + "unsupported max_rd_atomic=%d, supported=%d\n", + attr->max_rd_atomic, + dev->attr.max_qp_req_rd_atomic_resc); + goto err; + } + + SET_FIELD(qp_params.modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ, 1); + qp_params.max_rd_atomic_req = attr->max_rd_atomic; + } + + if (attr_mask & IB_QP_MIN_RNR_TIMER) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER, 1); + qp_params.min_rnr_nak_timer = attr->min_rnr_timer; + } + + if (attr_mask & IB_QP_SQ_PSN) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_SQ_PSN, 1); + qp_params.sq_psn = attr->sq_psn; + qp->sq_psn = attr->sq_psn; + } + + if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC) { + if (attr->max_dest_rd_atomic > + dev->attr.max_qp_resp_rd_atomic_resc) { + DP_ERR(dev, + "unsupported max_dest_rd_atomic=%d, supported=%d\n", + attr->max_dest_rd_atomic, + dev->attr.max_qp_resp_rd_atomic_resc); + + rc = -EINVAL; + goto err; + } + + SET_FIELD(qp_params.modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP, 1); + qp_params.max_rd_atomic_resp = attr->max_dest_rd_atomic; + } + + if (attr_mask & IB_QP_DEST_QPN) { + SET_FIELD(qp_params.modify_flags, + QED_ROCE_MODIFY_QP_VALID_DEST_QP, 1); + + qp_params.dest_qp = attr->dest_qp_num; + qp->dest_qp_num = attr->dest_qp_num; + } + + if (qp->qp_type != IB_QPT_GSI) + rc = dev->ops->rdma_modify_qp(dev->rdma_ctx, + qp->qed_qp, &qp_params); + + if (attr_mask & IB_QP_STATE) { + if ((qp->qp_type != IB_QPT_GSI) && (!udata)) + qedr_update_qp_state(dev, qp, qp_params.new_state); + qp->state = qp_params.new_state; + } + +err: + return rc; +} + +static int qedr_to_ib_qp_acc_flags(struct qed_rdma_query_qp_out_params *params) +{ + int ib_qp_acc_flags = 0; + + if (params->incoming_rdma_write_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE; + if (params->incoming_rdma_read_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_READ; + if (params->incoming_atomic_en) + ib_qp_acc_flags |= IB_ACCESS_REMOTE_ATOMIC; + ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE; + return ib_qp_acc_flags; +} + +int qedr_query_qp(struct ib_qp *ibqp, + struct ib_qp_attr *qp_attr, + int attr_mask, struct ib_qp_init_attr *qp_init_attr) +{ + struct qed_rdma_query_qp_out_params params; + struct qedr_qp *qp = get_qedr_qp(ibqp); + struct qedr_dev *dev = qp->dev; + int rc = 0; + + memset(¶ms, 0, sizeof(params)); + + rc = dev->ops->rdma_query_qp(dev->rdma_ctx, qp->qed_qp, ¶ms); + if (rc) + goto err; + + memset(qp_attr, 0, sizeof(*qp_attr)); + memset(qp_init_attr, 0, sizeof(*qp_init_attr)); + + qp_attr->qp_state = qedr_get_ibqp_state(params.state); + qp_attr->cur_qp_state = qedr_get_ibqp_state(params.state); + qp_attr->path_mtu = iboe_get_mtu(params.mtu); + qp_attr->path_mig_state = IB_MIG_MIGRATED; + qp_attr->rq_psn = params.rq_psn; + qp_attr->sq_psn = params.sq_psn; + qp_attr->dest_qp_num = params.dest_qp; + + qp_attr->qp_access_flags = qedr_to_ib_qp_acc_flags(¶ms); + + qp_attr->cap.max_send_wr = qp->sq.max_wr; + qp_attr->cap.max_recv_wr = qp->rq.max_wr; + qp_attr->cap.max_send_sge = qp->sq.max_sges; + qp_attr->cap.max_recv_sge = qp->rq.max_sges; + qp_attr->cap.max_inline_data = qp->max_inline_data; + qp_init_attr->cap = qp_attr->cap; + + memcpy(&qp_attr->ah_attr.grh.dgid.raw[0], ¶ms.dgid.bytes[0], + sizeof(qp_attr->ah_attr.grh.dgid.raw)); + + qp_attr->ah_attr.grh.flow_label = params.flow_label; + qp_attr->ah_attr.grh.sgid_index = qp->sgid_idx; + qp_attr->ah_attr.grh.hop_limit = params.hop_limit_ttl; + qp_attr->ah_attr.grh.traffic_class = params.traffic_class_tos; + + qp_attr->ah_attr.ah_flags = IB_AH_GRH; + qp_attr->ah_attr.port_num = 1; + qp_attr->ah_attr.sl = 0; + qp_attr->timeout = params.timeout; + qp_attr->rnr_retry = params.rnr_retry; + qp_attr->retry_cnt = params.retry_cnt; + qp_attr->min_rnr_timer = params.min_rnr_nak_timer; + qp_attr->pkey_index = params.pkey_index; + qp_attr->port_num = 1; + qp_attr->ah_attr.src_path_bits = 0; + qp_attr->ah_attr.static_rate = 0; + qp_attr->alt_pkey_index = 0; + qp_attr->alt_port_num = 0; + qp_attr->alt_timeout = 0; + memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr)); + + qp_attr->sq_draining = (params.state == QED_ROCE_QP_STATE_SQD) ? 1 : 0; + qp_attr->max_dest_rd_atomic = params.max_dest_rd_atomic; + qp_attr->max_rd_atomic = params.max_rd_atomic; + qp_attr->en_sqd_async_notify = (params.sqd_async) ? 1 : 0; + + DP_DEBUG(dev, QEDR_MSG_QP, "QEDR_QUERY_QP: max_inline_data=%d\n", + qp_attr->cap.max_inline_data); + +err: + return rc; +} + +int qedr_destroy_qp(struct ib_qp *ibqp) +{ + struct qedr_qp *qp = get_qedr_qp(ibqp); + struct qedr_dev *dev = qp->dev; + struct ib_qp_attr attr; + int attr_mask = 0; + int rc = 0; + + DP_DEBUG(dev, QEDR_MSG_QP, "destroy qp: destroying %p, qp type=%d\n", + qp, qp->qp_type); + + if (qp->state != (QED_ROCE_QP_STATE_RESET | QED_ROCE_QP_STATE_ERR | + QED_ROCE_QP_STATE_INIT)) { + attr.qp_state = IB_QPS_ERR; + attr_mask |= IB_QP_STATE; + + /* Change the QP state to ERROR */ + qedr_modify_qp(ibqp, &attr, attr_mask, NULL); + } + + if (qp->qp_type != IB_QPT_GSI) { + rc = dev->ops->rdma_destroy_qp(dev->rdma_ctx, qp->qed_qp); + if (rc) + return rc; + } else { + qedr_destroy_gsi_qp(dev); + } + + if (ibqp->uobject && ibqp->uobject->context) { + qedr_cleanup_user_sq(dev, qp); + qedr_cleanup_user_rq(dev, qp); + } else { + qedr_cleanup_kernel_sq(dev, qp); + qedr_cleanup_kernel_rq(dev, qp); + } + + kfree(qp); + + return rc; +} + +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +{ + struct qedr_ah *ah; + + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); + if (!ah) + return ERR_PTR(-ENOMEM); + + ah->attr = *attr; + + return &ah->ibah; +} + +int qedr_destroy_ah(struct ib_ah *ibah) +{ + struct qedr_ah *ah = get_qedr_ah(ibah); + + kfree(ah); + return 0; +} + +static void free_mr_info(struct qedr_dev *dev, struct mr_info *info) +{ + struct qedr_pbl *pbl, *tmp; + + if (info->pbl_table) + list_add_tail(&info->pbl_table->list_entry, + &info->free_pbl_list); + + if (!list_empty(&info->inuse_pbl_list)) + list_splice(&info->inuse_pbl_list, &info->free_pbl_list); + + list_for_each_entry_safe(pbl, tmp, &info->free_pbl_list, list_entry) { + list_del(&pbl->list_entry); + qedr_free_pbl(dev, &info->pbl_info, pbl); + } +} + +static int init_mr_info(struct qedr_dev *dev, struct mr_info *info, + size_t page_list_len, bool two_layered) +{ + struct qedr_pbl *tmp; + int rc; + + INIT_LIST_HEAD(&info->free_pbl_list); + INIT_LIST_HEAD(&info->inuse_pbl_list); + + rc = qedr_prepare_pbl_tbl(dev, &info->pbl_info, + page_list_len, two_layered); + if (rc) + goto done; + + info->pbl_table = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + if (!info->pbl_table) { + rc = -ENOMEM; + goto done; + } + + DP_DEBUG(dev, QEDR_MSG_MR, "pbl_table_pa = %pa\n", + &info->pbl_table->pa); + + /* in usual case we use 2 PBLs, so we add one to free + * list and allocating another one + */ + tmp = qedr_alloc_pbl_tbl(dev, &info->pbl_info, GFP_KERNEL); + if (!tmp) { + DP_DEBUG(dev, QEDR_MSG_MR, "Extra PBL is not allocated\n"); + goto done; + } + + list_add_tail(&tmp->list_entry, &info->free_pbl_list); + + DP_DEBUG(dev, QEDR_MSG_MR, "extra pbl_table_pa = %pa\n", &tmp->pa); + +done: + if (rc) + free_mr_info(dev, info); + + return rc; +} + +struct ib_mr *qedr_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, + u64 usr_addr, int acc, struct ib_udata *udata) +{ + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_mr *mr; + struct qedr_pd *pd; + int rc = -ENOMEM; + + pd = get_qedr_pd(ibpd); + DP_DEBUG(dev, QEDR_MSG_MR, + "qedr_register user mr pd = %d start = %lld, len = %lld, usr_addr = %lld, acc = %d\n", + pd->pd_id, start, len, usr_addr, acc); + + if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(rc); + + mr->type = QEDR_MR_USER; + + mr->umem = ib_umem_get(ibpd->uobject->context, start, len, acc, 0); + if (IS_ERR(mr->umem)) { + rc = -EFAULT; + goto err0; + } + + rc = init_mr_info(dev, &mr->info, ib_umem_page_count(mr->umem), 1); + if (rc) + goto err1; + + qedr_populate_pbls(dev, mr->umem, mr->info.pbl_table, + &mr->info.pbl_info); + + rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* Index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.mw_bind = false; + mr->hw_mr.pbl_ptr = mr->info.pbl_table[0].pa; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.page_size_log = ilog2(mr->umem->page_size); + mr->hw_mr.fbo = ib_umem_offset(mr->umem); + mr->hw_mr.length = len; + mr->hw_mr.vaddr = usr_addr; + mr->hw_mr.zbva = false; + mr->hw_mr.phy_mr = false; + mr->hw_mr.dma_mr = false; + + rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + DP_ERR(dev, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + DP_DEBUG(dev, QEDR_MSG_MR, "register user mr lkey: %x\n", + mr->ibmr.lkey); + return &mr->ibmr; + +err2: + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); +err0: + kfree(mr); + return ERR_PTR(rc); +} + +int qedr_dereg_mr(struct ib_mr *ib_mr) +{ + struct qedr_mr *mr = get_qedr_mr(ib_mr); + struct qedr_dev *dev = get_qedr_dev(ib_mr->device); + int rc = 0; + + rc = dev->ops->rdma_deregister_tid(dev->rdma_ctx, mr->hw_mr.itid); + if (rc) + return rc; + + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); + + if ((mr->type != QEDR_MR_DMA) && (mr->type != QEDR_MR_FRMR)) + qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); + + /* it could be user registered memory. */ + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return rc; +} + +struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, int max_page_list_len) +{ + struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_mr *mr; + int rc = -ENOMEM; + + DP_DEBUG(dev, QEDR_MSG_MR, + "qedr_alloc_frmr pd = %d max_page_list_len= %d\n", pd->pd_id, + max_page_list_len); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(rc); + + mr->dev = dev; + mr->type = QEDR_MR_FRMR; + + rc = init_mr_info(dev, &mr->info, max_page_list_len, 1); + if (rc) + goto err0; + + rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); + goto err0; + } + + /* Index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = QED_RDMA_TID_FMR; + mr->hw_mr.key = 0; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = 0; + mr->hw_mr.remote_read = 0; + mr->hw_mr.remote_write = 0; + mr->hw_mr.remote_atomic = 0; + mr->hw_mr.mw_bind = false; + mr->hw_mr.pbl_ptr = 0; + mr->hw_mr.pbl_two_level = mr->info.pbl_info.two_layered; + mr->hw_mr.pbl_page_size_log = ilog2(mr->info.pbl_info.pbl_size); + mr->hw_mr.fbo = 0; + mr->hw_mr.length = 0; + mr->hw_mr.vaddr = 0; + mr->hw_mr.zbva = false; + mr->hw_mr.phy_mr = true; + mr->hw_mr.dma_mr = false; + + rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + DP_ERR(dev, "roce register tid returned an error %d\n", rc); + goto err1; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + mr->ibmr.rkey = mr->ibmr.lkey; + + DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey); + return mr; + +err1: + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err0: + kfree(mr); + return ERR_PTR(rc); +} + +struct ib_mr *qedr_alloc_mr(struct ib_pd *ibpd, + enum ib_mr_type mr_type, u32 max_num_sg) +{ + struct qedr_dev *dev; + struct qedr_mr *mr; + + if (mr_type != IB_MR_TYPE_MEM_REG) + return ERR_PTR(-EINVAL); + + mr = __qedr_alloc_mr(ibpd, max_num_sg); + + if (IS_ERR(mr)) + return ERR_PTR(-EINVAL); + + dev = mr->dev; + + return &mr->ibmr; +} + +static int qedr_set_page(struct ib_mr *ibmr, u64 addr) +{ + struct qedr_mr *mr = get_qedr_mr(ibmr); + struct qedr_pbl *pbl_table; + struct regpair *pbe; + u32 pbes_in_page; + + if (unlikely(mr->npages == mr->info.pbl_info.num_pbes)) { + DP_ERR(mr->dev, "qedr_set_page failes when %d\n", mr->npages); + return -ENOMEM; + } + + DP_DEBUG(mr->dev, QEDR_MSG_MR, "qedr_set_page pages[%d] = 0x%llx\n", + mr->npages, addr); + + pbes_in_page = mr->info.pbl_info.pbl_size / sizeof(u64); + pbl_table = mr->info.pbl_table + (mr->npages / pbes_in_page); + pbe = (struct regpair *)pbl_table->va; + pbe += mr->npages % pbes_in_page; + pbe->lo = cpu_to_le32((u32)addr); + pbe->hi = cpu_to_le32((u32)upper_32_bits(addr)); + + mr->npages++; + + return 0; +} + +static void handle_completed_mrs(struct qedr_dev *dev, struct mr_info *info) +{ + int work = info->completed - info->completed_handled - 1; + + DP_DEBUG(dev, QEDR_MSG_MR, "Special FMR work = %d\n", work); + while (work-- > 0 && !list_empty(&info->inuse_pbl_list)) { + struct qedr_pbl *pbl; + + /* Free all the page list that are possible to be freed + * (all the ones that were invalidated), under the assumption + * that if an FMR was completed successfully that means that + * if there was an invalidate operation before it also ended + */ + pbl = list_first_entry(&info->inuse_pbl_list, + struct qedr_pbl, list_entry); + list_del(&pbl->list_entry); + list_add_tail(&pbl->list_entry, &info->free_pbl_list); + info->completed_handled++; + } +} + +int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) +{ + struct qedr_mr *mr = get_qedr_mr(ibmr); + + mr->npages = 0; + + handle_completed_mrs(mr->dev, &mr->info); + return ib_sg_to_pages(ibmr, sg, sg_nents, NULL, qedr_set_page); +} + +struct ib_mr *qedr_get_dma_mr(struct ib_pd *ibpd, int acc) +{ + struct qedr_dev *dev = get_qedr_dev(ibpd->device); + struct qedr_pd *pd = get_qedr_pd(ibpd); + struct qedr_mr *mr; + int rc; + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->type = QEDR_MR_DMA; + + rc = dev->ops->rdma_alloc_tid(dev->rdma_ctx, &mr->hw_mr.itid); + if (rc) { + DP_ERR(dev, "roce alloc tid returned an error %d\n", rc); + goto err1; + } + + /* index only, 18 bit long, lkey = itid << 8 | key */ + mr->hw_mr.tid_type = QED_RDMA_TID_REGISTERED_MR; + mr->hw_mr.pd = pd->pd_id; + mr->hw_mr.local_read = 1; + mr->hw_mr.local_write = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0; + mr->hw_mr.remote_read = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0; + mr->hw_mr.remote_write = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0; + mr->hw_mr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0; + mr->hw_mr.dma_mr = true; + + rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); + if (rc) { + DP_ERR(dev, "roce register tid returned an error %d\n", rc); + goto err2; + } + + mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + if (mr->hw_mr.remote_write || mr->hw_mr.remote_read || + mr->hw_mr.remote_atomic) + mr->ibmr.rkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; + + DP_DEBUG(dev, QEDR_MSG_MR, "get dma mr: lkey = %x\n", mr->ibmr.lkey); + return &mr->ibmr; + +err2: + dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + kfree(mr); + return ERR_PTR(rc); +} + +static inline int qedr_wq_is_full(struct qedr_qp_hwq_info *wq) +{ + return (((wq->prod + 1) % wq->max_wr) == wq->cons); +} + +static int sge_data_len(struct ib_sge *sg_list, int num_sge) +{ + int i, len = 0; + + for (i = 0; i < num_sge; i++) + len += sg_list[i].length; + + return len; +} + +static void swap_wqe_data64(u64 *p) +{ + int i; + + for (i = 0; i < QEDR_SQE_ELEMENT_SIZE / sizeof(u64); i++, p++) + *p = cpu_to_be64(cpu_to_le64(*p)); +} + +static u32 qedr_prepare_sq_inline_data(struct qedr_dev *dev, + struct qedr_qp *qp, u8 *wqe_size, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr, u8 *bits, + u8 bit) +{ + u32 data_size = sge_data_len(wr->sg_list, wr->num_sge); + char *seg_prt, *wqe; + int i, seg_siz; + + if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) { + DP_ERR(dev, "Too much inline data in WR: %d\n", data_size); + *bad_wr = wr; + return 0; + } + + if (!data_size) + return data_size; + + *bits |= bit; + + seg_prt = NULL; + wqe = NULL; + seg_siz = 0; + + /* Copy data inline */ + for (i = 0; i < wr->num_sge; i++) { + u32 len = wr->sg_list[i].length; + void *src = (void *)(uintptr_t)wr->sg_list[i].addr; + + while (len > 0) { + u32 cur; + + /* New segment required */ + if (!seg_siz) { + wqe = (char *)qed_chain_produce(&qp->sq.pbl); + seg_prt = wqe; + seg_siz = sizeof(struct rdma_sq_common_wqe); + (*wqe_size)++; + } + + /* Calculate currently allowed length */ + cur = min_t(u32, len, seg_siz); + memcpy(seg_prt, src, cur); + + /* Update segment variables */ + seg_prt += cur; + seg_siz -= cur; + + /* Update sge variables */ + src += cur; + len -= cur; + + /* Swap fully-completed segments */ + if (!seg_siz) + swap_wqe_data64((u64 *)wqe); + } + } + + /* swap last not completed segment */ + if (seg_siz) + swap_wqe_data64((u64 *)wqe); + + return data_size; +} + +#define RQ_SGE_SET(sge, vaddr, vlength, vflags) \ + do { \ + DMA_REGPAIR_LE(sge->addr, vaddr); \ + (sge)->length = cpu_to_le32(vlength); \ + (sge)->flags = cpu_to_le32(vflags); \ + } while (0) + +#define SRQ_HDR_SET(hdr, vwr_id, num_sge) \ + do { \ + DMA_REGPAIR_LE(hdr->wr_id, vwr_id); \ + (hdr)->num_sges = num_sge; \ + } while (0) + +#define SRQ_SGE_SET(sge, vaddr, vlength, vlkey) \ + do { \ + DMA_REGPAIR_LE(sge->addr, vaddr); \ + (sge)->length = cpu_to_le32(vlength); \ + (sge)->l_key = cpu_to_le32(vlkey); \ + } while (0) + +static u32 qedr_prepare_sq_sges(struct qedr_qp *qp, u8 *wqe_size, + struct ib_send_wr *wr) +{ + u32 data_size = 0; + int i; + + for (i = 0; i < wr->num_sge; i++) { + struct rdma_sq_sge *sge = qed_chain_produce(&qp->sq.pbl); + + DMA_REGPAIR_LE(sge->addr, wr->sg_list[i].addr); + sge->l_key = cpu_to_le32(wr->sg_list[i].lkey); + sge->length = cpu_to_le32(wr->sg_list[i].length); + data_size += wr->sg_list[i].length; + } + + if (wqe_size) + *wqe_size += wr->num_sge; + + return data_size; +} + +static u32 qedr_prepare_sq_rdma_data(struct qedr_dev *dev, + struct qedr_qp *qp, + struct rdma_sq_rdma_wqe_1st *rwqe, + struct rdma_sq_rdma_wqe_2nd *rwqe2, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + rwqe2->r_key = cpu_to_le32(rdma_wr(wr)->rkey); + DMA_REGPAIR_LE(rwqe2->remote_va, rdma_wr(wr)->remote_addr); + + if (wr->send_flags & IB_SEND_INLINE) { + u8 flags = 0; + + SET_FIELD2(flags, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1); + return qedr_prepare_sq_inline_data(dev, qp, &rwqe->wqe_size, wr, + bad_wr, &rwqe->flags, flags); + } + + return qedr_prepare_sq_sges(qp, &rwqe->wqe_size, wr); +} + +static u32 qedr_prepare_sq_send_data(struct qedr_dev *dev, + struct qedr_qp *qp, + struct rdma_sq_send_wqe_1st *swqe, + struct rdma_sq_send_wqe_2st *swqe2, + struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + memset(swqe2, 0, sizeof(*swqe2)); + if (wr->send_flags & IB_SEND_INLINE) { + u8 flags = 0; + + SET_FIELD2(flags, RDMA_SQ_SEND_WQE_INLINE_FLG, 1); + return qedr_prepare_sq_inline_data(dev, qp, &swqe->wqe_size, wr, + bad_wr, &swqe->flags, flags); + } + + return qedr_prepare_sq_sges(qp, &swqe->wqe_size, wr); +} + +static int qedr_prepare_reg(struct qedr_qp *qp, + struct rdma_sq_fmr_wqe_1st *fwqe1, + struct ib_reg_wr *wr) +{ + struct qedr_mr *mr = get_qedr_mr(wr->mr); + struct rdma_sq_fmr_wqe_2nd *fwqe2; + + fwqe2 = (struct rdma_sq_fmr_wqe_2nd *)qed_chain_produce(&qp->sq.pbl); + fwqe1->addr.hi = upper_32_bits(mr->ibmr.iova); + fwqe1->addr.lo = lower_32_bits(mr->ibmr.iova); + fwqe1->l_key = wr->key; + + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_READ, + !!(wr->access & IB_ACCESS_REMOTE_READ)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE, + !!(wr->access & IB_ACCESS_REMOTE_WRITE)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC, + !!(wr->access & IB_ACCESS_REMOTE_ATOMIC)); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_READ, 1); + SET_FIELD2(fwqe2->access_ctrl, RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE, + !!(wr->access & IB_ACCESS_LOCAL_WRITE)); + fwqe2->fmr_ctrl = 0; + + SET_FIELD2(fwqe2->fmr_ctrl, RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG, + ilog2(mr->ibmr.page_size) - 12); + + fwqe2->length_hi = 0; + fwqe2->length_lo = mr->ibmr.length; + fwqe2->pbl_addr.hi = upper_32_bits(mr->info.pbl_table->pa); + fwqe2->pbl_addr.lo = lower_32_bits(mr->info.pbl_table->pa); + + qp->wqe_wr_id[qp->sq.prod].mr = mr; + + return 0; +} + +enum ib_wc_opcode qedr_ib_to_wc_opcode(enum ib_wr_opcode opcode) +{ + switch (opcode) { + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_WRITE_WITH_IMM: + return IB_WC_RDMA_WRITE; + case IB_WR_SEND_WITH_IMM: + case IB_WR_SEND: + case IB_WR_SEND_WITH_INV: + return IB_WC_SEND; + case IB_WR_RDMA_READ: + return IB_WC_RDMA_READ; + case IB_WR_ATOMIC_CMP_AND_SWP: + return IB_WC_COMP_SWAP; + case IB_WR_ATOMIC_FETCH_AND_ADD: + return IB_WC_FETCH_ADD; + case IB_WR_REG_MR: + return IB_WC_REG_MR; + case IB_WR_LOCAL_INV: + return IB_WC_LOCAL_INV; + default: + return IB_WC_SEND; + } +} + +inline bool qedr_can_post_send(struct qedr_qp *qp, struct ib_send_wr *wr) +{ + int wq_is_full, err_wr, pbl_is_full; + struct qedr_dev *dev = qp->dev; + + /* prevent SQ overflow and/or processing of a bad WR */ + err_wr = wr->num_sge > qp->sq.max_sges; + wq_is_full = qedr_wq_is_full(&qp->sq); + pbl_is_full = qed_chain_get_elem_left_u32(&qp->sq.pbl) < + QEDR_MAX_SQE_ELEMENTS_PER_SQE; + if (wq_is_full || err_wr || pbl_is_full) { + if (wq_is_full && !(qp->err_bitmap & QEDR_QP_ERR_SQ_FULL)) { + DP_ERR(dev, + "error: WQ is full. Post send on QP %p failed (this error appears only once)\n", + qp); + qp->err_bitmap |= QEDR_QP_ERR_SQ_FULL; + } + + if (err_wr && !(qp->err_bitmap & QEDR_QP_ERR_BAD_SR)) { + DP_ERR(dev, + "error: WR is bad. Post send on QP %p failed (this error appears only once)\n", + qp); + qp->err_bitmap |= QEDR_QP_ERR_BAD_SR; + } + + if (pbl_is_full && + !(qp->err_bitmap & QEDR_QP_ERR_SQ_PBL_FULL)) { + DP_ERR(dev, + "error: WQ PBL is full. Post send on QP %p failed (this error appears only once)\n", + qp); + qp->err_bitmap |= QEDR_QP_ERR_SQ_PBL_FULL; + } + return false; + } + return true; +} + +int __qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct qedr_dev *dev = get_qedr_dev(ibqp->device); + struct qedr_qp *qp = get_qedr_qp(ibqp); + struct rdma_sq_atomic_wqe_1st *awqe1; + struct rdma_sq_atomic_wqe_2nd *awqe2; + struct rdma_sq_atomic_wqe_3rd *awqe3; + struct rdma_sq_send_wqe_2st *swqe2; + struct rdma_sq_local_inv_wqe *iwqe; + struct rdma_sq_rdma_wqe_2nd *rwqe2; + struct rdma_sq_send_wqe_1st *swqe; + struct rdma_sq_rdma_wqe_1st *rwqe; + struct rdma_sq_fmr_wqe_1st *fwqe1; + struct rdma_sq_common_wqe *wqe; + u32 length; + int rc = 0; + bool comp; + + if (!qedr_can_post_send(qp, wr)) { + *bad_wr = wr; + return -ENOMEM; + } + + wqe = qed_chain_produce(&qp->sq.pbl); + qp->wqe_wr_id[qp->sq.prod].signaled = + !!(wr->send_flags & IB_SEND_SIGNALED) || qp->signaled; + + wqe->flags = 0; + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_SE_FLG, + !!(wr->send_flags & IB_SEND_SOLICITED)); + comp = (!!(wr->send_flags & IB_SEND_SIGNALED)) || qp->signaled; + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_COMP_FLG, comp); + SET_FIELD2(wqe->flags, RDMA_SQ_SEND_WQE_RD_FENCE_FLG, + !!(wr->send_flags & IB_SEND_FENCE)); + wqe->prev_wqe_size = qp->prev_wqe_size; + + qp->wqe_wr_id[qp->sq.prod].opcode = qedr_ib_to_wc_opcode(wr->opcode); + + switch (wr->opcode) { + case IB_WR_SEND_WITH_IMM: + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM; + swqe = (struct rdma_sq_send_wqe_1st *)wqe; + swqe->wqe_size = 2; + swqe2 = qed_chain_produce(&qp->sq.pbl); + + swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.imm_data); + length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, + wr, bad_wr); + swqe->length = cpu_to_le32(length); + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + break; + case IB_WR_SEND: + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND; + swqe = (struct rdma_sq_send_wqe_1st *)wqe; + + swqe->wqe_size = 2; + swqe2 = qed_chain_produce(&qp->sq.pbl); + length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, + wr, bad_wr); + swqe->length = cpu_to_le32(length); + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + break; + case IB_WR_SEND_WITH_INV: + wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE; + swqe = (struct rdma_sq_send_wqe_1st *)wqe; + swqe2 = qed_chain_produce(&qp->sq.pbl); + swqe->wqe_size = 2; + swqe->inv_key_or_imm_data = cpu_to_le32(wr->ex.invalidate_rkey); + length = qedr_prepare_sq_send_data(dev, qp, swqe, swqe2, + wr, bad_wr); + swqe->length = cpu_to_le32(length); + qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size; + qp->prev_wqe_size = swqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length; + break; + + case IB_WR_RDMA_WRITE_WITH_IMM: + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe->imm_data = htonl(cpu_to_le32(wr->ex.imm_data)); + rwqe2 = qed_chain_produce(&qp->sq.pbl); + length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, + wr, bad_wr); + rwqe->length = cpu_to_le32(length); + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + break; + case IB_WR_RDMA_WRITE: + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe2 = qed_chain_produce(&qp->sq.pbl); + length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, + wr, bad_wr); + rwqe->length = cpu_to_le32(length); + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + break; + case IB_WR_RDMA_READ_WITH_INV: + DP_ERR(dev, + "RDMA READ WITH INVALIDATE not supported\n"); + *bad_wr = wr; + rc = -EINVAL; + break; + + case IB_WR_RDMA_READ: + wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD; + rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe; + + rwqe->wqe_size = 2; + rwqe2 = qed_chain_produce(&qp->sq.pbl); + length = qedr_prepare_sq_rdma_data(dev, qp, rwqe, rwqe2, + wr, bad_wr); + rwqe->length = cpu_to_le32(length); + qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size; + qp->prev_wqe_size = rwqe->wqe_size; + qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length; + break; + + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe; + awqe1->wqe_size = 4; + + awqe2 = qed_chain_produce(&qp->sq.pbl); + DMA_REGPAIR_LE(awqe2->remote_va, atomic_wr(wr)->remote_addr); + awqe2->r_key = cpu_to_le32(atomic_wr(wr)->rkey); + + awqe3 = qed_chain_produce(&qp->sq.pbl); + + if (wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD) { + wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD; + DMA_REGPAIR_LE(awqe3->swap_data, + atomic_wr(wr)->compare_add); + } else { + wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP; + DMA_REGPAIR_LE(awqe3->swap_data, + atomic_wr(wr)->swap); + DMA_REGPAIR_LE(awqe3->cmp_data, + atomic_wr(wr)->compare_add); + } + + qedr_prepare_sq_sges(qp, NULL, wr); + + qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size; + qp->prev_wqe_size = awqe1->wqe_size; + break; + + case IB_WR_LOCAL_INV: + iwqe = (struct rdma_sq_local_inv_wqe *)wqe; + iwqe->wqe_size = 1; + + iwqe->req_type = RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE; + iwqe->inv_l_key = wr->ex.invalidate_rkey; + qp->wqe_wr_id[qp->sq.prod].wqe_size = iwqe->wqe_size; + qp->prev_wqe_size = iwqe->wqe_size; + break; + case IB_WR_REG_MR: + DP_DEBUG(dev, QEDR_MSG_CQ, "REG_MR\n"); + wqe->req_type = RDMA_SQ_REQ_TYPE_FAST_MR; + fwqe1 = (struct rdma_sq_fmr_wqe_1st *)wqe; + fwqe1->wqe_size = 2; + + rc = qedr_prepare_reg(qp, fwqe1, reg_wr(wr)); + if (rc) { + DP_ERR(dev, "IB_REG_MR failed rc=%d\n", rc); + *bad_wr = wr; + break; + } + + qp->wqe_wr_id[qp->sq.prod].wqe_size = fwqe1->wqe_size; + qp->prev_wqe_size = fwqe1->wqe_size; + break; + default: + DP_ERR(dev, "invalid opcode 0x%x!\n", wr->opcode); + rc = -EINVAL; + *bad_wr = wr; + break; + } + + if (*bad_wr) { + u16 value; + + /* Restore prod to its position before + * this WR was processed + */ + value = le16_to_cpu(qp->sq.db_data.data.value); + qed_chain_set_prod(&qp->sq.pbl, value, wqe); + + /* Restore prev_wqe_size */ + qp->prev_wqe_size = wqe->prev_wqe_size; + rc = -EINVAL; + DP_ERR(dev, "POST SEND FAILED\n"); + } + + return rc; +} + +int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct qedr_dev *dev = get_qedr_dev(ibqp->device); + struct qedr_qp *qp = get_qedr_qp(ibqp); + unsigned long flags; + int rc = 0; + + *bad_wr = NULL; + + if (qp->qp_type == IB_QPT_GSI) + return qedr_gsi_post_send(ibqp, wr, bad_wr); + + spin_lock_irqsave(&qp->q_lock, flags); + + if ((qp->state == QED_ROCE_QP_STATE_RESET) || + (qp->state == QED_ROCE_QP_STATE_ERR)) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + DP_DEBUG(dev, QEDR_MSG_CQ, + "QP in wrong state! QP icid=0x%x state %d\n", + qp->icid, qp->state); + return -EINVAL; + } + + if (!wr) { + DP_ERR(dev, "Got an empty post send.\n"); + return -EINVAL; + } + + while (wr) { + rc = __qedr_post_send(ibqp, wr, bad_wr); + if (rc) + break; + + qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id; + + qedr_inc_sw_prod(&qp->sq); + + qp->sq.db_data.data.value++; + + wr = wr->next; + } + + /* Trigger doorbell + * If there was a failure in the first WR then it will be triggered in + * vane. However this is not harmful (as long as the producer value is + * unchanged). For performance reasons we avoid checking for this + * redundant doorbell. + */ + wmb(); + writel(qp->sq.db_data.raw, qp->sq.db); + + /* Make sure write sticks */ + mmiowb(); + + spin_unlock_irqrestore(&qp->q_lock, flags); + + return rc; +} + +int qedr_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, + struct ib_recv_wr **bad_wr) +{ + struct qedr_qp *qp = get_qedr_qp(ibqp); + struct qedr_dev *dev = qp->dev; + unsigned long flags; + int status = 0; + + if (qp->qp_type == IB_QPT_GSI) + return qedr_gsi_post_recv(ibqp, wr, bad_wr); + + spin_lock_irqsave(&qp->q_lock, flags); + + if ((qp->state == QED_ROCE_QP_STATE_RESET) || + (qp->state == QED_ROCE_QP_STATE_ERR)) { + spin_unlock_irqrestore(&qp->q_lock, flags); + *bad_wr = wr; + return -EINVAL; + } + + while (wr) { + int i; + + if (qed_chain_get_elem_left_u32(&qp->rq.pbl) < + QEDR_MAX_RQE_ELEMENTS_PER_RQE || + wr->num_sge > qp->rq.max_sges) { + DP_ERR(dev, "Can't post WR (%d < %d) || (%d > %d)\n", + qed_chain_get_elem_left_u32(&qp->rq.pbl), + QEDR_MAX_RQE_ELEMENTS_PER_RQE, wr->num_sge, + qp->rq.max_sges); + status = -ENOMEM; + *bad_wr = wr; + break; + } + for (i = 0; i < wr->num_sge; i++) { + u32 flags = 0; + struct rdma_rq_sge *rqe = + qed_chain_produce(&qp->rq.pbl); + + /* First one must include the number + * of SGE in the list + */ + if (!i) + SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, + wr->num_sge); + + SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, + wr->sg_list[i].lkey); + + RQ_SGE_SET(rqe, wr->sg_list[i].addr, + wr->sg_list[i].length, flags); + } + + /* Special case of no sges. FW requires between 1-4 sges... + * in this case we need to post 1 sge with length zero. this is + * because rdma write with immediate consumes an RQ. + */ + if (!wr->num_sge) { + u32 flags = 0; + struct rdma_rq_sge *rqe = + qed_chain_produce(&qp->rq.pbl); + + /* First one must include the number + * of SGE in the list + */ + SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0); + SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1); + + RQ_SGE_SET(rqe, 0, 0, flags); + i = 1; + } + + qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id; + qp->rqe_wr_id[qp->rq.prod].wqe_size = i; + + qedr_inc_sw_prod(&qp->rq); + + /* Flush all the writes before signalling doorbell */ + wmb(); + + qp->rq.db_data.data.value++; + + writel(qp->rq.db_data.raw, qp->rq.db); + + /* Make sure write sticks */ + mmiowb(); + + wr = wr->next; + } + + spin_unlock_irqrestore(&qp->q_lock, flags); + + return status; +} + +static int is_valid_cqe(struct qedr_cq *cq, union rdma_cqe *cqe) +{ + struct rdma_cqe_requester *resp_cqe = &cqe->req; + + return (resp_cqe->flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK) == + cq->pbl_toggle; +} + +static struct qedr_qp *cqe_get_qp(union rdma_cqe *cqe) +{ + struct rdma_cqe_requester *resp_cqe = &cqe->req; + struct qedr_qp *qp; + + qp = (struct qedr_qp *)(uintptr_t)HILO_GEN(resp_cqe->qp_handle.hi, + resp_cqe->qp_handle.lo, + u64); + return qp; +} + +static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe) +{ + struct rdma_cqe_requester *resp_cqe = &cqe->req; + + return GET_FIELD(resp_cqe->flags, RDMA_CQE_REQUESTER_TYPE); +} + +/* Return latest CQE (needs processing) */ +static union rdma_cqe *get_cqe(struct qedr_cq *cq) +{ + return cq->latest_cqe; +} + +/* In fmr we need to increase the number of fmr completed counter for the fmr + * algorithm determining whether we can free a pbl or not. + * we need to perform this whether the work request was signaled or not. for + * this purpose we call this function from the condition that checks if a wr + * should be skipped, to make sure we don't miss it ( possibly this fmr + * operation was not signalted) + */ +static inline void qedr_chk_if_fmr(struct qedr_qp *qp) +{ + if (qp->wqe_wr_id[qp->sq.cons].opcode == IB_WC_REG_MR) + qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; +} + +static int process_req(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, int num_entries, + struct ib_wc *wc, u16 hw_cons, enum ib_wc_status status, + int force) +{ + u16 cnt = 0; + + while (num_entries && qp->sq.wqe_cons != hw_cons) { + if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) { + qedr_chk_if_fmr(qp); + /* skip WC */ + goto next_cqe; + } + + /* fill WC */ + wc->status = status; + wc->wc_flags = 0; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + + wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id; + wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode; + + switch (wc->opcode) { + case IB_WC_RDMA_WRITE: + wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len; + break; + case IB_WC_COMP_SWAP: + case IB_WC_FETCH_ADD: + wc->byte_len = 8; + break; + case IB_WC_REG_MR: + qp->wqe_wr_id[qp->sq.cons].mr->info.completed++; + break; + default: + break; + } + + num_entries--; + wc++; + cnt++; +next_cqe: + while (qp->wqe_wr_id[qp->sq.cons].wqe_size--) + qed_chain_consume(&qp->sq.pbl); + qedr_inc_sw_cons(&qp->sq); + } + + return cnt; +} + +static int qedr_poll_cq_req(struct qedr_dev *dev, + struct qedr_qp *qp, struct qedr_cq *cq, + int num_entries, struct ib_wc *wc, + struct rdma_cqe_requester *req) +{ + int cnt = 0; + + switch (req->status) { + case RDMA_CQE_REQ_STS_OK: + cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, + IB_WC_SUCCESS, 0); + break; + case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + cnt = process_req(dev, qp, cq, num_entries, wc, req->sq_cons, + IB_WC_WR_FLUSH_ERR, 0); + break; + default: + /* process all WQE before the cosumer */ + qp->state = QED_ROCE_QP_STATE_ERR; + cnt = process_req(dev, qp, cq, num_entries, wc, + req->sq_cons - 1, IB_WC_SUCCESS, 0); + wc += cnt; + /* if we have extra WC fill it with actual error info */ + if (cnt < num_entries) { + enum ib_wc_status wc_status; + + switch (req->status) { + case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_BAD_RESP_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_LOC_LEN_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_LOC_PROT_ERR; + break; + case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_MW_BIND_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_REM_INV_REQ_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_REM_ACCESS_ERR; + break; + case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_REM_OP_ERR; + break; + case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR: + DP_ERR(dev, + "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_RNR_RETRY_EXC_ERR; + break; + case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR: + DP_ERR(dev, + "Error: POLL CQ with ROCE_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_RETRY_EXC_ERR; + break; + default: + DP_ERR(dev, + "Error: POLL CQ with IB_WC_GENERAL_ERR. CQ icid=0x%x, QP icid=0x%x\n", + cq->icid, qp->icid); + wc_status = IB_WC_GENERAL_ERR; + } + cnt += process_req(dev, qp, cq, 1, wc, req->sq_cons, + wc_status, 1); + } + } + + return cnt; +} + +static void __process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, struct ib_wc *wc, + struct rdma_cqe_responder *resp, u64 wr_id) +{ + enum ib_wc_status wc_status = IB_WC_SUCCESS; + u8 flags; + + wc->opcode = IB_WC_RECV; + wc->wc_flags = 0; + + switch (resp->status) { + case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR: + wc_status = IB_WC_LOC_ACCESS_ERR; + break; + case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR: + wc_status = IB_WC_LOC_LEN_ERR; + break; + case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR: + wc_status = IB_WC_LOC_QP_OP_ERR; + break; + case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR: + wc_status = IB_WC_LOC_PROT_ERR; + break; + case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR: + wc_status = IB_WC_MW_BIND_ERR; + break; + case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR: + wc_status = IB_WC_REM_INV_RD_REQ_ERR; + break; + case RDMA_CQE_RESP_STS_OK: + wc_status = IB_WC_SUCCESS; + wc->byte_len = le32_to_cpu(resp->length); + + flags = resp->flags & QEDR_RESP_RDMA_IMM; + + if (flags == QEDR_RESP_RDMA_IMM) + wc->opcode = IB_WC_RECV_RDMA_WITH_IMM; + + if (flags == QEDR_RESP_RDMA_IMM || flags == QEDR_RESP_IMM) { + wc->ex.imm_data = + le32_to_cpu(resp->imm_data_or_inv_r_Key); + wc->wc_flags |= IB_WC_WITH_IMM; + } + break; + default: + wc->status = IB_WC_GENERAL_ERR; + DP_ERR(dev, "Invalid CQE status detected\n"); + } + + /* fill WC */ + wc->status = wc_status; + wc->src_qp = qp->id; + wc->qp = &qp->ibqp; + wc->wr_id = wr_id; +} + +static int process_resp_one(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, struct ib_wc *wc, + struct rdma_cqe_responder *resp) +{ + u64 wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + + __process_resp_one(dev, qp, cq, wc, resp, wr_id); + + while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) + qed_chain_consume(&qp->rq.pbl); + qedr_inc_sw_cons(&qp->rq); + + return 1; +} + +static int process_resp_flush(struct qedr_qp *qp, struct qedr_cq *cq, + int num_entries, struct ib_wc *wc, u16 hw_cons) +{ + u16 cnt = 0; + + while (num_entries && qp->rq.wqe_cons != hw_cons) { + /* fill WC */ + wc->status = IB_WC_WR_FLUSH_ERR; + wc->wc_flags = 0; + wc->src_qp = qp->id; + wc->byte_len = 0; + wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id; + wc->qp = &qp->ibqp; + num_entries--; + wc++; + cnt++; + while (qp->rqe_wr_id[qp->rq.cons].wqe_size--) + qed_chain_consume(&qp->rq.pbl); + qedr_inc_sw_cons(&qp->rq); + } + + return cnt; +} + +static void try_consume_resp_cqe(struct qedr_cq *cq, struct qedr_qp *qp, + struct rdma_cqe_responder *resp, int *update) +{ + if (le16_to_cpu(resp->rq_cons) == qp->rq.wqe_cons) { + consume_cqe(cq); + *update |= 1; + } +} + +static int qedr_poll_cq_resp(struct qedr_dev *dev, struct qedr_qp *qp, + struct qedr_cq *cq, int num_entries, + struct ib_wc *wc, struct rdma_cqe_responder *resp, + int *update) +{ + int cnt; + + if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) { + cnt = process_resp_flush(qp, cq, num_entries, wc, + resp->rq_cons); + try_consume_resp_cqe(cq, qp, resp, update); + } else { + cnt = process_resp_one(dev, qp, cq, wc, resp); + consume_cqe(cq); + *update |= 1; + } + + return cnt; +} + +static void try_consume_req_cqe(struct qedr_cq *cq, struct qedr_qp *qp, + struct rdma_cqe_requester *req, int *update) +{ + if (le16_to_cpu(req->sq_cons) == qp->sq.wqe_cons) { + consume_cqe(cq); + *update |= 1; + } +} + +int qedr_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) +{ + struct qedr_dev *dev = get_qedr_dev(ibcq->device); + struct qedr_cq *cq = get_qedr_cq(ibcq); + union rdma_cqe *cqe = cq->latest_cqe; + u32 old_cons, new_cons; + unsigned long flags; + int update = 0; + int done = 0; + + if (cq->cq_type == QEDR_CQ_TYPE_GSI) + return qedr_gsi_poll_cq(ibcq, num_entries, wc); + + spin_lock_irqsave(&cq->cq_lock, flags); + old_cons = qed_chain_get_cons_idx_u32(&cq->pbl); + while (num_entries && is_valid_cqe(cq, cqe)) { + struct qedr_qp *qp; + int cnt = 0; + + /* prevent speculative reads of any field of CQE */ + rmb(); + + qp = cqe_get_qp(cqe); + if (!qp) { + WARN(1, "Error: CQE QP pointer is NULL. CQE=%p\n", cqe); + break; + } + + wc->qp = &qp->ibqp; + + switch (cqe_get_type(cqe)) { + case RDMA_CQE_TYPE_REQUESTER: + cnt = qedr_poll_cq_req(dev, qp, cq, num_entries, wc, + &cqe->req); + try_consume_req_cqe(cq, qp, &cqe->req, &update); + break; + case RDMA_CQE_TYPE_RESPONDER_RQ: + cnt = qedr_poll_cq_resp(dev, qp, cq, num_entries, wc, + &cqe->resp, &update); + break; + case RDMA_CQE_TYPE_INVALID: + default: + DP_ERR(dev, "Error: invalid CQE type = %d\n", + cqe_get_type(cqe)); + } + num_entries -= cnt; + wc += cnt; + done += cnt; + + cqe = get_cqe(cq); + } + new_cons = qed_chain_get_cons_idx_u32(&cq->pbl); + + cq->cq_cons += new_cons - old_cons; + + if (update) + /* doorbell notifies abount latest VALID entry, + * but chain already point to the next INVALID one + */ + doorbell_cq(cq, cq->cq_cons - 1, cq->arm_flags); + + spin_unlock_irqrestore(&cq->cq_lock, flags); + return done; +} + +int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, + u8 port_num, + const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *mad_hdr, + size_t in_mad_size, struct ib_mad_hdr *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index) +{ + struct qedr_dev *dev = get_qedr_dev(ibdev); + + DP_DEBUG(dev, QEDR_MSG_GSI, + "QEDR_PROCESS_MAD in_mad %x %x %x %x %x %x %x %x\n", + mad_hdr->attr_id, mad_hdr->base_version, mad_hdr->attr_mod, + mad_hdr->class_specific, mad_hdr->class_version, + mad_hdr->method, mad_hdr->mgmt_class, mad_hdr->status); + return IB_MAD_RESULT_SUCCESS; +} + +int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable) +{ + struct ib_port_attr attr; + int err; + + err = qedr_query_port(ibdev, port_num, &attr); + if (err) + return err; + + immutable->pkey_tbl_len = attr.pkey_tbl_len; + immutable->gid_tbl_len = attr.gid_tbl_len; + immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE | + RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; + immutable->max_mad_size = IB_MGMT_MAD_SIZE; + + return 0; +} diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h new file mode 100644 index 000000000000..a9b5e67bb81e --- /dev/null +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -0,0 +1,101 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __QEDR_VERBS_H__ +#define __QEDR_VERBS_H__ + +int qedr_query_device(struct ib_device *ibdev, + struct ib_device_attr *attr, struct ib_udata *udata); +int qedr_query_port(struct ib_device *, u8 port, struct ib_port_attr *props); +int qedr_modify_port(struct ib_device *, u8 port, int mask, + struct ib_port_modify *props); + +int qedr_query_gid(struct ib_device *, u8 port, int index, union ib_gid *gid); + +int qedr_query_pkey(struct ib_device *, u8 port, u16 index, u16 *pkey); + +struct ib_ucontext *qedr_alloc_ucontext(struct ib_device *, struct ib_udata *); +int qedr_dealloc_ucontext(struct ib_ucontext *); + +int qedr_mmap(struct ib_ucontext *, struct vm_area_struct *vma); +int qedr_del_gid(struct ib_device *device, u8 port_num, + unsigned int index, void **context); +int qedr_add_gid(struct ib_device *device, u8 port_num, + unsigned int index, const union ib_gid *gid, + const struct ib_gid_attr *attr, void **context); +struct ib_pd *qedr_alloc_pd(struct ib_device *, + struct ib_ucontext *, struct ib_udata *); +int qedr_dealloc_pd(struct ib_pd *pd); + +struct ib_cq *qedr_create_cq(struct ib_device *ibdev, + const struct ib_cq_init_attr *attr, + struct ib_ucontext *ib_ctx, + struct ib_udata *udata); +int qedr_resize_cq(struct ib_cq *, int cqe, struct ib_udata *); +int qedr_destroy_cq(struct ib_cq *); +int qedr_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags); +struct ib_qp *qedr_create_qp(struct ib_pd *, struct ib_qp_init_attr *attrs, + struct ib_udata *); +int qedr_modify_qp(struct ib_qp *, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); +int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, + int qp_attr_mask, struct ib_qp_init_attr *); +int qedr_destroy_qp(struct ib_qp *ibqp); + +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr); +int qedr_destroy_ah(struct ib_ah *ibah); + +int qedr_dereg_mr(struct ib_mr *); +struct ib_mr *qedr_get_dma_mr(struct ib_pd *, int acc); + +struct ib_mr *qedr_reg_user_mr(struct ib_pd *, u64 start, u64 length, + u64 virt, int acc, struct ib_udata *); + +int qedr_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset); + +struct ib_mr *qedr_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, + u32 max_num_sg); +int qedr_poll_cq(struct ib_cq *, int num_entries, struct ib_wc *wc); +int qedr_post_send(struct ib_qp *, struct ib_send_wr *, + struct ib_send_wr **bad_wr); +int qedr_post_recv(struct ib_qp *, struct ib_recv_wr *, + struct ib_recv_wr **bad_wr); +int qedr_process_mad(struct ib_device *ibdev, int process_mad_flags, + u8 port_num, const struct ib_wc *in_wc, + const struct ib_grh *in_grh, + const struct ib_mad_hdr *in_mad, + size_t in_mad_size, struct ib_mad_hdr *out_mad, + size_t *out_mad_size, u16 *out_mad_pkey_index); + +int qedr_port_immutable(struct ib_device *ibdev, u8 port_num, + struct ib_port_immutable *immutable); +#endif diff --git a/drivers/infiniband/hw/qib/qib.h b/drivers/infiniband/hw/qib/qib.h index bbf0a163aeab..a3e21a25cea5 100644 --- a/drivers/infiniband/hw/qib/qib.h +++ b/drivers/infiniband/hw/qib/qib.h @@ -52,6 +52,7 @@ #include <linux/kref.h> #include <linux/sched.h> #include <linux/kthread.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include "qib_common.h" @@ -1131,7 +1132,6 @@ extern spinlock_t qib_devs_lock; extern struct qib_devdata *qib_lookup(int unit); extern u32 qib_cpulist_count; extern unsigned long *qib_cpulist; -extern u16 qpt_mask; extern unsigned qib_cc_table_size; int qib_init(struct qib_devdata *, int); diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5e75b43c596b..5bad8e3b40bb 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) DEBUGFS_FILE(ctx_stats) static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) + __acquires(RCU) { struct qib_qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qib_qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qib_qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qib_qp_iter *iter = iter_ptr; @@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) + __releases(RCU) { rcu_read_unlock(); } diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c index 67ee6438cf59..728e0a030d2e 100644 --- a/drivers/infiniband/hw/qib/qib_driver.c +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -319,8 +319,8 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd, ret = 1; else if (eflags == QLOGIC_IB_RHF_H_TIDERR) { /* For TIDERR and RC QPs premptively schedule a NAK */ - struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr; - struct qib_other_headers *ohdr = NULL; + struct ib_header *hdr = (struct ib_header *)rhdr; + struct ib_other_headers *ohdr = NULL; struct qib_ibport *ibp = &ppd->ibport_data; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; @@ -588,8 +588,7 @@ move_along: qib_schedule_send(qp); spin_unlock_irqrestore(&qp->s_lock, flags); } - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } bail: diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fcdf37913a26..f1e66efea98a 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -64,7 +64,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_blocks = 0; - inode->i_atime = CURRENT_TIME; + inode->i_atime = current_time(inode); inode->i_mtime = inode->i_atime; inode->i_ctime = inode->i_atime; inode->i_private = data; @@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf, pos = *ppos; - if (pos != 0) { - ret = -EINVAL; - goto bail; - } - - if (count != sizeof(struct qib_flash)) { - ret = -EINVAL; - goto bail; - } - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } + if (pos != 0 || count != sizeof(struct qib_flash)) + return -EINVAL; - if (copy_from_user(tmp, buf, count)) { - ret = -EFAULT; - goto bail_tmp; - } + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); dd = private2dd(file); if (qib_eeprom_write(dd, pos, tmp, count)) { @@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf, bail_tmp: kfree(tmp); - -bail: return ret; } diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index ce4034071f9c..ded27172320e 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -1415,7 +1415,7 @@ static void flush_fifo(struct qib_pportdata *ppd) u32 *hdr; u64 pbc; const unsigned hdrwords = 7; - static struct qib_ib_header ibhdr = { + static struct ib_header ibhdr = { .lrh[0] = cpu_to_be16(0xF000 | QIB_LRH_BTH), .lrh[1] = IB_LID_PERMISSIVE, .lrh[2] = cpu_to_be16(hdrwords + SIZE_OF_CRC), diff --git a/drivers/infiniband/hw/qib/qib_init.c b/drivers/infiniband/hw/qib/qib_init.c index f253111e682e..1730aa839a47 100644 --- a/drivers/infiniband/hw/qib/qib_init.c +++ b/drivers/infiniband/hw/qib/qib_init.c @@ -614,8 +614,8 @@ static int qib_create_workqueues(struct qib_devdata *dd) snprintf(wq_name, sizeof(wq_name), "qib%d_%d", dd->unit, pidx); - ppd->qib_wq = - create_singlethread_workqueue(wq_name); + ppd->qib_wq = alloc_ordered_workqueue(wq_name, + WQ_MEM_RECLAIM); if (!ppd->qib_wq) goto wq_error; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 9cc0aae1d781..99d31efe4c2f 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -41,14 +41,6 @@ #include "qib.h" -/* - * mask field which was present in now deleted qib_qpn_table - * is not present in rvt_qpn_table. Defining the same field - * as qpt_mask here instead of adding the mask field to - * rvt_qpn_table. - */ -u16 qpt_mask; - static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off) { @@ -57,7 +49,7 @@ static inline unsigned mk_qpn(struct rvt_qpn_table *qpt, static inline unsigned find_next_offset(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map, unsigned off, - unsigned n) + unsigned n, u16 qpt_mask) { if (qpt_mask) { off++; @@ -179,6 +171,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi); struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata, verbs_dev); + u16 qpt_mask = dd->qpn_mask; if (type == IB_QPT_SMI || type == IB_QPT_GSI) { unsigned n; @@ -215,7 +208,7 @@ int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt, goto bail; } offset = find_next_offset(qpt, map, offset, - dd->n_krcv_queues); + dd->n_krcv_queues, qpt_mask); qpn = mk_qpn(qpt, map, offset); /* * This test differs from alloc_pidmap(). @@ -573,10 +566,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) return NULL; iter->dev = dev; - if (qib_qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index 444028a3582a..2097512e75aa 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -75,7 +75,7 @@ static void start_timer(struct rvt_qp *qp) * Note the QP s_lock must be held. */ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, - struct qib_other_headers *ohdr, u32 pmtu) + struct ib_other_headers *ohdr, u32 pmtu) { struct rvt_ack_entry *e; u32 hwords; @@ -154,10 +154,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp, len = 0; qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE); ohdr->u.at.aeth = qib_compute_aeth(qp); - ohdr->u.at.atomic_ack_eth[0] = - cpu_to_be32(e->atomic_data >> 32); - ohdr->u.at.atomic_ack_eth[1] = - cpu_to_be32(e->atomic_data); + ib_u64_put(e->atomic_data, &ohdr->u.at.atomic_ack_eth); hwords += sizeof(ohdr->u.at) / sizeof(u32); bth2 = e->psn & QIB_PSN_MASK; e->sent = 1; @@ -234,7 +231,7 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; struct qib_ibdev *dev = to_idev(qp->ibqp.device); - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_sge_state *ss; struct rvt_swqe *wqe; u32 hwords; @@ -444,20 +441,18 @@ int qib_make_rc_req(struct rvt_qp *qp, unsigned long *flags) } if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) { qp->s_state = OP(COMPARE_SWAP); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.swap); - ohdr->u.atomic_eth.compare_data = cpu_to_be64( - wqe->atomic_wr.compare_add); + put_ib_ateth_swap(wqe->atomic_wr.swap, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); } else { qp->s_state = OP(FETCH_ADD); - ohdr->u.atomic_eth.swap_data = cpu_to_be64( - wqe->atomic_wr.compare_add); - ohdr->u.atomic_eth.compare_data = 0; + put_ib_ateth_swap(wqe->atomic_wr.compare_add, + &ohdr->u.atomic_eth); + put_ib_ateth_swap(0, &ohdr->u.atomic_eth); } - ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32( - wqe->atomic_wr.remote_addr >> 32); - ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32( - wqe->atomic_wr.remote_addr); + put_ib_ateth_vaddr(wqe->atomic_wr.remote_addr, + &ohdr->u.atomic_eth); ohdr->u.atomic_eth.rkey = cpu_to_be32( wqe->atomic_wr.rkey); hwords += sizeof(struct ib_atomic_eth) / sizeof(u32); @@ -632,8 +627,8 @@ void qib_send_rc_ack(struct rvt_qp *qp) u32 hwords; u32 pbufn; u32 __iomem *piobuf; - struct qib_ib_header hdr; - struct qib_other_headers *ohdr; + struct ib_header hdr; + struct ib_other_headers *ohdr; u32 control; unsigned long flags; @@ -942,9 +937,9 @@ static void reset_sending_psn(struct rvt_qp *qp, u32 psn) /* * This should be called with the QP s_lock held and interrupts disabled. */ -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr) +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; struct ib_wc wc; unsigned i; @@ -1177,7 +1172,7 @@ static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } @@ -1361,7 +1356,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, qib_restart_rc(qp, qp->s_last_psn + 1, 0); if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_SEND; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1383,7 +1378,7 @@ static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn, * Called at interrupt level. */ static void qib_rc_rcv_resp(struct qib_ibport *ibp, - struct qib_other_headers *ohdr, + struct ib_other_headers *ohdr, void *data, u32 tlen, struct rvt_qp *qp, u32 opcode, @@ -1463,12 +1458,9 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp, case OP(ATOMIC_ACKNOWLEDGE): case OP(RDMA_READ_RESPONSE_FIRST): aeth = be32_to_cpu(ohdr->u.aeth); - if (opcode == OP(ATOMIC_ACKNOWLEDGE)) { - __be32 *p = ohdr->u.at.atomic_ack_eth; - - val = ((u64) be32_to_cpu(p[0]) << 32) | - be32_to_cpu(p[1]); - } else + if (opcode == OP(ATOMIC_ACKNOWLEDGE)) + val = ib_u64_get(&ohdr->u.at.atomic_ack_eth); + else val = 0; if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) || opcode != OP(RDMA_READ_RESPONSE_FIRST)) @@ -1608,7 +1600,7 @@ bail: * Return 1 if no more processing is needed; otherwise return 0 to * schedule a response to be sent. */ -static int qib_rc_rcv_error(struct qib_other_headers *ohdr, +static int qib_rc_rcv_error(struct ib_other_headers *ohdr, void *data, struct rvt_qp *qp, u32 opcode, @@ -1640,7 +1632,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr, */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } } @@ -1848,11 +1840,11 @@ static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n) * for the given QP. * Called at interrupt level. */ -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; @@ -2177,8 +2169,7 @@ send_last: e->rdma_sge.mr = NULL; } ateth = &ohdr->u.atomic_eth; - vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) | - be32_to_cpu(ateth->vaddr[1]); + vaddr = get_ib_ateth_vaddr(ateth); if (unlikely(vaddr & (sizeof(u64) - 1))) goto nack_inv_unlck; rkey = be32_to_cpu(ateth->rkey); @@ -2189,11 +2180,11 @@ send_last: goto nack_acc_unlck; /* Perform atomic OP and save result. */ maddr = (atomic64_t *) qp->r_sge.sge.vaddr; - sdata = be64_to_cpu(ateth->swap_data); + sdata = get_ib_ateth_swap(ateth); e->atomic_data = (opcode == OP(FETCH_ADD)) ? (u64) atomic64_add_return(sdata, maddr) - sdata : (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr, - be64_to_cpu(ateth->compare_data), + get_ib_ateth_compare(ateth), sdata); rvt_put_mr(qp->r_sge.sge.mr); qp->r_sge.num_sge = 0; @@ -2233,7 +2224,7 @@ rnr_nak: /* Queue RNR NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2245,7 +2236,7 @@ nack_op_err: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; @@ -2259,7 +2250,7 @@ nack_inv: /* Queue NAK for later */ if (list_empty(&qp->rspwait)) { qp->r_flags |= RVT_R_RSP_NAK; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); list_add_tail(&qp->rspwait, &rcd->qp_wait_list); } return; diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c index b67779256297..de1bde5950f5 100644 --- a/drivers/infiniband/hw/qib/qib_ruc.c +++ b/drivers/infiniband/hw/qib/qib_ruc.c @@ -265,7 +265,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id) * * The s_lock will be acquired around the qib_migrate_qp() call. */ -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0) { __be64 guid; @@ -680,7 +680,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, return sizeof(struct ib_grh) / sizeof(u32); } -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2) { struct qib_qp_priv *priv = qp->priv; diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c index 1d61bd04f449..5b2d483451ad 100644 --- a/drivers/infiniband/hw/qib/qib_uc.c +++ b/drivers/infiniband/hw/qib/qib_uc.c @@ -48,7 +48,7 @@ int qib_make_uc_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_swqe *wqe; u32 hwords; u32 bth0; @@ -236,10 +236,10 @@ bail: * for the given QP. * Called at interrupt level. */ -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; u32 opcode; u32 hdrsize; u32 psn; diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c index 10d062561bd9..f45cad1198b0 100644 --- a/drivers/infiniband/hw/qib/qib_ud.c +++ b/drivers/infiniband/hw/qib/qib_ud.c @@ -245,7 +245,7 @@ drop: int qib_make_ud_req(struct rvt_qp *qp, unsigned long *flags) { struct qib_qp_priv *priv = qp->priv; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct ib_ah_attr *ah_attr; struct qib_pportdata *ppd; struct qib_ibport *ibp; @@ -435,10 +435,10 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey) * for the given QP. * Called at interrupt level. */ -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; int opcode; u32 hdrsize; u32 pad; diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 2d2b94fd3633..75f08624ac05 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -67,7 +67,8 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages, for (got = 0; got < num_pages; got += ret) { ret = get_user_pages(start_page + got * PAGE_SIZE, - num_pages - got, 1, 1, + num_pages - got, + FOLL_WRITE | FOLL_FORCE, p + got, NULL); if (ret < 0) goto bail_release; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fd1dfbce5539..954f15064514 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -313,7 +313,7 @@ static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length) * for the given QP. * Called at interrupt level. */ -static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp) { struct qib_ibport *ibp = &rcd->ppd->ibport_data; @@ -366,10 +366,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen) { struct qib_pportdata *ppd = rcd->ppd; struct qib_ibport *ibp = &ppd->ibport_data; - struct qib_ib_header *hdr = rhdr; + struct ib_header *hdr = rhdr; struct qib_devdata *dd = ppd->dd; struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; - struct qib_other_headers *ohdr; + struct ib_other_headers *ohdr; struct rvt_qp *qp; u32 qp_num; int lnh; @@ -841,7 +841,7 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status) if (tx->wqe) qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS); else if (qp->ibqp.qp_type == IB_QPT_RC) { - struct qib_ib_header *hdr; + struct ib_header *hdr; if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) hdr = &tx->align_buf->hdr; @@ -889,7 +889,7 @@ static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp) return ret; } -static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr, +static int qib_verbs_send_dma(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1025,7 +1025,7 @@ static int no_bufs_available(struct rvt_qp *qp) return ret; } -static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr, +static int qib_verbs_send_pio(struct rvt_qp *qp, struct ib_header *ibhdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len, u32 plen, u32 dwords) { @@ -1133,7 +1133,7 @@ done: * Return zero if packet is sent or queued OK. * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. */ -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len) { struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device); @@ -1370,7 +1370,8 @@ static int qib_modify_device(struct ib_device *device, } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct qib_ibport *ibp = &dd->pport[i].ibport_data; @@ -1606,8 +1607,6 @@ int qib_register_ib_device(struct qib_devdata *dd) /* Only need to initialize non-zero fields. */ setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); - qpt_mask = dd->qpn_mask; - INIT_LIST_HEAD(&dev->piowait); INIT_LIST_HEAD(&dev->dmawait); INIT_LIST_HEAD(&dev->txwait); diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h index 736ced684842..94fd30fdedac 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.h +++ b/drivers/infiniband/hw/qib/qib_verbs.h @@ -45,6 +45,7 @@ #include <linux/completion.h> #include <rdma/ib_pack.h> #include <rdma/ib_user_verbs.h> +#include <rdma/ib_hdrs.h> #include <rdma/rdma_vt.h> #include <rdma/rdmavt_cq.h> @@ -63,16 +64,6 @@ struct qib_verbs_txreq; */ #define QIB_UVERBS_ABI_VERSION 2 -#define IB_SEQ_NAK (3 << 29) - -/* AETH NAK opcode values */ -#define IB_RNR_NAK 0x20 -#define IB_NAK_PSN_ERROR 0x60 -#define IB_NAK_INVALID_REQUEST 0x61 -#define IB_NAK_REMOTE_ACCESS_ERROR 0x62 -#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63 -#define IB_NAK_INVALID_RD_REQUEST 0x64 - /* IB Performance Manager status values */ #define IB_PMA_SAMPLE_STATUS_DONE 0x00 #define IB_PMA_SAMPLE_STATUS_STARTED 0x01 @@ -87,22 +78,9 @@ struct qib_verbs_txreq; #define QIB_VENDOR_IPG cpu_to_be16(0xFFA0) -#define IB_BTH_REQ_ACK (1 << 31) -#define IB_BTH_SOLICITED (1 << 23) -#define IB_BTH_MIG_REQ (1 << 22) - /* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */ #define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26) -#define IB_GRH_VERSION 6 -#define IB_GRH_VERSION_MASK 0xF -#define IB_GRH_VERSION_SHIFT 28 -#define IB_GRH_TCLASS_MASK 0xFF -#define IB_GRH_TCLASS_SHIFT 20 -#define IB_GRH_FLOW_MASK 0xFFFFF -#define IB_GRH_FLOW_SHIFT 0 -#define IB_GRH_NEXT_HDR 0x1B - #define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL) /* Values for set/get portinfo VLCap OperationalVLs */ @@ -129,61 +107,9 @@ static inline int qib_num_vls(int vls) } } -struct ib_reth { - __be64 vaddr; - __be32 rkey; - __be32 length; -} __packed; - -struct ib_atomic_eth { - __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */ - __be32 rkey; - __be64 swap_data; - __be64 compare_data; -} __packed; - -struct qib_other_headers { - __be32 bth[3]; - union { - struct { - __be32 deth[2]; - __be32 imm_data; - } ud; - struct { - struct ib_reth reth; - __be32 imm_data; - } rc; - struct { - __be32 aeth; - __be32 atomic_ack_eth[2]; - } at; - __be32 imm_data; - __be32 aeth; - __be32 ieth; - struct ib_atomic_eth atomic_eth; - } u; -} __packed; - -/* - * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes - * long (72 w/ imm_data). Only the first 56 bytes of the IB header - * will be in the eager header buffer. The remaining 12 or 16 bytes - * are in the data buffer. - */ -struct qib_ib_header { - __be16 lrh[4]; - union { - struct { - struct ib_grh grh; - struct qib_other_headers oth; - } l; - struct qib_other_headers oth; - } u; -} __packed; - struct qib_pio_header { __le32 pbc[2]; - struct qib_ib_header hdr; + struct ib_header hdr; } __packed; /* @@ -191,7 +117,7 @@ struct qib_pio_header { * is made common. */ struct qib_qp_priv { - struct qib_ib_header *s_hdr; /* next packet header to send */ + struct ib_header *s_hdr; /* next packet header to send */ struct list_head iowait; /* link for wait PIO buf */ atomic_t s_dma_busy; struct qib_verbs_txreq *s_tx; @@ -376,7 +302,7 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail); void qib_put_txreq(struct qib_verbs_txreq *tx); -int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr, +int qib_verbs_send(struct rvt_qp *qp, struct ib_header *hdr, u32 hdrwords, struct rvt_sge_state *ss, u32 len); void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, @@ -384,10 +310,10 @@ void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release); -void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_uc_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); -void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr, +void qib_rc_rcv(struct qib_ctxtdata *rcd, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr); @@ -398,13 +324,13 @@ struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid); void qib_rc_rnr_retry(unsigned long arg); -void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr); +void qib_rc_send_complete(struct rvt_qp *qp, struct ib_header *hdr); void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err); int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr); -void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr, +void qib_ud_rcv(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, void *data, u32 tlen, struct rvt_qp *qp); void mr_rcu_callback(struct rcu_head *list); @@ -413,13 +339,13 @@ int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only); void qib_migrate_qp(struct rvt_qp *qp); -int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr, +int qib_ruc_check_hdr(struct qib_ibport *ibp, struct ib_header *hdr, int has_grh, struct rvt_qp *qp, u32 bth0); u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr, struct ib_global_route *grh, u32 hwords, u32 nwords); -void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr, +void qib_make_ruc_header(struct rvt_qp *qp, struct ib_other_headers *ohdr, u32 bth0, u32 bth2); void _qib_do_send(struct work_struct *work); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c229b9f4a52d..0a89a955550b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -664,7 +664,8 @@ static int __init usnic_ib_init(void) return err; } - if (pci_register_driver(&usnic_ib_pci_driver)) { + err = pci_register_driver(&usnic_ib_pci_driver); + if (err) { usnic_err("Unable to register with PCI\n"); goto out_umem_fini; } diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index a0b6ebee4d8a..1ccee6ea5bc3 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -111,6 +111,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, int i; int flags; dma_addr_t pa; + unsigned int gup_flags; if (!can_do_mlock()) return -EPERM; @@ -135,6 +136,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, flags = IOMMU_READ | IOMMU_CACHE; flags |= (writable) ? IOMMU_WRITE : 0; + gup_flags = FOLL_WRITE; + gup_flags |= (writable) ? 0 : FOLL_FORCE; cur_base = addr & PAGE_MASK; ret = 0; @@ -142,7 +145,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable, ret = get_user_pages(cur_base, min_t(unsigned long, npages, PAGE_SIZE / sizeof(struct page *)), - 1, !writable, page_list, NULL); + gup_flags, page_list, NULL); if (ret < 0) goto out; diff --git a/drivers/infiniband/sw/rdmavt/cq.c b/drivers/infiniband/sw/rdmavt/cq.c index f2f229efbe64..6d9904a4a0ab 100644 --- a/drivers/infiniband/sw/rdmavt/cq.c +++ b/drivers/infiniband/sw/rdmavt/cq.c @@ -129,7 +129,7 @@ void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited) if (likely(worker)) { cq->notify = RVT_CQ_NONE; cq->triggered++; - queue_kthread_work(worker, &cq->comptask); + kthread_queue_work(worker, &cq->comptask); } } @@ -265,7 +265,7 @@ struct ib_cq *rvt_create_cq(struct ib_device *ibdev, cq->ibcq.cqe = entries; cq->notify = RVT_CQ_NONE; spin_lock_init(&cq->lock); - init_kthread_work(&cq->comptask, send_complete); + kthread_init_work(&cq->comptask, send_complete); cq->queue = wc; ret = &cq->ibcq; @@ -295,7 +295,7 @@ int rvt_destroy_cq(struct ib_cq *ibcq) struct rvt_cq *cq = ibcq_to_rvtcq(ibcq); struct rvt_dev_info *rdi = cq->rdi; - flush_kthread_work(&cq->comptask); + kthread_flush_work(&cq->comptask); spin_lock(&rdi->n_cqs_lock); rdi->n_cqs_allocated--; spin_unlock(&rdi->n_cqs_lock); @@ -514,7 +514,7 @@ int rvt_driver_cq_init(struct rvt_dev_info *rdi) rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL); if (!rdi->worker) return -ENOMEM; - init_kthread_worker(rdi->worker); + kthread_init_worker(rdi->worker); task = kthread_create_on_node( kthread_worker_fn, rdi->worker, @@ -547,7 +547,7 @@ void rvt_cq_exit(struct rvt_dev_info *rdi) /* blocks future queuing from send_complete() */ rdi->worker = NULL; smp_wmb(); /* See rdi_cq_enter */ - flush_kthread_worker(worker); + kthread_flush_worker(worker); kthread_stop(worker->task); kfree(worker); } diff --git a/drivers/infiniband/sw/rdmavt/dma.c b/drivers/infiniband/sw/rdmavt/dma.c index 33076a5eee2f..01f71caa3ac4 100644 --- a/drivers/infiniband/sw/rdmavt/dma.c +++ b/drivers/infiniband/sw/rdmavt/dma.c @@ -138,6 +138,21 @@ static void rvt_unmap_sg(struct ib_device *dev, /* This is a stub, nothing to be done here */ } +static int rvt_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + return rvt_map_sg(dev, sgl, nents, direction); +} + +static void rvt_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs) +{ + return rvt_unmap_sg(dev, sg, nents, direction); +} + static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) { @@ -177,6 +192,8 @@ struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = { .unmap_page = rvt_dma_unmap_page, .map_sg = rvt_map_sg, .unmap_sg = rvt_unmap_sg, + .map_sg_attrs = rvt_map_sg_attrs, + .unmap_sg_attrs = rvt_unmap_sg_attrs, .sync_single_for_cpu = rvt_sync_single_for_cpu, .sync_single_for_device = rvt_sync_single_for_device, .alloc_coherent = rvt_dma_alloc_coherent, diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 80c4b6b401b8..46b64970058e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) { rvt_deinit_mregion(&mr->mr); rvt_free_lkey(&mr->mr); - vfree(mr); + kfree(mr); } /** diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bdb540f25a88..6500c3b5a89c 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -488,60 +488,23 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags); if (removed) { synchronize_rcu(); - if (atomic_dec_and_test(&qp->refcount)) - wake_up(&qp->wait); + rvt_put_qp(qp); } } /** - * reset_qp - initialize the QP state to the reset state - * @qp: the QP to reset + * rvt_init_qp - initialize the QP state to the reset state + * @qp: the QP to init or reinit * @type: the QP type - * r and s lock are required to be held by the caller + * + * This function is called from both rvt_create_qp() and + * rvt_reset_qp(). The difference is that the reset + * patch the necessary locks to protect against concurent + * access. */ -static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, - enum ib_qp_type type) - __releases(&qp->s_lock) - __releases(&qp->s_hlock) - __releases(&qp->r_lock) - __acquires(&qp->r_lock) - __acquires(&qp->s_hlock) - __acquires(&qp->s_lock) +static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) { - if (qp->state != IB_QPS_RESET) { - qp->state = IB_QPS_RESET; - - /* Let drivers flush their waitlist */ - rdi->driver_f.flush_qp_waiters(qp); - qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); - spin_unlock(&qp->s_lock); - spin_unlock(&qp->s_hlock); - spin_unlock_irq(&qp->r_lock); - - /* Stop the send queue and the retry timer */ - rdi->driver_f.stop_send_queue(qp); - - /* Wait for things to stop */ - rdi->driver_f.quiesce_qp(qp); - - /* take qp out the hash and wait for it to be unused */ - rvt_remove_qp(rdi, qp); - wait_event(qp->wait, !atomic_read(&qp->refcount)); - - /* grab the lock b/c it was locked at call time */ - spin_lock_irq(&qp->r_lock); - spin_lock(&qp->s_hlock); - spin_lock(&qp->s_lock); - - rvt_clear_mr_refs(qp, 1); - } - - /* - * Let the driver do any tear down it needs to for a qp - * that has been reset - */ - rdi->driver_f.notify_qp_reset(qp); - qp->remote_qpn = 0; qp->qkey = 0; qp->qp_access_flags = 0; @@ -587,6 +550,60 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, } /** + * rvt_reset_qp - initialize the QP state to the reset state + * @qp: the QP to reset + * @type: the QP type + * + * r_lock, s_hlock, and s_lock are required to be held by the caller + */ +static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, + enum ib_qp_type type) + __must_hold(&qp->s_lock) + __must_hold(&qp->s_hlock) + __must_hold(&qp->r_lock) +{ + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); + if (qp->state != IB_QPS_RESET) { + qp->state = IB_QPS_RESET; + + /* Let drivers flush their waitlist */ + rdi->driver_f.flush_qp_waiters(qp); + qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT); + spin_unlock(&qp->s_lock); + spin_unlock(&qp->s_hlock); + spin_unlock_irq(&qp->r_lock); + + /* Stop the send queue and the retry timer */ + rdi->driver_f.stop_send_queue(qp); + + /* Wait for things to stop */ + rdi->driver_f.quiesce_qp(qp); + + /* take qp out the hash and wait for it to be unused */ + rvt_remove_qp(rdi, qp); + wait_event(qp->wait, !atomic_read(&qp->refcount)); + + /* grab the lock b/c it was locked at call time */ + spin_lock_irq(&qp->r_lock); + spin_lock(&qp->s_hlock); + spin_lock(&qp->s_lock); + + rvt_clear_mr_refs(qp, 1); + /* + * Let the driver do any tear down or re-init it needs to for + * a qp that has been reset + */ + rdi->driver_f.notify_qp_reset(qp); + } + rvt_init_qp(rdi, qp, type); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_hlock); + lockdep_assert_held(&qp->s_lock); +} + +/** * rvt_create_qp - create a queue pair for a device * @ibpd: the protection domain who's device we create the queue pair for * @init_attr: the attributes of the queue pair @@ -766,7 +783,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, } qp->ibqp.qp_num = err; qp->port_num = init_attr->port_num; - rvt_reset_qp(rdi, qp, init_attr->qp_type); + rvt_init_qp(rdi, qp, init_attr->qp_type); break; default: @@ -873,7 +890,8 @@ bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - vfree(qp->r_rq.wq); + if (!qp->ip) + vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); @@ -905,6 +923,8 @@ int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err) int ret = 0; struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device); + lockdep_assert_held(&qp->r_lock); + lockdep_assert_held(&qp->s_lock); if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET) goto bail; @@ -979,7 +999,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1]; unsigned long flags; - atomic_inc(&qp->refcount); + rvt_get_qp(qp); spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags); if (qp->ibqp.qp_num <= 1) { @@ -996,7 +1016,7 @@ static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp) } /** - * qib_modify_qp - modify the attributes of a queue pair + * rvt_modify_qp - modify the attributes of a queue pair * @ibqp: the queue pair who's attributes we're modifying * @attr: the new attributes * @attr_mask: the mask of attributes to modify diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 55f0e8f0ca79..ab6c3c25d7ff 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -358,18 +358,15 @@ static int __init rxe_module_init(void) /* initialize slab caches for managed objects */ err = rxe_cache_init(); if (err) { - pr_err("rxe: unable to init object pools\n"); + pr_err("unable to init object pools\n"); return err; } err = rxe_net_init(); - if (err) { - pr_err("rxe: unable to init\n"); - rxe_cache_exit(); + if (err) return err; - } - pr_info("rxe: loaded\n"); + pr_info("loaded\n"); return 0; } @@ -379,8 +376,8 @@ static void __exit rxe_module_exit(void) rxe_net_exit(); rxe_cache_exit(); - pr_info("rxe: unloaded\n"); + pr_info("unloaded\n"); } -module_init(rxe_module_init); +late_initcall(rxe_module_init); module_exit(rxe_module_exit); diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 12c71c549f97..a696af81e4a5 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -34,6 +34,11 @@ #ifndef RXE_H #define RXE_H +#ifdef pr_fmt +#undef pr_fmt +#endif +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/module.h> #include <linux/skbuff.h> #include <linux/crc32.h> diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 5c9474212d4e..604f6fee96bd 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -39,7 +39,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) struct rxe_port *port; if (attr->port_num != 1) { - pr_info("rxe: invalid port_num = %d\n", attr->port_num); + pr_info("invalid port_num = %d\n", attr->port_num); return -EINVAL; } @@ -47,7 +47,7 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct ib_ah_attr *attr) if (attr->ah_flags & IB_AH_GRH) { if (attr->grh.sgid_index > port->attr.gid_tbl_len) { - pr_info("rxe: invalid sgid index = %d\n", + pr_info("invalid sgid index = %d\n", attr->grh.sgid_index); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 36f67de44095..6c5e29db88e3 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -567,7 +567,8 @@ int rxe_completer(void *arg) state = COMPST_GET_ACK; while (1) { - pr_debug("state = %s\n", comp_state_name[state]); + pr_debug("qp#%d state = %s\n", qp_num(qp), + comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); @@ -689,7 +690,14 @@ int rxe_completer(void *arg) qp->req.need_retry = 1; rxe_run_task(&qp->req.task, 1); } + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; + } else { wqe->status = IB_WC_RETRY_EXC_ERR; state = COMPST_ERROR; @@ -702,7 +710,8 @@ int rxe_completer(void *arg) qp->comp.rnr_retry--; qp->req.need_retry = 1; - pr_debug("set rnr nak timer\n"); + pr_debug("qp#%d set rnr nak timer\n", + qp_num(qp)); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); @@ -716,6 +725,12 @@ int rxe_completer(void *arg) case COMPST_ERROR: do_complete(qp, wqe); rxe_qp_error(qp); + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; } } diff --git a/drivers/infiniband/sw/rxe/rxe_dma.c b/drivers/infiniband/sw/rxe/rxe_dma.c index 7634c1a81b2b..a0f8af5851ae 100644 --- a/drivers/infiniband/sw/rxe/rxe_dma.c +++ b/drivers/infiniband/sw/rxe/rxe_dma.c @@ -117,6 +117,21 @@ static void rxe_unmap_sg(struct ib_device *dev, WARN_ON(!valid_dma_direction(direction)); } +static int rxe_map_sg_attrs(struct ib_device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction direction, + unsigned long attrs) +{ + return rxe_map_sg(dev, sgl, nents, direction); +} + +static void rxe_unmap_sg_attrs(struct ib_device *dev, + struct scatterlist *sg, int nents, + enum dma_data_direction direction, + unsigned long attrs) +{ + rxe_unmap_sg(dev, sg, nents, direction); +} + static void rxe_sync_single_for_cpu(struct ib_device *dev, u64 addr, size_t size, enum dma_data_direction dir) @@ -159,6 +174,8 @@ struct ib_dma_mapping_ops rxe_dma_mapping_ops = { .unmap_page = rxe_dma_unmap_page, .map_sg = rxe_map_sg, .unmap_sg = rxe_unmap_sg, + .map_sg_attrs = rxe_map_sg_attrs, + .unmap_sg_attrs = rxe_unmap_sg_attrs, .sync_single_for_cpu = rxe_sync_single_for_cpu, .sync_single_for_device = rxe_sync_single_for_device, .alloc_coherent = rxe_dma_alloc_coherent, diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 4a5484ef604f..73849a5a91b3 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -198,7 +198,7 @@ void free_rd_atomic_resource(struct rxe_qp *qp, struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) { qp->resp.res_head++; - if (unlikely(qp->resp.res_head == qp->attr.max_rd_atomic)) + if (unlikely(qp->resp.res_head == qp->attr.max_dest_rd_atomic)) qp->resp.res_head = 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 54b3c7c99eff..c572a4c09359 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -126,7 +126,7 @@ found_it: ret = remap_vmalloc_range(vma, ip->obj, 0); if (ret) { - pr_err("rxe: err %d from remap_vmalloc_range\n", ret); + pr_err("err %d from remap_vmalloc_range\n", ret); goto done; } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index f3dab6574504..1869152f1d23 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -39,7 +39,7 @@ */ static u8 rxe_get_key(void) { - static unsigned key = 1; + static u32 key = 1; key = key << 1; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0b8d2ea8b41d..b8258e4f0aea 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -65,7 +65,7 @@ struct rxe_dev *net_to_rxe(struct net_device *ndev) return found; } -struct rxe_dev *get_rxe_by_name(const char* name) +struct rxe_dev *get_rxe_by_name(const char *name) { struct rxe_dev *rxe; struct rxe_dev *found = NULL; @@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return sock; } -static void rxe_release_udp_tunnel(struct socket *sk) +void rxe_release_udp_tunnel(struct socket *sk) { - udp_tunnel_sock_release(sk); + if (sk) + udp_tunnel_sock_release(sk); } static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, @@ -349,14 +350,14 @@ static void prepare_ipv6_hdr(struct dst_entry *dst, struct sk_buff *skb, ip6h->payload_len = htons(skb->len - sizeof(*ip6h)); } -static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, struct rxe_av *av) { struct dst_entry *dst; bool xnet = false; __be16 df = htons(IP_DF); struct in_addr *saddr = &av->sgid_addr._sockaddr_in.sin_addr; struct in_addr *daddr = &av->dgid_addr._sockaddr_in.sin_addr; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); dst = rxe_find_route4(rxe->ndev, saddr, daddr); if (!dst) { @@ -375,12 +376,12 @@ static int prepare4(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) return 0; } -static int prepare6(struct rxe_dev *rxe, struct sk_buff *skb, struct rxe_av *av) +static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, + struct sk_buff *skb, struct rxe_av *av) { struct dst_entry *dst; struct in6_addr *saddr = &av->sgid_addr._sockaddr_in6.sin6_addr; struct in6_addr *daddr = &av->dgid_addr._sockaddr_in6.sin6_addr; - struct rxe_pkt_info *pkt = SKB_TO_PKT(skb); dst = rxe_find_route6(rxe->ndev, saddr, daddr); if (!dst) { @@ -407,9 +408,9 @@ static int prepare(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_av *av = rxe_get_av(pkt); if (av->network_type == RDMA_NETWORK_IPV4) - err = prepare4(rxe, skb, av); + err = prepare4(rxe, pkt, skb, av); else if (av->network_type == RDMA_NETWORK_IPV6) - err = prepare6(rxe, skb, av); + err = prepare6(rxe, pkt, skb, av); *crc = rxe_icrc_hdr(pkt, skb); @@ -600,8 +601,7 @@ void rxe_port_up(struct rxe_dev *rxe) port->attr.phys_state = IB_PHYS_STATE_LINK_UP; rxe_port_event(rxe, IB_EVENT_PORT_ACTIVE); - pr_info("rxe: set %s active\n", rxe->ib_dev.name); - return; + pr_info("set %s active\n", rxe->ib_dev.name); } /* Caller must hold net_info_lock */ @@ -614,8 +614,7 @@ void rxe_port_down(struct rxe_dev *rxe) port->attr.phys_state = IB_PHYS_STATE_LINK_DOWN; rxe_port_event(rxe, IB_EVENT_PORT_ERR); - pr_info("rxe: set %s down\n", rxe->ib_dev.name); - return; + pr_info("set %s down\n", rxe->ib_dev.name); } static int rxe_notify(struct notifier_block *not_blk, @@ -640,7 +639,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_port_down(rxe); break; case NETDEV_CHANGEMTU: - pr_info("rxe: %s changed mtu to %d\n", ndev->name, ndev->mtu); + pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_REBOOT: @@ -650,7 +649,7 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: - pr_info("rxe: ignoring netdev event = %ld for %s\n", + pr_info("ignoring netdev event = %ld for %s\n", event, ndev->name); break; } @@ -658,51 +657,68 @@ out: return NOTIFY_OK; } -static struct notifier_block rxe_net_notifier = { +struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -int rxe_net_init(void) +int rxe_net_ipv4_init(void) { - int err; - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); - if (IS_ERR(recv_sockets.sk6)) { - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); - return -1; - } - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); + htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { - rxe_release_udp_tunnel(recv_sockets.sk6); recv_sockets.sk4 = NULL; - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); + pr_err("Failed to create IPv4 UDP tunnel\n"); return -1; } - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); - pr_err("rxe: Failed to rigister netdev notifier\n"); - } + return 0; +} - return err; +int rxe_net_ipv6_init(void) +{ +#if IS_ENABLED(CONFIG_IPV6) + + spin_lock_init(&dev_list_lock); + + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), true); + if (IS_ERR(recv_sockets.sk6)) { + recv_sockets.sk6 = NULL; + pr_err("Failed to create IPv6 UDP tunnel\n"); + return -1; + } +#endif + return 0; } void rxe_net_exit(void) { - if (recv_sockets.sk6) - rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); + unregister_netdevice_notifier(&rxe_net_notifier); +} - if (recv_sockets.sk4) - rxe_release_udp_tunnel(recv_sockets.sk4); +int rxe_net_init(void) +{ + int err; - unregister_netdevice_notifier(&rxe_net_notifier); + recv_sockets.sk6 = NULL; + + err = rxe_net_ipv4_init(); + if (err) + return err; + err = rxe_net_ipv6_init(); + if (err) + goto err_out; + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("Failed to register netdev notifier\n"); + goto err_out; + } + return 0; +err_out: + rxe_net_exit(); + return err; } diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 7b06f76d16cc..1c06b3bfe1b6 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -44,6 +44,8 @@ struct rxe_recv_sockets { }; extern struct rxe_recv_sockets recv_sockets; +extern struct notifier_block rxe_net_notifier; +void rxe_release_udp_tunnel(struct socket *sk); struct rxe_dev *rxe_net_add(struct net_device *ndev); diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 22ba24f2a2c1..b8036cfbce04 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -146,7 +146,7 @@ static void free_rd_atomic_resources(struct rxe_qp *qp) if (qp->resp.resources) { int i; - for (i = 0; i < qp->attr.max_rd_atomic; i++) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { struct resp_res *res = &qp->resp.resources[i]; free_rd_atomic_resource(qp, res); @@ -174,7 +174,7 @@ static void cleanup_rd_atomic_resources(struct rxe_qp *qp) struct resp_res *res; if (qp->resp.resources) { - for (i = 0; i < qp->attr.max_rd_atomic; i++) { + for (i = 0; i < qp->attr.max_dest_rd_atomic; i++) { res = &qp->resp.resources[i]; free_rd_atomic_resource(qp, res); } @@ -298,8 +298,8 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, wqe_size = rcv_wqe_size(qp->rq.max_sge); - pr_debug("max_wr = %d, max_sge = %d, wqe_size = %d\n", - qp->rq.max_wr, qp->rq.max_sge, wqe_size); + pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", + qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, @@ -596,14 +596,21 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic); + qp->attr.max_rd_atomic = max_rd_atomic; + atomic_set(&qp->req.rd_atomic, max_rd_atomic); + } + + if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { + int max_dest_rd_atomic = + __roundup_pow_of_two(attr->max_dest_rd_atomic); + + qp->attr.max_dest_rd_atomic = max_dest_rd_atomic; + free_rd_atomic_resources(qp); - err = alloc_rd_atomic_resources(qp, max_rd_atomic); + err = alloc_rd_atomic_resources(qp, max_dest_rd_atomic); if (err) return err; - - qp->attr.max_rd_atomic = max_rd_atomic; - atomic_set(&qp->req.rd_atomic, max_rd_atomic); } if (mask & IB_QP_CUR_STATE) @@ -673,24 +680,27 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_RETRY_CNT) { qp->attr.retry_cnt = attr->retry_cnt; qp->comp.retry_cnt = attr->retry_cnt; - pr_debug("set retry count = %d\n", attr->retry_cnt); + pr_debug("qp#%d set retry count = %d\n", qp_num(qp), + attr->retry_cnt); } if (mask & IB_QP_RNR_RETRY) { qp->attr.rnr_retry = attr->rnr_retry; qp->comp.rnr_retry = attr->rnr_retry; - pr_debug("set rnr retry count = %d\n", attr->rnr_retry); + pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), + attr->rnr_retry); } if (mask & IB_QP_RQ_PSN) { qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); qp->resp.psn = qp->attr.rq_psn; - pr_debug("set resp psn = 0x%x\n", qp->resp.psn); + pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), + qp->resp.psn); } if (mask & IB_QP_MIN_RNR_TIMER) { qp->attr.min_rnr_timer = attr->min_rnr_timer; - pr_debug("set min rnr timer = 0x%x\n", + pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), attr->min_rnr_timer); } @@ -698,12 +708,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); qp->req.psn = qp->attr.sq_psn; qp->comp.psn = qp->attr.sq_psn; - pr_debug("set req psn = 0x%x\n", qp->req.psn); - } - - if (mask & IB_QP_MAX_DEST_RD_ATOMIC) { - qp->attr.max_dest_rd_atomic = - __roundup_pow_of_two(attr->max_dest_rd_atomic); + pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); } if (mask & IB_QP_PATH_MIG_STATE) @@ -717,38 +722,38 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, switch (attr->qp_state) { case IB_QPS_RESET: - pr_debug("qp state -> RESET\n"); + pr_debug("qp#%d state -> RESET\n", qp_num(qp)); rxe_qp_reset(qp); break; case IB_QPS_INIT: - pr_debug("qp state -> INIT\n"); + pr_debug("qp#%d state -> INIT\n", qp_num(qp)); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; break; case IB_QPS_RTR: - pr_debug("qp state -> RTR\n"); + pr_debug("qp#%d state -> RTR\n", qp_num(qp)); qp->resp.state = QP_STATE_READY; break; case IB_QPS_RTS: - pr_debug("qp state -> RTS\n"); + pr_debug("qp#%d state -> RTS\n", qp_num(qp)); qp->req.state = QP_STATE_READY; break; case IB_QPS_SQD: - pr_debug("qp state -> SQD\n"); + pr_debug("qp#%d state -> SQD\n", qp_num(qp)); rxe_qp_drain(qp); break; case IB_QPS_SQE: - pr_warn("qp state -> SQE !!?\n"); + pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); /* Not possible from modify_qp. */ break; case IB_QPS_ERR: - pr_debug("qp state -> ERR\n"); + pr_debug("qp#%d state -> ERR\n", qp_num(qp)); rxe_qp_error(qp); break; } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 3d464c23e08b..46f062842a9a 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * make a copy of the skb to post to the next qp */ skb_copy = (mce->qp_list.next != &mcg->qp_list) ? - skb_clone(skb, GFP_KERNEL) : NULL; + skb_clone(skb, GFP_ATOMIC) : NULL; pkt->qp = qp; rxe_add_ref(qp); @@ -387,7 +387,8 @@ int rxe_rcv(struct sk_buff *skb) pack_icrc = be32_to_cpu(*icrcp); calc_icrc = rxe_icrc_hdr(pkt, skb); - calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), payload_size(pkt)); + calc_icrc = crc32_le(calc_icrc, (u8 *)payload_addr(pkt), + payload_size(pkt)); calc_icrc = cpu_to_be32(~calc_icrc); if (unlikely(calc_icrc != pack_icrc)) { char saddr[sizeof(struct in6_addr)]; diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 33b2d9d77021..832846b73ea0 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -38,7 +38,7 @@ #include "rxe_queue.h" static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - unsigned opcode); + u32 opcode); static inline void retry_first_write_send(struct rxe_qp *qp, struct rxe_send_wqe *wqe, @@ -121,7 +121,7 @@ void rnr_nak_timer(unsigned long data) { struct rxe_qp *qp = (struct rxe_qp *)data; - pr_debug("rnr nak timer fired\n"); + pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); rxe_run_task(&qp->req.task, 1); } @@ -187,7 +187,7 @@ static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) return wqe; } -static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) +static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { case IB_WR_RDMA_WRITE: @@ -259,7 +259,7 @@ static int next_opcode_rc(struct rxe_qp *qp, unsigned opcode, int fits) return -EINVAL; } -static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) +static int next_opcode_uc(struct rxe_qp *qp, u32 opcode, int fits) { switch (opcode) { case IB_WR_RDMA_WRITE: @@ -311,7 +311,7 @@ static int next_opcode_uc(struct rxe_qp *qp, unsigned opcode, int fits) } static int next_opcode(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - unsigned opcode) + u32 opcode) { int fits = (wqe->dma.resid <= qp->mtu); @@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } static void update_wqe_state(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, - enum wqe_state *prev_state) + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt) { - enum wqe_state prev_state_ = wqe->state; - if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; } else { wqe->state = wqe_state_processing; } - - *prev_state = prev_state_; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_wqe_psn(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + int payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; else qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} - qp->req.opcode = pkt->opcode; +static void save_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_qp->req.psn = qp->req.psn; +} +static void rollback_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + qp->req.psn = rollback_qp->req.psn; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ + qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); @@ -566,12 +588,13 @@ int rxe_requester(void *arg) struct rxe_pkt_info pkt; struct sk_buff *skb; struct rxe_send_wqe *wqe; - unsigned mask; + enum rxe_hdr_mask mask; int payload; int mtu; int opcode; int ret; - enum wqe_state prev_state; + struct rxe_qp rollback_qp; + struct rxe_send_wqe rollback_wqe; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -603,7 +626,8 @@ next_wqe: rmr = rxe_pool_get_index(&rxe->mr_pool, wqe->wr.ex.invalidate_rkey >> 8); if (!rmr) { - pr_err("No mr for key %#x\n", wqe->wr.ex.invalidate_rkey); + pr_err("No mr for key %#x\n", + wqe->wr.ex.invalidate_rkey); wqe->state = wqe_state_error; wqe->status = IB_WC_MW_BIND_ERR; goto exit; @@ -679,22 +703,30 @@ next_wqe: skb = init_req_packet(qp, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { - pr_err("Failed allocating skb\n"); + pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); goto err; } if (fill_packet(qp, wqe, &pkt, skb, payload)) { - pr_debug("Error during fill packet\n"); + pr_debug("qp#%d Error during fill packet\n", qp_num(qp)); goto err; } - update_wqe_state(qp, wqe, &pkt, &prev_state); + /* + * To prevent a race on wqe access between requester and completer, + * wqe members state and psn need to be set before calling + * rxe_xmit_packet(). + * Otherwise, completer might initiate an unjustified retry flow. + */ + save_state(wqe, qp, &rollback_wqe, &rollback_qp); + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; kfree_skb(skb); - wqe->state = prev_state; + rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ebb03b46e2ad..dd3d88adc003 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -383,7 +383,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, * too many read/atomic ops, we just * recycle the responder resource queue */ - if (likely(qp->attr.max_rd_atomic > 0)) + if (likely(qp->attr.max_dest_rd_atomic > 0)) return RESPST_CHK_LENGTH; else return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ; @@ -749,6 +749,18 @@ static enum resp_states read_reply(struct rxe_qp *qp, return state; } +static void build_rdma_network_hdr(union rdma_network_hdr *hdr, + struct rxe_pkt_info *pkt) +{ + struct sk_buff *skb = PKT_TO_SKB(pkt); + + memset(hdr, 0, sizeof(*hdr)); + if (skb->protocol == htons(ETH_P_IP)) + memcpy(&hdr->roce4grh, ip_hdr(skb), sizeof(hdr->roce4grh)); + else if (skb->protocol == htons(ETH_P_IPV6)) + memcpy(&hdr->ibgrh, ipv6_hdr(skb), sizeof(hdr->ibgrh)); +} + /* Executes a new request. A retried request never reach that function (send * and writes are discarded, and reads and atomics are retried elsewhere. */ @@ -761,13 +773,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { union rdma_network_hdr hdr; - struct sk_buff *skb = PKT_TO_SKB(pkt); - memset(&hdr, 0, sizeof(hdr)); - if (skb->protocol == htons(ETH_P_IP)) - memcpy(&hdr.roce4grh, ip_hdr(skb), sizeof(hdr.roce4grh)); - else if (skb->protocol == htons(ETH_P_IPV6)) - memcpy(&hdr.ibgrh, ipv6_hdr(skb), sizeof(hdr.ibgrh)); + build_rdma_network_hdr(&hdr, pkt); err = send_data_in(qp, &hdr, sizeof(hdr)); if (err) @@ -881,7 +888,8 @@ static enum resp_states do_complete(struct rxe_qp *qp, rmr = rxe_pool_get_index(&rxe->mr_pool, wc->ex.invalidate_rkey >> 8); if (unlikely(!rmr)) { - pr_err("Bad rkey %#x invalidation\n", wc->ex.invalidate_rkey); + pr_err("Bad rkey %#x invalidation\n", + wc->ex.invalidate_rkey); return RESPST_ERROR; } rmr->state = RXE_MEM_STATE_FREE; @@ -972,11 +980,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; - res->first_psn = qp->resp.psn; - res->last_psn = qp->resp.psn; - res->cur_psn = qp->resp.psn; + res->first_psn = ack_pkt.psn; + res->last_psn = ack_pkt.psn; + res->cur_psn = ack_pkt.psn; rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); if (rc) { @@ -1116,8 +1126,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, rc = RESPST_CLEANUP; goto out; } - bth_set_psn(SKB_TO_PKT(skb_copy), - qp->resp.psn - 1); + /* Resend the result. */ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, pkt, skb_copy); @@ -1207,7 +1216,8 @@ int rxe_responder(void *arg) } while (1) { - pr_debug("state = %s\n", resp_state_name[state]); + pr_debug("qp#%d state = %s\n", qp_num(qp), + resp_state_name[state]); switch (state) { case RESPST_GET_REQ: state = get_req(qp, &pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_sysfs.c b/drivers/infiniband/sw/rxe/rxe_sysfs.c index cf8e77800046..d5ed7571128f 100644 --- a/drivers/infiniband/sw/rxe/rxe_sysfs.c +++ b/drivers/infiniband/sw/rxe/rxe_sysfs.c @@ -79,7 +79,7 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { - pr_err("rxe: add: invalid interface name\n"); + pr_err("add: invalid interface name\n"); err = -EINVAL; goto err; } @@ -92,20 +92,20 @@ static int rxe_param_set_add(const char *val, const struct kernel_param *kp) } if (net_to_rxe(ndev)) { - pr_err("rxe: already configured on %s\n", intf); + pr_err("already configured on %s\n", intf); err = -EINVAL; goto err; } rxe = rxe_net_add(ndev); if (!rxe) { - pr_err("rxe: failed to add %s\n", intf); + pr_err("failed to add %s\n", intf); err = -EINVAL; goto err; } rxe_set_port_state(ndev); - pr_info("rxe: added %s to %s\n", rxe->ib_dev.name, intf); + pr_info("added %s to %s\n", rxe->ib_dev.name, intf); err: if (ndev) dev_put(ndev); @@ -120,7 +120,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) len = sanitize_arg(val, intf, sizeof(intf)); if (!len) { - pr_err("rxe: add: invalid interface name\n"); + pr_err("add: invalid interface name\n"); return -EINVAL; } @@ -133,7 +133,7 @@ static int rxe_param_set_remove(const char *val, const struct kernel_param *kp) rxe = get_rxe_by_name(intf); if (!rxe) { - pr_err("rxe: not configured on %s\n", intf); + pr_err("not configured on %s\n", intf); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 4552be960c6a..19841c863daf 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -100,10 +100,12 @@ static int rxe_query_port(struct ib_device *dev, rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd); speed = cmd.speed; } else { - pr_warn("%s speed is unknown, defaulting to 1000\n", rxe->ndev->name); + pr_warn("%s speed is unknown, defaulting to 1000\n", + rxe->ndev->name); speed = 1000; } - rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, &attr->active_width); + rxe_eth_speed_to_ib_speed(speed, &attr->active_speed, + &attr->active_width); mutex_unlock(&rxe->usdev_lock); return 0; @@ -761,7 +763,7 @@ static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr, } static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr, - unsigned mask, u32 length) + unsigned int mask, u32 length) { int err; struct rxe_sq *sq = &qp->sq; @@ -801,26 +803,15 @@ err1: return err; } -static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, - struct ib_send_wr **bad_wr) +static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) { int err = 0; - struct rxe_qp *qp = to_rqp(ibqp); unsigned int mask; unsigned int length = 0; int i; int must_sched; - if (unlikely(!qp->valid)) { - *bad_wr = wr; - return -EINVAL; - } - - if (unlikely(qp->req.state < QP_STATE_READY)) { - *bad_wr = wr; - return -EINVAL; - } - while (wr) { mask = wr_opcode_mask(wr->opcode, qp); if (unlikely(!mask)) { @@ -861,6 +852,29 @@ static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, return err; } +static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, + struct ib_send_wr **bad_wr) +{ + struct rxe_qp *qp = to_rqp(ibqp); + + if (unlikely(!qp->valid)) { + *bad_wr = wr; + return -EINVAL; + } + + if (unlikely(qp->req.state < QP_STATE_READY)) { + *bad_wr = wr; + return -EINVAL; + } + + if (qp->is_user) { + /* Utilize process context to do protocol processing */ + rxe_run_task(&qp->req.task, 0); + return 0; + } else + return rxe_post_send_kernel(qp, wr, bad_wr); +} + static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, struct ib_recv_wr **bad_wr) { @@ -1133,8 +1147,8 @@ static int rxe_set_page(struct ib_mr *ibmr, u64 addr) return 0; } -static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, - unsigned int *sg_offset) +static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, + int sg_nents, unsigned int *sg_offset) { struct rxe_mem *mr = to_rmr(ibmr); int n; diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4f7d9b48df64..7b8d2d9e2263 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); @@ -771,7 +772,13 @@ static inline void ipoib_unregister_debugfs(void) { } #define ipoib_printk(level, priv, format, arg...) \ printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg) #define ipoib_warn(priv, format, arg...) \ - ipoib_printk(KERN_WARNING, priv, format , ## arg) +do { \ + static DEFINE_RATELIMIT_STATE(_rs, \ + 10 * HZ /*10 seconds */, \ + 100); \ + if (__ratelimit(&_rs)) \ + ipoib_printk(KERN_WARNING, priv, format , ## arg);\ +} while (0) extern int ipoib_sendq_size; extern int ipoib_recvq_size; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 951d9abcca8b..4ad297d3de89 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) } } +#define QPN_AND_OPTIONS_OFFSET 4 + static void ipoib_cm_tx_start(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, @@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ipoib_neigh *neigh; struct ipoib_cm_tx *p; unsigned long flags; + struct ipoib_path *path; int ret; struct ib_sa_path_rec pathrec; @@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work) p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; + qpn = IPOIB_QPN(neigh->daddr); + /* + * As long as the search is with these 2 locks, + * path existence indicates its validity. + */ + path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET); + if (!path) { + pr_info("%s ignore not valid path %pI6\n", + __func__, + neigh->daddr + QPN_AND_OPTIONS_OFFSET); + goto free_neigh; + } memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); spin_unlock_irqrestore(&priv->lock, flags); @@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) spin_lock_irqsave(&priv->lock, flags); if (ret) { +free_neigh: neigh = p->neigh; if (neigh) { neigh->cm = NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index dc6d241b9406..be11d5d5b8c1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, } if (level == IPOIB_FLUSH_LIGHT) { + int oper_up; ipoib_mark_paths_invalid(dev); + /* Set IPoIB operation as down to prevent races between: + * the flush flow which leaves MCG and on the fly joins + * which can happen during that time. mcast restart task + * should deal with join requests we missed. + */ + oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_mcast_dev_flush(dev); + if (oper_up) + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_flush_ah(dev); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 74bcaa064226..5636fc3da6b8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) return -EINVAL; } -static struct ipoib_path *__path_find(struct net_device *dev, void *gid) +struct ipoib_path *__path_find(struct net_device *dev, void *gid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->path_tree.rb_node; @@ -2196,7 +2196,8 @@ static int __init ipoib_init_module(void) * its private workqueue, and we only queue up flush events * on our global flush workqueue. This avoids the deadlocks. */ - ipoib_workqueue = create_singlethread_workqueue("ipoib_flush"); + ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", + WQ_MEM_RECLAIM); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index c55ecb2c3736..189dcd1709d2 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size; int i; - priv->pd = ib_alloc_pd(priv->ca); + priv->pd = ib_alloc_pd(priv->ca, 0); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; @@ -157,7 +157,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) * the various IPoIB tasks assume they will never race against * themselves, so always use a single thread workqueue */ - priv->wq = create_singlethread_workqueue("ipoib_wq"); + priv->wq = alloc_ordered_workqueue("ipoib_wq", WQ_MEM_RECLAIM); if (!priv->wq) { printk(KERN_WARNING "ipoib: failed to allocate device WQ\n"); goto out_free_pd; diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 0351059783b1..0be6a7c5ddb5 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -374,7 +374,6 @@ struct iser_reg_ops { struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; - struct ib_mr *mr; struct ib_event_handler event_handler; struct list_head ig_list; int refcount; diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 90be56893414..9c3e9ab53a41 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -199,7 +199,11 @@ iser_reg_dma(struct iser_device *device, struct iser_data_buf *mem, * FIXME: rework the registration code path to differentiate * rkey/lkey use cases */ - reg->rkey = device->mr ? device->mr->rkey : 0; + + if (device->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) + reg->rkey = device->pd->unsafe_global_rkey; + else + reg->rkey = 0; reg->sge.addr = ib_sg_dma_address(device->ib_device, &sg[0]); reg->sge.length = ib_sg_dma_len(device->ib_device, &sg[0]); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 1b4945367e4f..a4b791dfaa1d 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -88,7 +88,8 @@ static int iser_create_device_ib_res(struct iser_device *device) device->comps_used, ib_dev->name, ib_dev->num_comp_vectors, max_cqe); - device->pd = ib_alloc_pd(ib_dev); + device->pd = ib_alloc_pd(ib_dev, + iser_always_reg ? 0 : IB_PD_UNSAFE_GLOBAL_RKEY); if (IS_ERR(device->pd)) goto pd_err; @@ -103,26 +104,13 @@ static int iser_create_device_ib_res(struct iser_device *device) } } - if (!iser_always_reg) { - int access = IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE | - IB_ACCESS_REMOTE_READ; - - device->mr = ib_get_dma_mr(device->pd, access); - if (IS_ERR(device->mr)) - goto cq_err; - } - INIT_IB_EVENT_HANDLER(&device->event_handler, ib_dev, iser_event_handler); if (ib_register_event_handler(&device->event_handler)) - goto handler_err; + goto cq_err; return 0; -handler_err: - if (device->mr) - ib_dereg_mr(device->mr); cq_err: for (i = 0; i < device->comps_used; i++) { struct iser_comp *comp = &device->comps[i]; @@ -154,14 +142,10 @@ static void iser_free_device_ib_res(struct iser_device *device) } (void)ib_unregister_event_handler(&device->event_handler); - if (device->mr) - (void)ib_dereg_mr(device->mr); ib_dealloc_pd(device->pd); kfree(device->comps); device->comps = NULL; - - device->mr = NULL; device->pd = NULL; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ba6be060a476..6dd43f63238e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -309,7 +309,7 @@ isert_create_device_ib_res(struct isert_device *device) if (ret) goto out; - device->pd = ib_alloc_pd(ib_dev); + device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) { ret = PTR_ERR(device->pd); isert_err("failed to allocate pd, device %p, ret=%d\n", @@ -403,6 +403,7 @@ isert_init_conn(struct isert_conn *isert_conn) INIT_LIST_HEAD(&isert_conn->node); init_completion(&isert_conn->login_comp); init_completion(&isert_conn->login_req_comp); + init_waitqueue_head(&isert_conn->rem_wait); kref_init(&isert_conn->kref); mutex_init(&isert_conn->mutex); INIT_WORK(&isert_conn->release_work, isert_release_work); @@ -448,7 +449,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { - isert_err("Unable to allocate isert_conn->login_rspbuf\n"); + ret = -ENOMEM; goto out_unmap_login_req_buf; } @@ -578,7 +579,8 @@ isert_connect_release(struct isert_conn *isert_conn) BUG_ON(!device); isert_free_rx_descriptors(isert_conn); - if (isert_conn->cm_id) + if (isert_conn->cm_id && + !isert_conn->dev_removed) rdma_destroy_id(isert_conn->cm_id); if (isert_conn->qp) { @@ -593,7 +595,10 @@ isert_connect_release(struct isert_conn *isert_conn) isert_device_put(device); - kfree(isert_conn); + if (isert_conn->dev_removed) + wake_up_interruptible(&isert_conn->rem_wait); + else + kfree(isert_conn); } static void @@ -753,6 +758,7 @@ static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { struct isert_np *isert_np = cma_id->context; + struct isert_conn *isert_conn; int ret = 0; isert_info("%s (%d): status %d id %p np %p\n", @@ -773,10 +779,21 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ - case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ ret = isert_disconnected_handler(cma_id, event->event); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_conn = cma_id->qp->qp_context; + isert_conn->dev_removed = true; + isert_disconnected_handler(cma_id, event->event); + wait_event_interruptible(isert_conn->rem_wait, + isert_conn->state == ISER_CONN_DOWN); + kfree(isert_conn); + /* + * return non-zero from the callback to destroy + * the rdma cm id + */ + return 1; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index fc791efe3a10..c02ada57d7f5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -158,6 +158,8 @@ struct isert_conn { struct work_struct release_work; bool logout_posted; bool snd_w_inv; + wait_queue_head_t rem_wait; + bool dev_removed; }; #define ISERT_MAX_CQ 64 diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3322ed750172..d980fb458ad4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -1262,6 +1262,7 @@ static int srp_map_finish_fmr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; + struct ib_pd *pd = target->pd; struct ib_pool_fmr *fmr; u64 io_addr = 0; @@ -1273,9 +1274,9 @@ static int srp_map_finish_fmr(struct srp_map_state *state, if (state->npages == 0) return 0; - if (state->npages == 1 && target->global_mr) { + if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { srp_map_desc(state, state->base_dma_addr, state->dma_len, - target->global_mr->rkey); + pd->unsafe_global_rkey); goto reset_state; } @@ -1315,6 +1316,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; + struct ib_pd *pd = target->pd; struct ib_send_wr *bad_wr; struct ib_reg_wr wr; struct srp_fr_desc *desc; @@ -1326,12 +1328,12 @@ static int srp_map_finish_fr(struct srp_map_state *state, WARN_ON_ONCE(!dev->use_fast_reg); - if (sg_nents == 1 && target->global_mr) { + if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; srp_map_desc(state, sg_dma_address(state->sg) + sg_offset, sg_dma_len(state->sg) - sg_offset, - target->global_mr->rkey); + pd->unsafe_global_rkey); if (sg_offset_p) *sg_offset_p = 0; return 1; @@ -1386,7 +1388,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, static int srp_map_sg_entry(struct srp_map_state *state, struct srp_rdma_ch *ch, - struct scatterlist *sg, int sg_index) + struct scatterlist *sg) { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; @@ -1400,7 +1402,9 @@ static int srp_map_sg_entry(struct srp_map_state *state, while (dma_len) { unsigned offset = dma_addr & ~dev->mr_page_mask; - if (state->npages == dev->max_pages_per_mr || offset != 0) { + + if (state->npages == dev->max_pages_per_mr || + (state->npages > 0 && offset != 0)) { ret = srp_map_finish_fmr(state, ch); if (ret) return ret; @@ -1417,12 +1421,12 @@ static int srp_map_sg_entry(struct srp_map_state *state, } /* - * If the last entry of the MR wasn't a full page, then we need to + * If the end of the MR is not on a page boundary then we need to * close it out and start a new one -- we can only merge at page * boundaries. */ ret = 0; - if (len != dev->mr_page_size) + if ((dma_addr & ~dev->mr_page_mask) != 0) ret = srp_map_finish_fmr(state, ch); return ret; } @@ -1439,7 +1443,7 @@ static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch, state->fmr.end = req->fmr_list + ch->target->mr_per_cmd; for_each_sg(scat, sg, count, i) { - ret = srp_map_sg_entry(state, ch, sg, i); + ret = srp_map_sg_entry(state, ch, sg); if (ret) return ret; } @@ -1491,7 +1495,7 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch, for_each_sg(scat, sg, count, i) { srp_map_desc(state, ib_sg_dma_address(dev->dev, sg), ib_sg_dma_len(dev->dev, sg), - target->global_mr->rkey); + target->pd->unsafe_global_rkey); } return 0; @@ -1591,6 +1595,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_request *req) { struct srp_target_port *target = ch->target; + struct ib_pd *pd = target->pd; struct scatterlist *scat; struct srp_cmd *cmd = req->cmd->buf; int len, nents, count, ret; @@ -1626,7 +1631,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, fmt = SRP_DATA_DESC_DIRECT; len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf); - if (count == 1 && target->global_mr) { + if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { /* * The midlayer only generated a single gather/scatter * entry, or DMA mapping coalesced everything to a @@ -1636,7 +1641,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, struct srp_direct_buf *buf = (void *) cmd->add_data; buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat)); - buf->key = cpu_to_be32(target->global_mr->rkey); + buf->key = cpu_to_be32(pd->unsafe_global_rkey); buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat)); req->nmdesc = 0; @@ -1709,14 +1714,14 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch, memcpy(indirect_hdr->desc_list, req->indirect_desc, count * sizeof (struct srp_direct_buf)); - if (!target->global_mr) { + if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) { ret = srp_map_idb(ch, req, state.gen.next, state.gen.end, idb_len, &idb_rkey); if (ret < 0) goto unmap; req->nmdesc++; } else { - idb_rkey = cpu_to_be32(target->global_mr->rkey); + idb_rkey = cpu_to_be32(pd->unsafe_global_rkey); } indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr); @@ -3268,8 +3273,8 @@ static ssize_t srp_create_target(struct device *dev, target->io_class = SRP_REV16A_IB_IO_CLASS; target->scsi_host = target_host; target->srp_host = host; + target->pd = host->srp_dev->pd; target->lkey = host->srp_dev->pd->local_dma_lkey; - target->global_mr = host->srp_dev->global_mr; target->cmd_sg_cnt = cmd_sg_entries; target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries; target->allow_ext_sg = allow_ext_sg; @@ -3524,6 +3529,7 @@ static void srp_add_one(struct ib_device *device) struct srp_host *host; int mr_page_shift, p; u64 max_pages_per_mr; + unsigned int flags = 0; srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL); if (!srp_dev) @@ -3558,6 +3564,10 @@ static void srp_add_one(struct ib_device *device) srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr; } + if (never_register || !register_always || + (!srp_dev->has_fmr && !srp_dev->has_fr)) + flags |= IB_PD_UNSAFE_GLOBAL_RKEY; + if (srp_dev->use_fast_reg) { srp_dev->max_pages_per_mr = min_t(u32, srp_dev->max_pages_per_mr, @@ -3573,19 +3583,10 @@ static void srp_add_one(struct ib_device *device) INIT_LIST_HEAD(&srp_dev->dev_list); srp_dev->dev = device; - srp_dev->pd = ib_alloc_pd(device); + srp_dev->pd = ib_alloc_pd(device, flags); if (IS_ERR(srp_dev->pd)) goto free_dev; - if (never_register || !register_always || - (!srp_dev->has_fmr && !srp_dev->has_fr)) { - srp_dev->global_mr = ib_get_dma_mr(srp_dev->pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(srp_dev->global_mr)) - goto err_pd; - } for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) { host = srp_add_port(srp_dev, p); @@ -3596,9 +3597,6 @@ static void srp_add_one(struct ib_device *device) ib_set_client_data(device, &srp_client, srp_dev); return; -err_pd: - ib_dealloc_pd(srp_dev->pd); - free_dev: kfree(srp_dev); } @@ -3638,8 +3636,6 @@ static void srp_remove_one(struct ib_device *device, void *client_data) kfree(host); } - if (srp_dev->global_mr) - ib_dereg_mr(srp_dev->global_mr); ib_dealloc_pd(srp_dev->pd); kfree(srp_dev); diff --git a/drivers/infiniband/ulp/srp/ib_srp.h b/drivers/infiniband/ulp/srp/ib_srp.h index 26bb9b0a7a63..21c69695f9d4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.h +++ b/drivers/infiniband/ulp/srp/ib_srp.h @@ -90,7 +90,6 @@ struct srp_device { struct list_head dev_list; struct ib_device *dev; struct ib_pd *pd; - struct ib_mr *global_mr; u64 mr_page_mask; int mr_page_size; int mr_max_size; @@ -179,7 +178,7 @@ struct srp_target_port { spinlock_t lock; /* read only in the hot path */ - struct ib_mr *global_mr; + struct ib_pd *pd; struct srp_rdma_ch *ch; u32 ch_count; u32 lkey; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dfa23b075a88..0b1f69ed2e92 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) goto err_query_port; + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -2475,7 +2480,7 @@ static void srpt_add_one(struct ib_device *device) init_waitqueue_head(&sdev->ch_releaseQ); mutex_init(&sdev->mutex); - sdev->pd = ib_alloc_pd(device); + sdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(sdev->pd)) goto free_dev; @@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device) sdev->device->name, i); goto err_ring; } - snprintf(sport->port_guid, sizeof(sport->port_guid), - "0x%016llx%016llx", - be64_to_cpu(sport->gid.global.subnet_prefix), - be64_to_cpu(sport->gid.global.interface_id)); } spin_lock(&srpt_dev_lock); |