diff options
45 files changed, 1421 insertions, 307 deletions
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 3d2e489ab732..ff9163dc1596 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -46,6 +46,7 @@ #include <linux/completion.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/sysctl.h> #include <rdma/iw_cm.h> #include <rdma/ib_addr.h> @@ -65,6 +66,20 @@ struct iwcm_work { struct list_head free_list; }; +static unsigned int default_backlog = 256; + +static struct ctl_table_header *iwcm_ctl_table_hdr; +static struct ctl_table iwcm_ctl_table[] = { + { + .procname = "default_backlog", + .data = &default_backlog, + .maxlen = sizeof(default_backlog), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; + /* * The following services provide a mechanism for pre-allocating iwcm_work * elements. The design pre-allocates them based on the cm_id type: @@ -425,6 +440,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog) cm_id_priv = container_of(cm_id, struct iwcm_id_private, id); + if (!backlog) + backlog = default_backlog; + ret = alloc_work_entries(cm_id_priv, backlog); if (ret) return ret; @@ -1030,11 +1048,20 @@ static int __init iw_cm_init(void) if (!iwcm_wq) return -ENOMEM; + iwcm_ctl_table_hdr = register_net_sysctl(&init_net, "net/iw_cm", + iwcm_ctl_table); + if (!iwcm_ctl_table_hdr) { + pr_err("iw_cm: couldn't register sysctl paths\n"); + destroy_workqueue(iwcm_wq); + return -ENOMEM; + } + return 0; } static void __exit iw_cm_cleanup(void) { + unregister_net_sysctl_table(iwcm_ctl_table_hdr); destroy_workqueue(iwcm_wq); } diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index a283274a5a09..643c08a025a5 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -221,6 +221,7 @@ IB_UVERBS_DECLARE_CMD(query_port); IB_UVERBS_DECLARE_CMD(alloc_pd); IB_UVERBS_DECLARE_CMD(dealloc_pd); IB_UVERBS_DECLARE_CMD(reg_mr); +IB_UVERBS_DECLARE_CMD(rereg_mr); IB_UVERBS_DECLARE_CMD(dereg_mr); IB_UVERBS_DECLARE_CMD(alloc_mw); IB_UVERBS_DECLARE_CMD(dealloc_mw); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index ea6203ee7bcc..0600c50e6215 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1002,6 +1002,99 @@ err_free: return ret; } +ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_rereg_mr cmd; + struct ib_uverbs_rereg_mr_resp resp; + struct ib_udata udata; + struct ib_pd *pd = NULL; + struct ib_mr *mr; + struct ib_pd *old_pd; + int ret; + struct ib_uobject *uobj; + + if (out_len < sizeof(resp)) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof(cmd))) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long) cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + + if (cmd.flags & ~IB_MR_REREG_SUPPORTED || !cmd.flags) + return -EINVAL; + + if ((cmd.flags & IB_MR_REREG_TRANS) && + (!cmd.start || !cmd.hca_va || 0 >= cmd.length || + (cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK))) + return -EINVAL; + + uobj = idr_write_uobj(&ib_uverbs_mr_idr, cmd.mr_handle, + file->ucontext); + + if (!uobj) + return -EINVAL; + + mr = uobj->object; + + if (cmd.flags & IB_MR_REREG_ACCESS) { + ret = ib_check_mr_access(cmd.access_flags); + if (ret) + goto put_uobjs; + } + + if (cmd.flags & IB_MR_REREG_PD) { + pd = idr_read_pd(cmd.pd_handle, file->ucontext); + if (!pd) { + ret = -EINVAL; + goto put_uobjs; + } + } + + if (atomic_read(&mr->usecnt)) { + ret = -EBUSY; + goto put_uobj_pd; + } + + old_pd = mr->pd; + ret = mr->device->rereg_user_mr(mr, cmd.flags, cmd.start, + cmd.length, cmd.hca_va, + cmd.access_flags, pd, &udata); + if (!ret) { + if (cmd.flags & IB_MR_REREG_PD) { + atomic_inc(&pd->usecnt); + mr->pd = pd; + atomic_dec(&old_pd->usecnt); + } + } else { + goto put_uobj_pd; + } + + memset(&resp, 0, sizeof(resp)); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + + if (copy_to_user((void __user *)(unsigned long)cmd.response, + &resp, sizeof(resp))) + ret = -EFAULT; + else + ret = in_len; + +put_uobj_pd: + if (cmd.flags & IB_MR_REREG_PD) + put_pd_read(pd); + +put_uobjs: + + put_uobj_write(mr->uobject); + + return ret; +} + ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, const char __user *buf, int in_len, int out_len) diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 08219fb3338b..c73b22a257fe 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -87,6 +87,7 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_REREG_MR] = ib_uverbs_rereg_mr, [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, [IB_USER_VERBS_CMD_ALLOC_MW] = ib_uverbs_alloc_mw, [IB_USER_VERBS_CMD_DEALLOC_MW] = ib_uverbs_dealloc_mw, diff --git a/drivers/infiniband/hw/amso1100/c2_cq.c b/drivers/infiniband/hw/amso1100/c2_cq.c index 49e0e8533f74..1b63185b4ad4 100644 --- a/drivers/infiniband/hw/amso1100/c2_cq.c +++ b/drivers/infiniband/hw/amso1100/c2_cq.c @@ -260,11 +260,14 @@ static void c2_free_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq) mq->msg_pool.host, dma_unmap_addr(mq, mapping)); } -static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, int q_size, - int msg_size) +static int c2_alloc_cq_buf(struct c2_dev *c2dev, struct c2_mq *mq, + size_t q_size, size_t msg_size) { u8 *pool_start; + if (q_size > SIZE_MAX / msg_size) + return -EINVAL; + pool_start = dma_alloc_coherent(&c2dev->pcidev->dev, q_size * msg_size, &mq->host_dma, GFP_KERNEL); if (!pool_start) diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c index d61d0a18f784..a98426fed9ee 100644 --- a/drivers/infiniband/hw/cxgb4/ev.c +++ b/drivers/infiniband/hw/cxgb4/ev.c @@ -182,6 +182,7 @@ int c4iw_ev_handler(struct c4iw_dev *dev, u32 qid) chp = get_chp(dev, qid); if (chp) { + t4_clear_cq_armed(&chp->cq); spin_lock_irqsave(&chp->comp_handler_lock, flag); (*chp->ibcq.comp_handler)(&chp->ibcq, chp->ibcq.cq_context); spin_unlock_irqrestore(&chp->comp_handler_lock, flag); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index 086f62f5dc9e..60cfc11a66e4 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -1066,7 +1066,7 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, struct c4iw_cq *schp) { int count; - int flushed; + int rq_flushed, sq_flushed; unsigned long flag; PDBG("%s qhp %p rchp %p schp %p\n", __func__, qhp, rchp, schp); @@ -1084,27 +1084,40 @@ static void __flush_qp(struct c4iw_qp *qhp, struct c4iw_cq *rchp, c4iw_flush_hw_cq(rchp); c4iw_count_rcqes(&rchp->cq, &qhp->wq, &count); - flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); + rq_flushed = c4iw_flush_rq(&qhp->wq, &rchp->cq, count); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&rchp->lock, flag); - if (flushed) { - spin_lock_irqsave(&rchp->comp_handler_lock, flag); - (*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context); - spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); - } /* locking hierarchy: cq lock first, then qp lock. */ spin_lock_irqsave(&schp->lock, flag); spin_lock(&qhp->lock); if (schp != rchp) c4iw_flush_hw_cq(schp); - flushed = c4iw_flush_sq(qhp); + sq_flushed = c4iw_flush_sq(qhp); spin_unlock(&qhp->lock); spin_unlock_irqrestore(&schp->lock, flag); - if (flushed) { - spin_lock_irqsave(&schp->comp_handler_lock, flag); - (*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context); - spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + + if (schp == rchp) { + if (t4_clear_cq_armed(&rchp->cq) && + (rq_flushed || sq_flushed)) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + } else { + if (t4_clear_cq_armed(&rchp->cq) && rq_flushed) { + spin_lock_irqsave(&rchp->comp_handler_lock, flag); + (*rchp->ibcq.comp_handler)(&rchp->ibcq, + rchp->ibcq.cq_context); + spin_unlock_irqrestore(&rchp->comp_handler_lock, flag); + } + if (t4_clear_cq_armed(&schp->cq) && sq_flushed) { + spin_lock_irqsave(&schp->comp_handler_lock, flag); + (*schp->ibcq.comp_handler)(&schp->ibcq, + schp->ibcq.cq_context); + spin_unlock_irqrestore(&schp->comp_handler_lock, flag); + } } } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 68b0a6bf4eb0..d8d7fa3e446d 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -531,6 +531,10 @@ static inline int t4_wq_db_enabled(struct t4_wq *wq) return !wq->rq.queue[wq->rq.size].status.db_off; } +enum t4_cq_flags { + CQ_ARMED = 1, +}; + struct t4_cq { struct t4_cqe *queue; dma_addr_t dma_addr; @@ -551,12 +555,19 @@ struct t4_cq { u16 cidx_inc; u8 gen; u8 error; + unsigned long flags; }; +static inline int t4_clear_cq_armed(struct t4_cq *cq) +{ + return test_and_clear_bit(CQ_ARMED, &cq->flags); +} + static inline int t4_arm_cq(struct t4_cq *cq, int se) { u32 val; + set_bit(CQ_ARMED, &cq->flags); while (cq->cidx_inc > CIDXINC_MASK) { val = SEINTARM(0) | CIDXINC(CIDXINC_MASK) | TIMERREG(7) | INGRESSQID(cq->cqid); diff --git a/drivers/infiniband/hw/ipath/ipath_mad.c b/drivers/infiniband/hw/ipath/ipath_mad.c index 43f2d0424d4f..e890e5ba0e01 100644 --- a/drivers/infiniband/hw/ipath/ipath_mad.c +++ b/drivers/infiniband/hw/ipath/ipath_mad.c @@ -726,7 +726,7 @@ bail: * @dd: the infinipath device * @pkeys: the PKEY table */ -static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) +static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys, u8 port) { struct ipath_portdata *pd; int i; @@ -759,6 +759,7 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) } if (changed) { u64 pkey; + struct ib_event event; pkey = (u64) dd->ipath_pkeys[0] | ((u64) dd->ipath_pkeys[1] << 16) | @@ -768,12 +769,17 @@ static int set_pkeys(struct ipath_devdata *dd, u16 *pkeys) (unsigned long long) pkey); ipath_write_kreg(dd, dd->ipath_kregs->kr_partitionkey, pkey); + + event.event = IB_EVENT_PKEY_CHANGE; + event.device = &dd->verbs_dev->ibdev; + event.element.port_num = port; + ib_dispatch_event(&event); } return 0; } static int recv_subn_set_pkeytable(struct ib_smp *smp, - struct ib_device *ibdev) + struct ib_device *ibdev, u8 port) { u32 startpx = 32 * (be32_to_cpu(smp->attr_mod) & 0xffff); __be16 *p = (__be16 *) smp->data; @@ -784,7 +790,7 @@ static int recv_subn_set_pkeytable(struct ib_smp *smp, for (i = 0; i < n; i++) q[i] = be16_to_cpu(p[i]); - if (startpx != 0 || set_pkeys(dev->dd, q) != 0) + if (startpx != 0 || set_pkeys(dev->dd, q, port) != 0) smp->status |= IB_SMP_INVALID_FIELD; return recv_subn_get_pkeytable(smp, ibdev); @@ -1342,7 +1348,7 @@ static int process_subn(struct ib_device *ibdev, int mad_flags, ret = recv_subn_set_portinfo(smp, ibdev, port_num); goto bail; case IB_SMP_ATTR_PKEY_TABLE: - ret = recv_subn_set_pkeytable(smp, ibdev); + ret = recv_subn_set_pkeytable(smp, ibdev, port_num); goto bail; case IB_SMP_ATTR_SM_INFO: if (dev->port_cap_flags & IB_PORT_SM_DISABLED) { diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 0f7027e7db13..e1e558a3d692 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -910,8 +910,7 @@ static int __mlx4_ib_default_rules_match(struct ib_qp *qp, const struct default_rules *pdefault_rules = default_table; u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port); - for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++, - pdefault_rules++) { + for (i = 0; i < ARRAY_SIZE(default_table); i++, pdefault_rules++) { __u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS]; memset(&field_types, 0, sizeof(field_types)); @@ -965,8 +964,7 @@ static int __mlx4_ib_create_default_rules( int size = 0; int i; - for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/ - sizeof(pdefault_rules->rules_create_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(pdefault_rules->rules_create_list); i++) { int ret; union ib_flow_spec ib_spec; switch (pdefault_rules->rules_create_list[i]) { @@ -2007,6 +2005,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | (1ull << IB_USER_VERBS_CMD_REG_MR) | + (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | (1ull << IB_USER_VERBS_CMD_CREATE_CQ) | @@ -2059,6 +2058,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_dev.req_notify_cq = mlx4_ib_arm_cq; ibdev->ib_dev.get_dma_mr = mlx4_ib_get_dma_mr; ibdev->ib_dev.reg_user_mr = mlx4_ib_reg_user_mr; + ibdev->ib_dev.rereg_user_mr = mlx4_ib_rereg_user_mr; ibdev->ib_dev.dereg_mr = mlx4_ib_dereg_mr; ibdev->ib_dev.alloc_fast_reg_mr = mlx4_ib_alloc_fast_reg_mr; ibdev->ib_dev.alloc_fast_reg_page_list = mlx4_ib_alloc_fast_reg_page_list; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 369da3ca5d64..e8cad3926bfc 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -788,5 +788,9 @@ int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn); void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count); int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, int is_attach); +int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, + u64 start, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata); #endif /* MLX4_IB_H */ diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index cb2a8727f3fb..9b0e80e59b08 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -144,8 +144,10 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM); + /* Force registering the memory as writable. */ + /* Used for memory re-registeration. HCA protects the access */ mr->umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); + access_flags | IB_ACCESS_LOCAL_WRITE, 0); if (IS_ERR(mr->umem)) { err = PTR_ERR(mr->umem); goto err_free; @@ -183,6 +185,90 @@ err_free: return ERR_PTR(err); } +int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, + u64 start, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_pd *pd, + struct ib_udata *udata) +{ + struct mlx4_ib_dev *dev = to_mdev(mr->device); + struct mlx4_ib_mr *mmr = to_mmr(mr); + struct mlx4_mpt_entry *mpt_entry; + struct mlx4_mpt_entry **pmpt_entry = &mpt_entry; + int err; + + /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs, + * we assume that the calls can't run concurrently. Otherwise, a + * race exists. + */ + err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); + + if (err) + return err; + + if (flags & IB_MR_REREG_PD) { + err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, + to_mpd(pd)->pdn); + + if (err) + goto release_mpt_entry; + } + + if (flags & IB_MR_REREG_ACCESS) { + err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, + convert_access(mr_access_flags)); + + if (err) + goto release_mpt_entry; + } + + if (flags & IB_MR_REREG_TRANS) { + int shift; + int err; + int n; + + mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); + ib_umem_release(mmr->umem); + mmr->umem = ib_umem_get(mr->uobject->context, start, length, + mr_access_flags | + IB_ACCESS_LOCAL_WRITE, + 0); + if (IS_ERR(mmr->umem)) { + err = PTR_ERR(mmr->umem); + mmr->umem = NULL; + goto release_mpt_entry; + } + n = ib_umem_page_count(mmr->umem); + shift = ilog2(mmr->umem->page_size); + + mmr->mmr.iova = virt_addr; + mmr->mmr.size = length; + err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, + virt_addr, length, n, shift, + *pmpt_entry); + if (err) { + ib_umem_release(mmr->umem); + goto release_mpt_entry; + } + + err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem); + if (err) { + mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); + ib_umem_release(mmr->umem); + goto release_mpt_entry; + } + } + + /* If we couldn't transfer the MR to the HCA, just remember to + * return a failure. But dereg_mr will free the resources. + */ + err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry); + +release_mpt_entry: + mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); + + return err; +} + int mlx4_ib_dereg_mr(struct ib_mr *ibmr) { struct mlx4_ib_mr *mr = to_mmr(ibmr); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index bbbcf389272c..416cb7244224 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2501,7 +2501,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, spin_lock_irqsave(&qp->sq.lock, flags); for (nreq = 0; wr; nreq++, wr = wr->next) { - if (unlikely(wr->opcode >= sizeof(mlx5_ib_opcode) / sizeof(mlx5_ib_opcode[0]))) { + if (unlikely(wr->opcode >= ARRAY_SIZE(mlx5_ib_opcode))) { mlx5_ib_warn(dev, "\n"); err = -EINVAL; *bad_wr = wr; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma.h b/drivers/infiniband/hw/ocrdma/ocrdma.h index 19011dbb930f..b43456ae124b 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma.h @@ -40,7 +40,7 @@ #include <be_roce.h> #include "ocrdma_sli.h" -#define OCRDMA_ROCE_DRV_VERSION "10.2.145.0u" +#define OCRDMA_ROCE_DRV_VERSION "10.2.287.0u" #define OCRDMA_ROCE_DRV_DESC "Emulex OneConnect RoCE Driver" #define OCRDMA_NODE_DESC "Emulex OneConnect RoCE HCA" @@ -137,6 +137,7 @@ struct mqe_ctx { u16 cqe_status; u16 ext_status; bool cmd_done; + bool fw_error_state; }; struct ocrdma_hw_mr { @@ -235,7 +236,10 @@ struct ocrdma_dev { struct list_head entry; struct rcu_head rcu; int id; - u64 stag_arr[OCRDMA_MAX_STAG]; + u64 *stag_arr; + u8 sl; /* service level */ + bool pfc_state; + atomic_t update_sl; u16 pvid; u32 asic_id; @@ -518,4 +522,22 @@ static inline u8 ocrdma_get_asic_type(struct ocrdma_dev *dev) OCRDMA_SLI_ASIC_GEN_NUM_SHIFT; } +static inline u8 ocrdma_get_pfc_prio(u8 *pfc, u8 prio) +{ + return *(pfc + prio); +} + +static inline u8 ocrdma_get_app_prio(u8 *app_prio, u8 prio) +{ + return *(app_prio + prio); +} + +static inline u8 ocrdma_is_enabled_and_synced(u32 state) +{ /* May also be used to interpret TC-state, QCN-state + * Appl-state and Logical-link-state in future. + */ + return (state & OCRDMA_STATE_FLAG_ENABLED) && + (state & OCRDMA_STATE_FLAG_SYNC); +} + #endif diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index d4cc01f10c01..40f8536c10b0 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -35,6 +35,8 @@ #include "ocrdma_ah.h" #include "ocrdma_hw.h" +#define OCRDMA_VID_PCP_SHIFT 0xD + static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, struct ib_ah_attr *attr, int pdid) { @@ -55,7 +57,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, if (vlan_tag && (vlan_tag < 0x1000)) { eth.eth_type = cpu_to_be16(0x8100); eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE); - vlan_tag |= (attr->sl & 7) << 13; + vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT; eth.vlan_tag = cpu_to_be16(vlan_tag); eth_sz = sizeof(struct ocrdma_eth_vlan); vlan_enabled = true; @@ -100,6 +102,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) if (!(attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); + if (atomic_cmpxchg(&dev->update_sl, 1, 0)) + ocrdma_init_service_level(dev); ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 3bbf2010a821..dd35ae558ae1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -525,7 +525,7 @@ static int ocrdma_mbx_mq_cq_create(struct ocrdma_dev *dev, cmd->ev_cnt_flags = OCRDMA_CREATE_CQ_DEF_FLAGS; cmd->eqn = eq->id; - cmd->cqe_count = cq->size / sizeof(struct ocrdma_mcqe); + cmd->pdid_cqecnt = cq->size / sizeof(struct ocrdma_mcqe); ocrdma_build_q_pages(&cmd->pa[0], cq->size / OCRDMA_MIN_Q_PAGE_SIZE, cq->dma, PAGE_SIZE_4K); @@ -661,7 +661,7 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, { struct ocrdma_qp *qp = NULL; struct ocrdma_cq *cq = NULL; - struct ib_event ib_evt = { 0 }; + struct ib_event ib_evt; int cq_event = 0; int qp_event = 1; int srq_event = 0; @@ -674,6 +674,8 @@ static void ocrdma_dispatch_ibevent(struct ocrdma_dev *dev, if (cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQVALID) cq = dev->cq_tbl[cqe->cqvalid_cqid & OCRDMA_AE_MCQE_CQID_MASK]; + memset(&ib_evt, 0, sizeof(ib_evt)); + ib_evt.device = &dev->ibdev; switch (type) { @@ -771,6 +773,10 @@ static void ocrdma_process_grp5_aync(struct ocrdma_dev *dev, OCRDMA_AE_PVID_MCQE_TAG_MASK) >> OCRDMA_AE_PVID_MCQE_TAG_SHIFT); break; + + case OCRDMA_ASYNC_EVENT_COS_VALUE: + atomic_set(&dev->update_sl, 1); + break; default: /* Not interested evts. */ break; @@ -962,8 +968,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev) msecs_to_jiffies(30000)); if (status) return 0; - else + else { + dev->mqe_ctx.fw_error_state = true; + pr_err("%s(%d) mailbox timeout: fw not responding\n", + __func__, dev->id); return -1; + } } /* issue a mailbox command on the MQ */ @@ -975,6 +985,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe) struct ocrdma_mbx_rsp *rsp = NULL; mutex_lock(&dev->mqe_ctx.lock); + if (dev->mqe_ctx.fw_error_state) + goto mbx_err; ocrdma_post_mqe(dev, mqe); status = ocrdma_wait_mqe_cmpl(dev); if (status) @@ -1078,7 +1090,8 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT; attr->max_mw = rsp->max_mw; attr->max_mr = rsp->max_mr; - attr->max_mr_size = ~0ull; + attr->max_mr_size = ((u64)rsp->max_mr_size_hi << 32) | + rsp->max_mr_size_lo; attr->max_fmr = 0; attr->max_pages_per_frmr = rsp->max_pages_per_frmr; attr->max_num_mr_pbl = rsp->max_num_mr_pbl; @@ -1252,7 +1265,9 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) ctrl_attr_rsp = (struct ocrdma_get_ctrl_attribs_rsp *)dma.va; hba_attribs = &ctrl_attr_rsp->ctrl_attribs.hba_attribs; - dev->hba_port_num = hba_attribs->phy_port; + dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & + OCRDMA_HBA_ATTRB_PTNUM_MASK) + >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; strncpy(dev->model_number, hba_attribs->controller_model_number, 31); } @@ -1302,7 +1317,8 @@ int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed) goto mbx_err; rsp = (struct ocrdma_get_link_speed_rsp *)cmd; - *lnk_speed = rsp->phys_port_speed; + *lnk_speed = (rsp->pflt_pps_ld_pnum & OCRDMA_PHY_PS_MASK) + >> OCRDMA_PHY_PS_SHIFT; mbx_err: kfree(cmd); @@ -1328,11 +1344,16 @@ static int ocrdma_mbx_get_phy_info(struct ocrdma_dev *dev) goto mbx_err; rsp = (struct ocrdma_get_phy_info_rsp *)cmd; - dev->phy.phy_type = le16_to_cpu(rsp->phy_type); + dev->phy.phy_type = + (rsp->ityp_ptyp & OCRDMA_PHY_TYPE_MASK); + dev->phy.interface_type = + (rsp->ityp_ptyp & OCRDMA_IF_TYPE_MASK) + >> OCRDMA_IF_TYPE_SHIFT; dev->phy.auto_speeds_supported = - le16_to_cpu(rsp->auto_speeds_supported); + (rsp->fspeed_aspeed & OCRDMA_ASPEED_SUPP_MASK); dev->phy.fixed_speeds_supported = - le16_to_cpu(rsp->fixed_speeds_supported); + (rsp->fspeed_aspeed & OCRDMA_FSPEED_SUPP_MASK) + >> OCRDMA_FSPEED_SUPP_SHIFT; mbx_err: kfree(cmd); return status; @@ -1457,8 +1478,8 @@ static int ocrdma_mbx_create_ah_tbl(struct ocrdma_dev *dev) pbes = (struct ocrdma_pbe *)dev->av_tbl.pbl.va; for (i = 0; i < dev->av_tbl.size / OCRDMA_MIN_Q_PAGE_SIZE; i++) { - pbes[i].pa_lo = (u32) (pa & 0xffffffff); - pbes[i].pa_hi = (u32) upper_32_bits(pa); + pbes[i].pa_lo = (u32)cpu_to_le32(pa & 0xffffffff); + pbes[i].pa_hi = (u32)cpu_to_le32(upper_32_bits(pa)); pa += PAGE_SIZE; } cmd->tbl_addr[0].lo = (u32)(dev->av_tbl.pbl.pa & 0xFFFFFFFF); @@ -1501,6 +1522,7 @@ static void ocrdma_mbx_delete_ah_tbl(struct ocrdma_dev *dev) ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); dma_free_coherent(&pdev->dev, dev->av_tbl.size, dev->av_tbl.va, dev->av_tbl.pa); + dev->av_tbl.va = NULL; dma_free_coherent(&pdev->dev, PAGE_SIZE, dev->av_tbl.pbl.va, dev->av_tbl.pbl.pa); kfree(cmd); @@ -1624,14 +1646,16 @@ int ocrdma_mbx_create_cq(struct ocrdma_dev *dev, struct ocrdma_cq *cq, cmd->cmd.pgsz_pgcnt |= OCRDMA_CREATE_CQ_DPP << OCRDMA_CREATE_CQ_TYPE_SHIFT; cq->phase_change = false; - cmd->cmd.cqe_count = (cq->len / cqe_size); + cmd->cmd.pdid_cqecnt = (cq->len / cqe_size); } else { - cmd->cmd.cqe_count = (cq->len / cqe_size) - 1; + cmd->cmd.pdid_cqecnt = (cq->len / cqe_size) - 1; cmd->cmd.ev_cnt_flags |= OCRDMA_CREATE_CQ_FLAGS_AUTO_VALID; cq->phase_change = true; } - cmd->cmd.pd_id = pd_id; /* valid only for v3 */ + /* pd_id valid only for v3 */ + cmd->cmd.pdid_cqecnt |= (pd_id << + OCRDMA_CREATE_CQ_CMD_PDID_SHIFT); ocrdma_build_q_pages(&cmd->cmd.pa[0], hw_pages, cq->pa, page_size); status = ocrdma_mbx_cmd(dev, (struct ocrdma_mqe *)cmd); if (status) @@ -2206,7 +2230,8 @@ int ocrdma_mbx_create_qp(struct ocrdma_qp *qp, struct ib_qp_init_attr *attrs, OCRDMA_CREATE_QP_REQ_RQ_CQID_MASK; qp->rq_cq = cq; - if (pd->dpp_enabled && pd->num_dpp_qp) { + if (pd->dpp_enabled && attrs->cap.max_inline_data && pd->num_dpp_qp && + (attrs->cap.max_inline_data <= dev->attr.max_inline_data)) { ocrdma_set_create_qp_dpp_cmd(cmd, pd, qp, enable_dpp_cq, dpp_cq_id); } @@ -2264,6 +2289,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, if ((ah_attr->ah_flags & IB_AH_GRH) == 0) return -EINVAL; + if (atomic_cmpxchg(&qp->dev->update_sl, 1, 0)) + ocrdma_init_service_level(qp->dev); cmd->params.tclass_sq_psn |= (ah_attr->grh.traffic_class << OCRDMA_QP_PARAMS_TCLASS_SHIFT); cmd->params.rnt_rc_sl_fl |= @@ -2297,6 +2324,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp, cmd->params.vlan_dmac_b4_to_b5 |= vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT; cmd->flags |= OCRDMA_QP_PARA_VLAN_EN_VALID; + cmd->params.rnt_rc_sl_fl |= + (qp->dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT; } return 0; } @@ -2604,6 +2633,168 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq) return status; } +static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype, + struct ocrdma_dcbx_cfg *dcbxcfg) +{ + int status = 0; + dma_addr_t pa; + struct ocrdma_mqe cmd; + + struct ocrdma_get_dcbx_cfg_req *req = NULL; + struct ocrdma_get_dcbx_cfg_rsp *rsp = NULL; + struct pci_dev *pdev = dev->nic_info.pdev; + struct ocrdma_mqe_sge *mqe_sge = cmd.u.nonemb_req.sge; + + memset(&cmd, 0, sizeof(struct ocrdma_mqe)); + cmd.hdr.pyld_len = max_t (u32, sizeof(struct ocrdma_get_dcbx_cfg_rsp), + sizeof(struct ocrdma_get_dcbx_cfg_req)); + req = dma_alloc_coherent(&pdev->dev, cmd.hdr.pyld_len, &pa, GFP_KERNEL); + if (!req) { + status = -ENOMEM; + goto mem_err; + } + + cmd.hdr.spcl_sge_cnt_emb |= (1 << OCRDMA_MQE_HDR_SGE_CNT_SHIFT) & + OCRDMA_MQE_HDR_SGE_CNT_MASK; + mqe_sge->pa_lo = (u32) (pa & 0xFFFFFFFFUL); + mqe_sge->pa_hi = (u32) upper_32_bits(pa); + mqe_sge->len = cmd.hdr.pyld_len; + + memset(req, 0, sizeof(struct ocrdma_get_dcbx_cfg_req)); + ocrdma_init_mch(&req->hdr, OCRDMA_CMD_GET_DCBX_CONFIG, + OCRDMA_SUBSYS_DCBX, cmd.hdr.pyld_len); + req->param_type = ptype; + + status = ocrdma_mbx_cmd(dev, &cmd); + if (status) + goto mbx_err; + + rsp = (struct ocrdma_get_dcbx_cfg_rsp *)req; + ocrdma_le32_to_cpu(rsp, sizeof(struct ocrdma_get_dcbx_cfg_rsp)); + memcpy(dcbxcfg, &rsp->cfg, sizeof(struct ocrdma_dcbx_cfg)); + +mbx_err: + dma_free_coherent(&pdev->dev, cmd.hdr.pyld_len, req, pa); +mem_err: + return status; +} + +#define OCRDMA_MAX_SERVICE_LEVEL_INDEX 0x08 +#define OCRDMA_DEFAULT_SERVICE_LEVEL 0x05 + +static int ocrdma_parse_dcbxcfg_rsp(struct ocrdma_dev *dev, int ptype, + struct ocrdma_dcbx_cfg *dcbxcfg, + u8 *srvc_lvl) +{ + int status = -EINVAL, indx, slindx; + int ventry_cnt; + struct ocrdma_app_parameter *app_param; + u8 valid, proto_sel; + u8 app_prio, pfc_prio; + u16 proto; + + if (!(dcbxcfg->tcv_aev_opv_st & OCRDMA_DCBX_STATE_MASK)) { + pr_info("%s ocrdma%d DCBX is disabled\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + goto out; + } + + if (!ocrdma_is_enabled_and_synced(dcbxcfg->pfc_state)) { + pr_info("%s ocrdma%d priority flow control(%s) is %s%s\n", + dev_name(&dev->nic_info.pdev->dev), dev->id, + (ptype > 0 ? "operational" : "admin"), + (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_ENABLED) ? + "enabled" : "disabled", + (dcbxcfg->pfc_state & OCRDMA_STATE_FLAG_SYNC) ? + "" : ", not sync'ed"); + goto out; + } else { + pr_info("%s ocrdma%d priority flow control is enabled and sync'ed\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + } + + ventry_cnt = (dcbxcfg->tcv_aev_opv_st >> + OCRDMA_DCBX_APP_ENTRY_SHIFT) + & OCRDMA_DCBX_STATE_MASK; + + for (indx = 0; indx < ventry_cnt; indx++) { + app_param = &dcbxcfg->app_param[indx]; + valid = (app_param->valid_proto_app >> + OCRDMA_APP_PARAM_VALID_SHIFT) + & OCRDMA_APP_PARAM_VALID_MASK; + proto_sel = (app_param->valid_proto_app + >> OCRDMA_APP_PARAM_PROTO_SEL_SHIFT) + & OCRDMA_APP_PARAM_PROTO_SEL_MASK; + proto = app_param->valid_proto_app & + OCRDMA_APP_PARAM_APP_PROTO_MASK; + + if ( + valid && proto == OCRDMA_APP_PROTO_ROCE && + proto_sel == OCRDMA_PROTO_SELECT_L2) { + for (slindx = 0; slindx < + OCRDMA_MAX_SERVICE_LEVEL_INDEX; slindx++) { + app_prio = ocrdma_get_app_prio( + (u8 *)app_param->app_prio, + slindx); + pfc_prio = ocrdma_get_pfc_prio( + (u8 *)dcbxcfg->pfc_prio, + slindx); + + if (app_prio && pfc_prio) { + *srvc_lvl = slindx; + status = 0; + goto out; + } + } + if (slindx == OCRDMA_MAX_SERVICE_LEVEL_INDEX) { + pr_info("%s ocrdma%d application priority not set for 0x%x protocol\n", + dev_name(&dev->nic_info.pdev->dev), + dev->id, proto); + } + } + } + +out: + return status; +} + +void ocrdma_init_service_level(struct ocrdma_dev *dev) +{ + int status = 0, indx; + struct ocrdma_dcbx_cfg dcbxcfg; + u8 srvc_lvl = OCRDMA_DEFAULT_SERVICE_LEVEL; + int ptype = OCRDMA_PARAMETER_TYPE_OPER; + + for (indx = 0; indx < 2; indx++) { + status = ocrdma_mbx_get_dcbx_config(dev, ptype, &dcbxcfg); + if (status) { + pr_err("%s(): status=%d\n", __func__, status); + ptype = OCRDMA_PARAMETER_TYPE_ADMIN; + continue; + } + + status = ocrdma_parse_dcbxcfg_rsp(dev, ptype, + &dcbxcfg, &srvc_lvl); + if (status) { + ptype = OCRDMA_PARAMETER_TYPE_ADMIN; + continue; + } + + break; + } + + if (status) + pr_info("%s ocrdma%d service level default\n", + dev_name(&dev->nic_info.pdev->dev), dev->id); + else + pr_info("%s ocrdma%d service level %d\n", + dev_name(&dev->nic_info.pdev->dev), dev->id, + srvc_lvl); + + dev->pfc_state = ocrdma_is_enabled_and_synced(dcbxcfg.pfc_state); + dev->sl = srvc_lvl; +} + int ocrdma_alloc_av(struct ocrdma_dev *dev, struct ocrdma_ah *ah) { int i; @@ -2709,13 +2900,15 @@ int ocrdma_init_hw(struct ocrdma_dev *dev) goto conf_err; status = ocrdma_mbx_get_phy_info(dev); if (status) - goto conf_err; + goto info_attrb_err; status = ocrdma_mbx_get_ctrl_attribs(dev); if (status) - goto conf_err; + goto info_attrb_err; return 0; +info_attrb_err: + ocrdma_mbx_delete_ah_tbl(dev); conf_err: ocrdma_destroy_mq(dev); mq_err: diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h index e513f7293142..6eed8f191322 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.h @@ -135,4 +135,6 @@ int ocrdma_get_irq(struct ocrdma_dev *dev, struct ocrdma_eq *eq); int ocrdma_mbx_rdma_stats(struct ocrdma_dev *, bool reset); char *port_speed_string(struct ocrdma_dev *dev); +void ocrdma_init_service_level(struct ocrdma_dev *); + #endif /* __OCRDMA_HW_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 7c504e079744..256a06bc0b68 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -324,6 +324,11 @@ static int ocrdma_alloc_resources(struct ocrdma_dev *dev) if (!dev->qp_tbl) goto alloc_err; } + + dev->stag_arr = kzalloc(sizeof(u64) * OCRDMA_MAX_STAG, GFP_KERNEL); + if (dev->stag_arr == NULL) + goto alloc_err; + spin_lock_init(&dev->av_tbl.lock); spin_lock_init(&dev->flush_q_lock); return 0; @@ -334,6 +339,7 @@ alloc_err: static void ocrdma_free_resources(struct ocrdma_dev *dev) { + kfree(dev->stag_arr); kfree(dev->qp_tbl); kfree(dev->cq_tbl); kfree(dev->sgid_tbl); @@ -353,15 +359,25 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, { struct ocrdma_dev *dev = dev_get_drvdata(device); - return scnprintf(buf, PAGE_SIZE, "%s", &dev->attr.fw_ver[0]); + return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->attr.fw_ver[0]); +} + +static ssize_t show_hca_type(struct device *device, + struct device_attribute *attr, char *buf) +{ + struct ocrdma_dev *dev = dev_get_drvdata(device); + + return scnprintf(buf, PAGE_SIZE, "%s\n", &dev->model_number[0]); } static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL); static DEVICE_ATTR(fw_ver, S_IRUGO, show_fw_ver, NULL); +static DEVICE_ATTR(hca_type, S_IRUGO, show_hca_type, NULL); static struct device_attribute *ocrdma_attributes[] = { &dev_attr_hw_rev, - &dev_attr_fw_ver + &dev_attr_fw_ver, + &dev_attr_hca_type }; static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) @@ -372,6 +388,58 @@ static void ocrdma_remove_sysfiles(struct ocrdma_dev *dev) device_remove_file(&dev->ibdev.dev, ocrdma_attributes[i]); } +static void ocrdma_init_ipv4_gids(struct ocrdma_dev *dev, + struct net_device *net) +{ + struct in_device *in_dev; + union ib_gid gid; + in_dev = in_dev_get(net); + if (in_dev) { + for_ifa(in_dev) { + ipv6_addr_set_v4mapped(ifa->ifa_address, + (struct in6_addr *)&gid); + ocrdma_add_sgid(dev, &gid); + } + endfor_ifa(in_dev); + in_dev_put(in_dev); + } +} + +static void ocrdma_init_ipv6_gids(struct ocrdma_dev *dev, + struct net_device *net) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_dev *in6_dev; + union ib_gid *pgid; + struct inet6_ifaddr *ifp; + in6_dev = in6_dev_get(net); + if (in6_dev) { + read_lock_bh(&in6_dev->lock); + list_for_each_entry(ifp, &in6_dev->addr_list, if_list) { + pgid = (union ib_gid *)&ifp->addr; + ocrdma_add_sgid(dev, pgid); + } + read_unlock_bh(&in6_dev->lock); + in6_dev_put(in6_dev); + } +#endif +} + +static void ocrdma_init_gid_table(struct ocrdma_dev *dev) +{ + struct net_device *net_dev; + + for_each_netdev(&init_net, net_dev) { + struct net_device *real_dev = rdma_vlan_dev_real_dev(net_dev) ? + rdma_vlan_dev_real_dev(net_dev) : net_dev; + + if (real_dev == dev->nic_info.netdev) { + ocrdma_init_ipv4_gids(dev, net_dev); + ocrdma_init_ipv6_gids(dev, net_dev); + } + } +} + static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) { int status = 0, i; @@ -399,6 +467,8 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info) if (status) goto alloc_err; + ocrdma_init_service_level(dev); + ocrdma_init_gid_table(dev); status = ocrdma_register_device(dev); if (status) goto alloc_err; @@ -508,6 +578,12 @@ static int ocrdma_close(struct ocrdma_dev *dev) return 0; } +static void ocrdma_shutdown(struct ocrdma_dev *dev) +{ + ocrdma_close(dev); + ocrdma_remove(dev); +} + /* event handling via NIC driver ensures that all the NIC specific * initialization done before RoCE driver notifies * event to stack. @@ -521,6 +597,9 @@ static void ocrdma_event_handler(struct ocrdma_dev *dev, u32 event) case BE_DEV_DOWN: ocrdma_close(dev); break; + case BE_DEV_SHUTDOWN: + ocrdma_shutdown(dev); + break; } } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 96c9ee602ba4..904989ec5eaa 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -44,35 +44,39 @@ enum { #define OCRDMA_SUBSYS_ROCE 10 enum { OCRDMA_CMD_QUERY_CONFIG = 1, - OCRDMA_CMD_ALLOC_PD, - OCRDMA_CMD_DEALLOC_PD, - - OCRDMA_CMD_CREATE_AH_TBL, - OCRDMA_CMD_DELETE_AH_TBL, - - OCRDMA_CMD_CREATE_QP, - OCRDMA_CMD_QUERY_QP, - OCRDMA_CMD_MODIFY_QP, - OCRDMA_CMD_DELETE_QP, - - OCRDMA_CMD_RSVD1, - OCRDMA_CMD_ALLOC_LKEY, - OCRDMA_CMD_DEALLOC_LKEY, - OCRDMA_CMD_REGISTER_NSMR, - OCRDMA_CMD_REREGISTER_NSMR, - OCRDMA_CMD_REGISTER_NSMR_CONT, - OCRDMA_CMD_QUERY_NSMR, - OCRDMA_CMD_ALLOC_MW, - OCRDMA_CMD_QUERY_MW, - - OCRDMA_CMD_CREATE_SRQ, - OCRDMA_CMD_QUERY_SRQ, - OCRDMA_CMD_MODIFY_SRQ, - OCRDMA_CMD_DELETE_SRQ, - - OCRDMA_CMD_ATTACH_MCAST, - OCRDMA_CMD_DETACH_MCAST, - OCRDMA_CMD_GET_RDMA_STATS, + OCRDMA_CMD_ALLOC_PD = 2, + OCRDMA_CMD_DEALLOC_PD = 3, + + OCRDMA_CMD_CREATE_AH_TBL = 4, + OCRDMA_CMD_DELETE_AH_TBL = 5, + + OCRDMA_CMD_CREATE_QP = 6, + OCRDMA_CMD_QUERY_QP = 7, + OCRDMA_CMD_MODIFY_QP = 8 , + OCRDMA_CMD_DELETE_QP = 9, + + OCRDMA_CMD_RSVD1 = 10, + OCRDMA_CMD_ALLOC_LKEY = 11, + OCRDMA_CMD_DEALLOC_LKEY = 12, + OCRDMA_CMD_REGISTER_NSMR = 13, + OCRDMA_CMD_REREGISTER_NSMR = 14, + OCRDMA_CMD_REGISTER_NSMR_CONT = 15, + OCRDMA_CMD_QUERY_NSMR = 16, + OCRDMA_CMD_ALLOC_MW = 17, + OCRDMA_CMD_QUERY_MW = 18, + + OCRDMA_CMD_CREATE_SRQ = 19, + OCRDMA_CMD_QUERY_SRQ = 20, + OCRDMA_CMD_MODIFY_SRQ = 21, + OCRDMA_CMD_DELETE_SRQ = 22, + + OCRDMA_CMD_ATTACH_MCAST = 23, + OCRDMA_CMD_DETACH_MCAST = 24, + + OCRDMA_CMD_CREATE_RBQ = 25, + OCRDMA_CMD_DESTROY_RBQ = 26, + + OCRDMA_CMD_GET_RDMA_STATS = 27, OCRDMA_CMD_MAX }; @@ -103,7 +107,7 @@ enum { #define OCRDMA_MAX_QP 2048 #define OCRDMA_MAX_CQ 2048 -#define OCRDMA_MAX_STAG 8192 +#define OCRDMA_MAX_STAG 16384 enum { OCRDMA_DB_RQ_OFFSET = 0xE0, @@ -422,7 +426,12 @@ struct ocrdma_ae_qp_mcqe { #define OCRDMA_ASYNC_RDMA_EVE_CODE 0x14 #define OCRDMA_ASYNC_GRP5_EVE_CODE 0x5 -#define OCRDMA_ASYNC_EVENT_PVID_STATE 0x3 + +enum ocrdma_async_grp5_events { + OCRDMA_ASYNC_EVENT_QOS_VALUE = 0x01, + OCRDMA_ASYNC_EVENT_COS_VALUE = 0x02, + OCRDMA_ASYNC_EVENT_PVID_STATE = 0x03 +}; enum OCRDMA_ASYNC_EVENT_TYPE { OCRDMA_CQ_ERROR = 0x00, @@ -525,8 +534,8 @@ struct ocrdma_mbx_query_config { u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; - u32 max_mr_size_lo; u32 max_mr_size_hi; + u32 max_mr_size_lo; u32 max_num_mr_pbl; u32 max_mw; u32 max_fmr; @@ -580,17 +589,26 @@ enum { OCRDMA_FN_MODE_RDMA = 0x4 }; +enum { + OCRDMA_IF_TYPE_MASK = 0xFFFF0000, + OCRDMA_IF_TYPE_SHIFT = 0x10, + OCRDMA_PHY_TYPE_MASK = 0x0000FFFF, + OCRDMA_FUTURE_DETAILS_MASK = 0xFFFF0000, + OCRDMA_FUTURE_DETAILS_SHIFT = 0x10, + OCRDMA_EX_PHY_DETAILS_MASK = 0x0000FFFF, + OCRDMA_FSPEED_SUPP_MASK = 0xFFFF0000, + OCRDMA_FSPEED_SUPP_SHIFT = 0x10, + OCRDMA_ASPEED_SUPP_MASK = 0x0000FFFF +}; + struct ocrdma_get_phy_info_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; - u16 phy_type; - u16 interface_type; + u32 ityp_ptyp; u32 misc_params; - u16 ext_phy_details; - u16 rsvd; - u16 auto_speeds_supported; - u16 fixed_speeds_supported; + u32 ftrdtl_exphydtl; + u32 fspeed_aspeed; u32 future_use[2]; }; @@ -603,19 +621,34 @@ enum { OCRDMA_PHY_SPEED_40GBPS = 0x20 }; +enum { + OCRDMA_PORT_NUM_MASK = 0x3F, + OCRDMA_PT_MASK = 0xC0, + OCRDMA_PT_SHIFT = 0x6, + OCRDMA_LINK_DUP_MASK = 0x0000FF00, + OCRDMA_LINK_DUP_SHIFT = 0x8, + OCRDMA_PHY_PS_MASK = 0x00FF0000, + OCRDMA_PHY_PS_SHIFT = 0x10, + OCRDMA_PHY_PFLT_MASK = 0xFF000000, + OCRDMA_PHY_PFLT_SHIFT = 0x18, + OCRDMA_QOS_LNKSP_MASK = 0xFFFF0000, + OCRDMA_QOS_LNKSP_SHIFT = 0x10, + OCRDMA_LLST_MASK = 0xFF, + OCRDMA_PLFC_MASK = 0x00000400, + OCRDMA_PLFC_SHIFT = 0x8, + OCRDMA_PLRFC_MASK = 0x00000200, + OCRDMA_PLRFC_SHIFT = 0x8, + OCRDMA_PLTFC_MASK = 0x00000100, + OCRDMA_PLTFC_SHIFT = 0x8 +}; struct ocrdma_get_link_speed_rsp { struct ocrdma_mqe_hdr hdr; struct ocrdma_mbx_rsp rsp; - u8 pt_port_num; - u8 link_duplex; - u8 phys_port_speed; - u8 phys_port_fault; - u16 rsvd1; - u16 qos_lnk_speed; - u8 logical_lnk_status; - u8 rsvd2[3]; + u32 pflt_pps_ld_pnum; + u32 qos_lsp; + u32 res_lls; }; enum { @@ -666,8 +699,7 @@ struct ocrdma_create_cq_cmd { u32 pgsz_pgcnt; u32 ev_cnt_flags; u32 eqn; - u16 cqe_count; - u16 pd_id; + u32 pdid_cqecnt; u32 rsvd6; struct ocrdma_pa pa[OCRDMA_CREATE_CQ_MAX_PAGES]; }; @@ -678,6 +710,10 @@ struct ocrdma_create_cq { }; enum { + OCRDMA_CREATE_CQ_CMD_PDID_SHIFT = 0x10 +}; + +enum { OCRDMA_CREATE_CQ_RSP_CQ_ID_MASK = 0xFFFF }; @@ -1231,7 +1267,6 @@ struct ocrdma_destroy_srq { enum { OCRDMA_ALLOC_PD_ENABLE_DPP = BIT(16), - OCRDMA_PD_MAX_DPP_ENABLED_QP = 8, OCRDMA_DPP_PAGE_SIZE = 4096 }; @@ -1896,12 +1931,62 @@ struct ocrdma_rdma_stats_resp { struct ocrdma_rx_dbg_stats rx_dbg_stats; } __packed; +enum { + OCRDMA_HBA_ATTRB_EPROM_VER_LO_MASK = 0xFF, + OCRDMA_HBA_ATTRB_EPROM_VER_HI_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_EPROM_VER_HI_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_CDBLEN_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_ASIC_REV_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_ASIC_REV_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_GUID0_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_GUID0_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_GUID13_MASK = 0xFF, + OCRDMA_HBA_ATTRB_GUID14_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_GUID14_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_GUID15_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_GUID15_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCNT_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_PCNT_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_LDTOUT_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_ISCSI_VER_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_ISCSI_VER_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_MFUNC_DEV_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_MFUNC_DEV_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_CV_MASK = 0xFF, + OCRDMA_HBA_ATTRB_HBA_ST_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_HBA_ST_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_MAX_DOMS_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_MAX_DOMS_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PTNUM_MASK = 0x3F000000, + OCRDMA_HBA_ATTRB_PTNUM_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_PT_MASK = 0xC0000000, + OCRDMA_HBA_ATTRB_PT_SHIFT = 0x1E, + OCRDMA_HBA_ATTRB_ISCSI_FET_MASK = 0xFF, + OCRDMA_HBA_ATTRB_ASIC_GEN_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_ASIC_GEN_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_PCI_VID_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_PCI_DID_MASK = 0xFFFF0000, + OCRDMA_HBA_ATTRB_PCI_DID_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCI_SVID_MASK = 0xFFFF, + OCRDMA_HBA_ATTRB_PCI_SSID_MASK = 0xFFFF0000, + OCRDMA_HBA_ATTRB_PCI_SSID_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_PCI_BUSNUM_MASK = 0xFF, + OCRDMA_HBA_ATTRB_PCI_DEVNUM_MASK = 0xFF00, + OCRDMA_HBA_ATTRB_PCI_DEVNUM_SHIFT = 0x08, + OCRDMA_HBA_ATTRB_PCI_FUNCNUM_MASK = 0xFF0000, + OCRDMA_HBA_ATTRB_PCI_FUNCNUM_SHIFT = 0x10, + OCRDMA_HBA_ATTRB_IF_TYPE_MASK = 0xFF000000, + OCRDMA_HBA_ATTRB_IF_TYPE_SHIFT = 0x18, + OCRDMA_HBA_ATTRB_NETFIL_MASK =0xFF +}; struct mgmt_hba_attribs { u8 flashrom_version_string[32]; u8 manufacturer_name[32]; u32 supported_modes; - u32 rsvd0[3]; + u32 rsvd_eprom_verhi_verlo; + u32 mbx_ds_ver; + u32 epfw_ds_ver; u8 ncsi_ver_string[12]; u32 default_extended_timeout; u8 controller_model_number[32]; @@ -1914,34 +1999,26 @@ struct mgmt_hba_attribs { u8 driver_version_string[32]; u8 fw_on_flash_version_string[32]; u32 functionalities_supported; - u16 max_cdblength; - u8 asic_revision; - u8 generational_guid[16]; - u8 hba_port_count; - u16 default_link_down_timeout; - u8 iscsi_ver_min_max; - u8 multifunction_device; - u8 cache_valid; - u8 hba_status; - u8 max_domains_supported; - u8 phy_port; + u32 guid0_asicrev_cdblen; + u8 generational_guid[12]; + u32 portcnt_guid15; + u32 mfuncdev_iscsi_ldtout; + u32 ptpnum_maxdoms_hbast_cv; u32 firmware_post_status; u32 hba_mtu[8]; - u32 rsvd1[4]; + u32 res_asicgen_iscsi_feaures; + u32 rsvd1[3]; }; struct mgmt_controller_attrib { struct mgmt_hba_attribs hba_attribs; - u16 pci_vendor_id; - u16 pci_device_id; - u16 pci_sub_vendor_id; - u16 pci_sub_system_id; - u8 pci_bus_number; - u8 pci_device_number; - u8 pci_function_number; - u8 interface_type; - u64 unique_identifier; - u32 rsvd0[5]; + u32 pci_did_vid; + u32 pci_ssid_svid; + u32 ityp_fnum_devnum_bnum; + u32 uid_hi; + u32 uid_lo; + u32 res_nnetfil; + u32 rsvd0[4]; }; struct ocrdma_get_ctrl_attribs_rsp { @@ -1949,5 +2026,79 @@ struct ocrdma_get_ctrl_attribs_rsp { struct mgmt_controller_attrib ctrl_attribs; }; +#define OCRDMA_SUBSYS_DCBX 0x10 + +enum OCRDMA_DCBX_OPCODE { + OCRDMA_CMD_GET_DCBX_CONFIG = 0x01 +}; + +enum OCRDMA_DCBX_PARAM_TYPE { + OCRDMA_PARAMETER_TYPE_ADMIN = 0x00, + OCRDMA_PARAMETER_TYPE_OPER = 0x01, + OCRDMA_PARAMETER_TYPE_PEER = 0x02 +}; + +enum OCRDMA_DCBX_APP_PROTO { + OCRDMA_APP_PROTO_ROCE = 0x8915 +}; + +enum OCRDMA_DCBX_PROTO { + OCRDMA_PROTO_SELECT_L2 = 0x00, + OCRDMA_PROTO_SELECT_L4 = 0x01 +}; + +enum OCRDMA_DCBX_APP_PARAM { + OCRDMA_APP_PARAM_APP_PROTO_MASK = 0xFFFF, + OCRDMA_APP_PARAM_PROTO_SEL_MASK = 0xFF, + OCRDMA_APP_PARAM_PROTO_SEL_SHIFT = 0x10, + OCRDMA_APP_PARAM_VALID_MASK = 0xFF, + OCRDMA_APP_PARAM_VALID_SHIFT = 0x18 +}; + +enum OCRDMA_DCBX_STATE_FLAGS { + OCRDMA_STATE_FLAG_ENABLED = 0x01, + OCRDMA_STATE_FLAG_ADDVERTISED = 0x02, + OCRDMA_STATE_FLAG_WILLING = 0x04, + OCRDMA_STATE_FLAG_SYNC = 0x08, + OCRDMA_STATE_FLAG_UNSUPPORTED = 0x40000000, + OCRDMA_STATE_FLAG_NEG_FAILD = 0x80000000 +}; + +enum OCRDMA_TCV_AEV_OPV_ST { + OCRDMA_DCBX_TC_SUPPORT_MASK = 0xFF, + OCRDMA_DCBX_TC_SUPPORT_SHIFT = 0x18, + OCRDMA_DCBX_APP_ENTRY_SHIFT = 0x10, + OCRDMA_DCBX_OP_PARAM_SHIFT = 0x08, + OCRDMA_DCBX_STATE_MASK = 0xFF +}; + +struct ocrdma_app_parameter { + u32 valid_proto_app; + u32 oui; + u32 app_prio[2]; +}; + +struct ocrdma_dcbx_cfg { + u32 tcv_aev_opv_st; + u32 tc_state; + u32 pfc_state; + u32 qcn_state; + u32 appl_state; + u32 ll_state; + u32 tc_bw[2]; + u32 tc_prio[8]; + u32 pfc_prio[2]; + struct ocrdma_app_parameter app_param[15]; +}; + +struct ocrdma_get_dcbx_cfg_req { + struct ocrdma_mbx_hdr hdr; + u32 param_type; +} __packed; + +struct ocrdma_get_dcbx_cfg_rsp { + struct ocrdma_mbx_rsp hdr; + struct ocrdma_dcbx_cfg cfg; +} __packed; #endif /* __OCRDMA_SLI_H__ */ diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index edf6211d84b8..acb434d16903 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -69,11 +69,11 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr) memcpy(&attr->fw_ver, &dev->attr.fw_ver[0], min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver))); ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid); - attr->max_mr_size = ~0ull; + attr->max_mr_size = dev->attr.max_mr_size; attr->page_size_cap = 0xffff000; attr->vendor_id = dev->nic_info.pdev->vendor; attr->vendor_part_id = dev->nic_info.pdev->device; - attr->hw_ver = 0; + attr->hw_ver = dev->asic_id; attr->max_qp = dev->attr.max_qp; attr->max_ah = OCRDMA_MAX_AH; attr->max_qp_wr = dev->attr.max_wqe; @@ -268,7 +268,8 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev, pd->dpp_enabled = ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R; pd->num_dpp_qp = - pd->dpp_enabled ? OCRDMA_PD_MAX_DPP_ENABLED_QP : 0; + pd->dpp_enabled ? (dev->nic_info.db_page_size / + dev->attr.wqe_size) : 0; } retry: @@ -328,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx) struct ocrdma_pd *pd = uctx->cntxt_pd; struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device); - BUG_ON(uctx->pd_in_use); + if (uctx->pd_in_use) { + pr_err("%s(%d) Freeing in use pdid=0x%x.\n", + __func__, dev->id, pd->id); + } uctx->cntxt_pd = NULL; status = _ocrdma_dealloc_pd(dev, pd); return status; @@ -843,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr) if (mr->umem) ib_umem_release(mr->umem); kfree(mr); + + /* Don't stop cleanup, in case FW is unresponsive */ + if (dev->mqe_ctx.fw_error_state) { + status = 0; + pr_err("%s(%d) fw not responding.\n", + __func__, dev->id); + } return status; } @@ -2054,6 +2065,13 @@ int ocrdma_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } while (wr) { + if (qp->qp_type == IB_QPT_UD && + (wr->opcode != IB_WR_SEND && + wr->opcode != IB_WR_SEND_WITH_IMM)) { + *bad_wr = wr; + status = -EINVAL; + break; + } if (ocrdma_hwq_free_cnt(&qp->sq) == 0 || wr->num_sge > qp->sq.max_sges) { *bad_wr = wr; @@ -2488,6 +2506,11 @@ static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp, *stop = true; expand = false; } + } else if (is_hw_sq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status); @@ -2593,6 +2616,11 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe, *stop = true; expand = false; } + } else if (is_hw_rq_empty(qp)) { + /* Do nothing */ + expand = false; + *polled = false; + *stop = false; } else { *polled = true; expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index c639f90cfda4..3edce617c31b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -86,7 +86,6 @@ enum { IPOIB_FLAG_INITIALIZED = 1, IPOIB_FLAG_ADMIN_UP = 2, IPOIB_PKEY_ASSIGNED = 3, - IPOIB_PKEY_STOP = 4, IPOIB_FLAG_SUBINTERFACE = 5, IPOIB_MCAST_RUN = 6, IPOIB_STOP_REAPER = 7, @@ -312,7 +311,6 @@ struct ipoib_dev_priv { struct list_head multicast_list; struct rb_root multicast_tree; - struct delayed_work pkey_poll_task; struct delayed_work mcast_task; struct work_struct carrier_on_task; struct work_struct flush_light; @@ -473,10 +471,11 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work); void ipoib_pkey_event(struct work_struct *work); void ipoib_ib_dev_cleanup(struct net_device *dev); -int ipoib_ib_dev_open(struct net_device *dev); +int ipoib_ib_dev_open(struct net_device *dev, int flush); int ipoib_ib_dev_up(struct net_device *dev); int ipoib_ib_dev_down(struct net_device *dev, int flush); int ipoib_ib_dev_stop(struct net_device *dev, int flush); +void ipoib_pkey_dev_check_presence(struct net_device *dev); int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); void ipoib_dev_cleanup(struct net_device *dev); @@ -532,8 +531,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf); void ipoib_setup(struct net_device *dev); -void ipoib_pkey_poll(struct work_struct *work); -int ipoib_pkey_dev_delay_open(struct net_device *dev); +void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); void ipoib_set_ethtool_ops(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index 50061854616e..6bd5740e2691 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -281,10 +281,8 @@ void ipoib_delete_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - if (priv->mcg_dentry) - debugfs_remove(priv->mcg_dentry); - if (priv->path_dentry) - debugfs_remove(priv->path_dentry); + debugfs_remove(priv->mcg_dentry); + debugfs_remove(priv->path_dentry); } int ipoib_register_debugfs(void) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 6a7003ddb0be..72626c348174 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -664,17 +664,18 @@ static void ipoib_ib_tx_timer_func(unsigned long ctx) drain_tx_cq((struct net_device *)ctx); } -int ipoib_ib_dev_open(struct net_device *dev) +int ipoib_ib_dev_open(struct net_device *dev, int flush) { struct ipoib_dev_priv *priv = netdev_priv(dev); int ret; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &priv->pkey_index)) { - ipoib_warn(priv, "P_Key 0x%04x not found\n", priv->pkey); - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); + ipoib_pkey_dev_check_presence(dev); + + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { + ipoib_warn(priv, "P_Key 0x%04x is %s\n", priv->pkey, + (!(priv->pkey & 0x7fff) ? "Invalid" : "not found")); return -1; } - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); ret = ipoib_init_qp(dev); if (ret) { @@ -705,16 +706,17 @@ int ipoib_ib_dev_open(struct net_device *dev) dev_stop: if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) napi_enable(&priv->napi); - ipoib_ib_dev_stop(dev, 1); + ipoib_ib_dev_stop(dev, flush); return -1; } -static void ipoib_pkey_dev_check_presence(struct net_device *dev) +void ipoib_pkey_dev_check_presence(struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); - u16 pkey_index = 0; - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &pkey_index)) + if (!(priv->pkey & 0x7fff) || + ib_find_pkey(priv->ca, priv->port, priv->pkey, + &priv->pkey_index)) clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); else set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); @@ -745,14 +747,6 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush) clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); netif_carrier_off(dev); - /* Shutdown the P_Key thread if still active */ - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { - mutex_lock(&pkey_mutex); - set_bit(IPOIB_PKEY_STOP, &priv->flags); - cancel_delayed_work_sync(&priv->pkey_poll_task); - mutex_unlock(&pkey_mutex); - } - ipoib_mcast_stop_thread(dev, flush); ipoib_mcast_dev_flush(dev); @@ -924,7 +918,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port) (unsigned long) dev); if (dev->flags & IFF_UP) { - if (ipoib_ib_dev_open(dev)) { + if (ipoib_ib_dev_open(dev, 1)) { ipoib_transport_dev_cleanup(dev); return -ENODEV; } @@ -966,13 +960,27 @@ static inline int update_parent_pkey(struct ipoib_dev_priv *priv) return 1; } +/* + * returns 0 if pkey value was found in a different slot. + */ +static inline int update_child_pkey(struct ipoib_dev_priv *priv) +{ + u16 old_index = priv->pkey_index; + + priv->pkey_index = 0; + ipoib_pkey_dev_check_presence(priv->dev); + + if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) && + (old_index == priv->pkey_index)) + return 1; + return 0; +} static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, enum ipoib_flush_level level) { struct ipoib_dev_priv *cpriv; struct net_device *dev = priv->dev; - u16 new_index; int result; down_read(&priv->vlan_rwsem); @@ -986,16 +994,20 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, up_read(&priv->vlan_rwsem); - if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) { - /* for non-child devices must check/update the pkey value here */ - if (level == IPOIB_FLUSH_HEAVY && - !test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) - update_parent_pkey(priv); + if (!test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags) && + level != IPOIB_FLUSH_HEAVY) { ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_INITIALIZED not set.\n"); return; } if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) { + /* interface is down. update pkey and leave. */ + if (level == IPOIB_FLUSH_HEAVY) { + if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) + update_parent_pkey(priv); + else + update_child_pkey(priv); + } ipoib_dbg(priv, "Not flushing - IPOIB_FLAG_ADMIN_UP not set.\n"); return; } @@ -1005,20 +1017,13 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, * (parent) devices should always takes what present in pkey index 0 */ if (test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) { - if (ib_find_pkey(priv->ca, priv->port, priv->pkey, &new_index)) { - clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags); - ipoib_ib_dev_down(dev, 0); - ipoib_ib_dev_stop(dev, 0); - if (ipoib_pkey_dev_delay_open(dev)) - return; - } - /* restart QP only if P_Key index is changed */ - if (test_and_set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags) && - new_index == priv->pkey_index) { + result = update_child_pkey(priv); + if (result) { + /* restart QP only if P_Key index is changed */ ipoib_dbg(priv, "Not flushing - P_Key index not changed.\n"); return; } - priv->pkey_index = new_index; + } else { result = update_parent_pkey(priv); /* restart QP only if P_Key value changed */ @@ -1038,8 +1043,12 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, ipoib_ib_dev_down(dev, 0); if (level == IPOIB_FLUSH_HEAVY) { - ipoib_ib_dev_stop(dev, 0); - ipoib_ib_dev_open(dev); + if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags)) + ipoib_ib_dev_stop(dev, 0); + if (ipoib_ib_dev_open(dev, 0) != 0) + return; + if (netif_queue_stopped(dev)) + netif_start_queue(dev); } /* @@ -1094,54 +1103,4 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) ipoib_transport_dev_cleanup(dev); } -/* - * Delayed P_Key Assigment Interim Support - * - * The following is initial implementation of delayed P_Key assigment - * mechanism. It is using the same approach implemented for the multicast - * group join. The single goal of this implementation is to quickly address - * Bug #2507. This implementation will probably be removed when the P_Key - * change async notification is available. - */ - -void ipoib_pkey_poll(struct work_struct *work) -{ - struct ipoib_dev_priv *priv = - container_of(work, struct ipoib_dev_priv, pkey_poll_task.work); - struct net_device *dev = priv->dev; - - ipoib_pkey_dev_check_presence(dev); - - if (test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) - ipoib_open(dev); - else { - mutex_lock(&pkey_mutex); - if (!test_bit(IPOIB_PKEY_STOP, &priv->flags)) - queue_delayed_work(ipoib_workqueue, - &priv->pkey_poll_task, - HZ); - mutex_unlock(&pkey_mutex); - } -} - -int ipoib_pkey_dev_delay_open(struct net_device *dev) -{ - struct ipoib_dev_priv *priv = netdev_priv(dev); - - /* Look for the interface pkey value in the IB Port P_Key table and */ - /* set the interface pkey assigment flag */ - ipoib_pkey_dev_check_presence(dev); - /* P_Key value not assigned yet - start polling */ - if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) { - mutex_lock(&pkey_mutex); - clear_bit(IPOIB_PKEY_STOP, &priv->flags); - queue_delayed_work(ipoib_workqueue, - &priv->pkey_poll_task, - HZ); - mutex_unlock(&pkey_mutex); - return 1; - } - - return 0; -} diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 5786a78ff8bc..217cb77157d8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -108,11 +108,11 @@ int ipoib_open(struct net_device *dev) set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags); - if (ipoib_pkey_dev_delay_open(dev)) - return 0; - - if (ipoib_ib_dev_open(dev)) + if (ipoib_ib_dev_open(dev, 1)) { + if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags)) + return 0; goto err_disable; + } if (ipoib_ib_dev_up(dev)) goto err_stop; @@ -1379,7 +1379,6 @@ void ipoib_setup(struct net_device *dev) INIT_LIST_HEAD(&priv->dead_ahs); INIT_LIST_HEAD(&priv->multicast_list); - INIT_DELAYED_WORK(&priv->pkey_poll_task, ipoib_pkey_poll); INIT_DELAYED_WORK(&priv->mcast_task, ipoib_mcast_join_task); INIT_WORK(&priv->carrier_on_task, ipoib_mcast_carrier_on_task); INIT_WORK(&priv->flush_light, ipoib_ib_dev_flush_light); diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index eb7973957a6e..61ee91d88380 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -596,20 +596,28 @@ iscsi_iser_ep_connect(struct Scsi_Host *shost, struct sockaddr *dst_addr, struct iser_conn *ib_conn; struct iscsi_endpoint *ep; - ep = iscsi_create_endpoint(sizeof(*ib_conn)); + ep = iscsi_create_endpoint(0); if (!ep) return ERR_PTR(-ENOMEM); - ib_conn = ep->dd_data; + ib_conn = kzalloc(sizeof(*ib_conn), GFP_KERNEL); + if (!ib_conn) { + err = -ENOMEM; + goto failure; + } + + ep->dd_data = ib_conn; ib_conn->ep = ep; iser_conn_init(ib_conn); - err = iser_connect(ib_conn, NULL, (struct sockaddr_in *)dst_addr, - non_blocking); + err = iser_connect(ib_conn, NULL, dst_addr, non_blocking); if (err) - return ERR_PTR(err); + goto failure; return ep; +failure: + iscsi_destroy_endpoint(ep); + return ERR_PTR(err); } static int @@ -619,15 +627,16 @@ iscsi_iser_ep_poll(struct iscsi_endpoint *ep, int timeout_ms) int rc; ib_conn = ep->dd_data; - rc = wait_event_interruptible_timeout(ib_conn->wait, - ib_conn->state == ISER_CONN_UP, - msecs_to_jiffies(timeout_ms)); - + rc = wait_for_completion_interruptible_timeout(&ib_conn->up_completion, + msecs_to_jiffies(timeout_ms)); /* if conn establishment failed, return error code to iscsi */ - if (!rc && - (ib_conn->state == ISER_CONN_TERMINATING || - ib_conn->state == ISER_CONN_DOWN)) - rc = -1; + if (rc == 0) { + mutex_lock(&ib_conn->state_mutex); + if (ib_conn->state == ISER_CONN_TERMINATING || + ib_conn->state == ISER_CONN_DOWN) + rc = -1; + mutex_unlock(&ib_conn->state_mutex); + } iser_info("ib conn %p rc = %d\n", ib_conn, rc); @@ -646,19 +655,25 @@ iscsi_iser_ep_disconnect(struct iscsi_endpoint *ep) ib_conn = ep->dd_data; iser_info("ep %p ib conn %p state %d\n", ep, ib_conn, ib_conn->state); + mutex_lock(&ib_conn->state_mutex); iser_conn_terminate(ib_conn); /* - * if iser_conn and iscsi_conn are bound, we must wait iscsi_conn_stop - * call and ISER_CONN_DOWN state before freeing the iser resources. - * otherwise we are safe to free resources immediately. + * if iser_conn and iscsi_conn are bound, we must wait for + * iscsi_conn_stop and flush errors completion before freeing + * the iser resources. Otherwise we are safe to free resources + * immediately. */ if (ib_conn->iscsi_conn) { INIT_WORK(&ib_conn->release_work, iser_release_work); queue_work(release_wq, &ib_conn->release_work); + mutex_unlock(&ib_conn->state_mutex); } else { + ib_conn->state = ISER_CONN_DOWN; + mutex_unlock(&ib_conn->state_mutex); iser_conn_release(ib_conn); } + iscsi_destroy_endpoint(ep); } static umode_t iser_attr_is_visible(int param_type, int param) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 97cd385bf7f7..c877dad381cb 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -326,7 +326,6 @@ struct iser_conn { struct iser_device *device; /* device context */ struct rdma_cm_id *cma_id; /* CMA ID */ struct ib_qp *qp; /* QP */ - wait_queue_head_t wait; /* waitq for conn/disconn */ unsigned qp_max_recv_dtos; /* num of rx buffers */ unsigned qp_max_recv_dtos_mask; /* above minus 1 */ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */ @@ -335,6 +334,9 @@ struct iser_conn { char name[ISER_OBJECT_NAME_SIZE]; struct work_struct release_work; struct completion stop_completion; + struct mutex state_mutex; + struct completion flush_completion; + struct completion up_completion; struct list_head conn_list; /* entry in ig conn list */ char *login_buf; @@ -448,8 +450,8 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, - struct sockaddr_in *src_addr, - struct sockaddr_in *dst_addr, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, int non_blocking); int iser_reg_page_vec(struct iser_conn *ib_conn, diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ea01075f9f9b..3ef167f97d6f 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -491,10 +491,9 @@ out_err: } /** - * releases the QP objects, returns 0 on success, - * -1 on failure + * releases the QP object */ -static int iser_free_ib_conn_res(struct iser_conn *ib_conn) +static void iser_free_ib_conn_res(struct iser_conn *ib_conn) { int cq_index; BUG_ON(ib_conn == NULL); @@ -513,8 +512,6 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn) } ib_conn->qp = NULL; - - return 0; } /** @@ -568,31 +565,40 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } +/** + * Called with state mutex held + **/ static int iser_conn_state_comp_exch(struct iser_conn *ib_conn, enum iser_ib_conn_state comp, enum iser_ib_conn_state exch) { int ret; - spin_lock_bh(&ib_conn->lock); if ((ret = (ib_conn->state == comp))) ib_conn->state = exch; - spin_unlock_bh(&ib_conn->lock); return ret; } void iser_release_work(struct work_struct *work) { struct iser_conn *ib_conn; + int rc; ib_conn = container_of(work, struct iser_conn, release_work); /* wait for .conn_stop callback */ - wait_for_completion(&ib_conn->stop_completion); + rc = wait_for_completion_timeout(&ib_conn->stop_completion, 30 * HZ); + WARN_ON(rc == 0); /* wait for the qp`s post send and post receive buffers to empty */ - wait_event_interruptible(ib_conn->wait, - ib_conn->state == ISER_CONN_DOWN); + rc = wait_for_completion_timeout(&ib_conn->flush_completion, 30 * HZ); + WARN_ON(rc == 0); + + ib_conn->state = ISER_CONN_DOWN; + + mutex_lock(&ib_conn->state_mutex); + ib_conn->state = ISER_CONN_DOWN; + mutex_unlock(&ib_conn->state_mutex); iser_conn_release(ib_conn); } @@ -604,23 +610,27 @@ void iser_conn_release(struct iser_conn *ib_conn) { struct iser_device *device = ib_conn->device; - BUG_ON(ib_conn->state == ISER_CONN_UP); - mutex_lock(&ig.connlist_mutex); list_del(&ib_conn->conn_list); mutex_unlock(&ig.connlist_mutex); + + mutex_lock(&ib_conn->state_mutex); + BUG_ON(ib_conn->state != ISER_CONN_DOWN); + iser_free_rx_descriptors(ib_conn); iser_free_ib_conn_res(ib_conn); ib_conn->device = NULL; /* on EVENT_ADDR_ERROR there's no device yet for this conn */ if (device != NULL) iser_device_try_release(device); + mutex_unlock(&ib_conn->state_mutex); + /* if cma handler context, the caller actually destroy the id */ if (ib_conn->cma_id != NULL) { rdma_destroy_id(ib_conn->cma_id); ib_conn->cma_id = NULL; } - iscsi_destroy_endpoint(ib_conn->ep); + kfree(ib_conn); } /** @@ -642,22 +652,31 @@ void iser_conn_terminate(struct iser_conn *ib_conn) ib_conn,err); } +/** + * Called with state mutex held + **/ static void iser_connect_error(struct rdma_cm_id *cma_id) { struct iser_conn *ib_conn; ib_conn = (struct iser_conn *)cma_id->context; - ib_conn->state = ISER_CONN_DOWN; - wake_up_interruptible(&ib_conn->wait); } +/** + * Called with state mutex held + **/ static void iser_addr_handler(struct rdma_cm_id *cma_id) { struct iser_device *device; struct iser_conn *ib_conn; int ret; + ib_conn = (struct iser_conn *)cma_id->context; + if (ib_conn->state != ISER_CONN_PENDING) + /* bailout */ + return; + device = iser_device_find_by_ib_device(cma_id); if (!device) { iser_err("device lookup/creation failed\n"); @@ -665,7 +684,6 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) return; } - ib_conn = (struct iser_conn *)cma_id->context; ib_conn->device = device; /* connection T10-PI support */ @@ -689,18 +707,27 @@ static void iser_addr_handler(struct rdma_cm_id *cma_id) } } +/** + * Called with state mutex held + **/ static void iser_route_handler(struct rdma_cm_id *cma_id) { struct rdma_conn_param conn_param; int ret; struct iser_cm_hdr req_hdr; + struct iser_conn *ib_conn = (struct iser_conn *)cma_id->context; + struct iser_device *device = ib_conn->device; + + if (ib_conn->state != ISER_CONN_PENDING) + /* bailout */ + return; ret = iser_create_ib_conn_res((struct iser_conn *)cma_id->context); if (ret) goto failure; memset(&conn_param, 0, sizeof conn_param); - conn_param.responder_resources = 4; + conn_param.responder_resources = device->dev_attr.max_qp_rd_atom; conn_param.initiator_depth = 1; conn_param.retry_count = 7; conn_param.rnr_retry_count = 6; @@ -728,12 +755,16 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id) struct ib_qp_attr attr; struct ib_qp_init_attr init_attr; + ib_conn = (struct iser_conn *)cma_id->context; + if (ib_conn->state != ISER_CONN_PENDING) + /* bailout */ + return; + (void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr); iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num); - ib_conn = (struct iser_conn *)cma_id->context; - if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_PENDING, ISER_CONN_UP)) - wake_up_interruptible(&ib_conn->wait); + ib_conn->state = ISER_CONN_UP; + complete(&ib_conn->up_completion); } static void iser_disconnected_handler(struct rdma_cm_id *cma_id) @@ -752,19 +783,25 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id) iser_err("iscsi_iser connection isn't bound\n"); } - /* Complete the termination process if no posts are pending */ + /* Complete the termination process if no posts are pending. This code + * block also exists in iser_handle_comp_error(), but it is needed here + * for cases of no flushes at all, e.g. discovery over rdma. + */ if (ib_conn->post_recv_buf_count == 0 && (atomic_read(&ib_conn->post_send_buf_count) == 0)) { - ib_conn->state = ISER_CONN_DOWN; - wake_up_interruptible(&ib_conn->wait); + complete(&ib_conn->flush_completion); } } static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { + struct iser_conn *ib_conn; + + ib_conn = (struct iser_conn *)cma_id->context; iser_info("event %d status %d conn %p id %p\n", event->event, event->status, cma_id->context, cma_id); + mutex_lock(&ib_conn->state_mutex); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: iser_addr_handler(cma_id); @@ -785,24 +822,28 @@ static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *eve case RDMA_CM_EVENT_DISCONNECTED: case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_TIMEWAIT_EXIT: iser_disconnected_handler(cma_id); break; default: iser_err("Unexpected RDMA CM event (%d)\n", event->event); break; } + mutex_unlock(&ib_conn->state_mutex); return 0; } void iser_conn_init(struct iser_conn *ib_conn) { ib_conn->state = ISER_CONN_INIT; - init_waitqueue_head(&ib_conn->wait); ib_conn->post_recv_buf_count = 0; atomic_set(&ib_conn->post_send_buf_count, 0); init_completion(&ib_conn->stop_completion); + init_completion(&ib_conn->flush_completion); + init_completion(&ib_conn->up_completion); INIT_LIST_HEAD(&ib_conn->conn_list); spin_lock_init(&ib_conn->lock); + mutex_init(&ib_conn->state_mutex); } /** @@ -810,22 +851,21 @@ void iser_conn_init(struct iser_conn *ib_conn) * sleeps until the connection is established or rejected */ int iser_connect(struct iser_conn *ib_conn, - struct sockaddr_in *src_addr, - struct sockaddr_in *dst_addr, + struct sockaddr *src_addr, + struct sockaddr *dst_addr, int non_blocking) { - struct sockaddr *src, *dst; int err = 0; - sprintf(ib_conn->name, "%pI4:%d", - &dst_addr->sin_addr.s_addr, dst_addr->sin_port); + mutex_lock(&ib_conn->state_mutex); + + sprintf(ib_conn->name, "%pISp", dst_addr); + + iser_info("connecting to: %s\n", ib_conn->name); /* the device is known only --after-- address resolution */ ib_conn->device = NULL; - iser_info("connecting to: %pI4, port 0x%x\n", - &dst_addr->sin_addr, dst_addr->sin_port); - ib_conn->state = ISER_CONN_PENDING; ib_conn->cma_id = rdma_create_id(iser_cma_handler, @@ -837,23 +877,21 @@ int iser_connect(struct iser_conn *ib_conn, goto id_failure; } - src = (struct sockaddr *)src_addr; - dst = (struct sockaddr *)dst_addr; - err = rdma_resolve_addr(ib_conn->cma_id, src, dst, 1000); + err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000); if (err) { iser_err("rdma_resolve_addr failed: %d\n", err); goto addr_failure; } if (!non_blocking) { - wait_event_interruptible(ib_conn->wait, - (ib_conn->state != ISER_CONN_PENDING)); + wait_for_completion_interruptible(&ib_conn->up_completion); if (ib_conn->state != ISER_CONN_UP) { err = -EIO; goto connect_failure; } } + mutex_unlock(&ib_conn->state_mutex); mutex_lock(&ig.connlist_mutex); list_add(&ib_conn->conn_list, &ig.connlist); @@ -865,6 +903,7 @@ id_failure: addr_failure: ib_conn->state = ISER_CONN_DOWN; connect_failure: + mutex_unlock(&ib_conn->state_mutex); iser_conn_release(ib_conn); return err; } @@ -1049,18 +1088,19 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc, if (ib_conn->post_recv_buf_count == 0 && atomic_read(&ib_conn->post_send_buf_count) == 0) { - /* getting here when the state is UP means that the conn is * - * being terminated asynchronously from the iSCSI layer's * - * perspective. */ - if (iser_conn_state_comp_exch(ib_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)) + /** + * getting here when the state is UP means that the conn is + * being terminated asynchronously from the iSCSI layer's + * perspective. It is safe to peek at the connection state + * since iscsi_conn_failure is allowed to be called twice. + **/ + if (ib_conn->state == ISER_CONN_UP) iscsi_conn_failure(ib_conn->iscsi_conn, ISCSI_ERR_CONN_FAILED); /* no more non completed posts to the QP, complete the * termination process w.o worrying on disconnect event */ - ib_conn->state = ISER_CONN_DOWN; - wake_up_interruptible(&ib_conn->wait); + complete(&ib_conn->flush_completion); } } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index e3c2c5b4297f..62d2a18e1b41 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -130,6 +130,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr); static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); static struct scsi_transport_template *ib_srp_transport_template; +static struct workqueue_struct *srp_remove_wq; static struct ib_client srp_client = { .name = "srp", @@ -731,7 +732,7 @@ static bool srp_queue_remove_work(struct srp_target_port *target) spin_unlock_irq(&target->lock); if (changed) - queue_work(system_long_wq, &target->remove_work); + queue_work(srp_remove_wq, &target->remove_work); return changed; } @@ -1643,10 +1644,14 @@ static void srp_process_rsp(struct srp_target_port *target, struct srp_rsp *rsp) SCSI_SENSE_BUFFERSIZE)); } - if (rsp->flags & (SRP_RSP_FLAG_DOOVER | SRP_RSP_FLAG_DOUNDER)) - scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); - else if (rsp->flags & (SRP_RSP_FLAG_DIOVER | SRP_RSP_FLAG_DIUNDER)) + if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER)) scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER)) + scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER)) + scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt)); + else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER)) + scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt)); srp_free_req(target, req, scmnd, be32_to_cpu(rsp->req_lim_delta)); @@ -3261,9 +3266,10 @@ static void srp_remove_one(struct ib_device *device) spin_unlock(&host->target_lock); /* - * Wait for target port removal tasks. + * Wait for tl_err and target port removal tasks. */ flush_workqueue(system_long_wq); + flush_workqueue(srp_remove_wq); kfree(host); } @@ -3313,16 +3319,22 @@ static int __init srp_init_module(void) indirect_sg_entries = cmd_sg_entries; } + srp_remove_wq = create_workqueue("srp_remove"); + if (!srp_remove_wq) { + ret = -ENOMEM; + goto out; + } + + ret = -ENOMEM; ib_srp_transport_template = srp_attach_transport(&ib_srp_transport_functions); if (!ib_srp_transport_template) - return -ENOMEM; + goto destroy_wq; ret = class_register(&srp_class); if (ret) { pr_err("couldn't register class infiniband_srp\n"); - srp_release_transport(ib_srp_transport_template); - return ret; + goto release_tr; } ib_sa_register_client(&srp_sa_client); @@ -3330,13 +3342,22 @@ static int __init srp_init_module(void) ret = ib_register_client(&srp_client); if (ret) { pr_err("couldn't register IB client\n"); - srp_release_transport(ib_srp_transport_template); - ib_sa_unregister_client(&srp_sa_client); - class_unregister(&srp_class); - return ret; + goto unreg_sa; } - return 0; +out: + return ret; + +unreg_sa: + ib_sa_unregister_client(&srp_sa_client); + class_unregister(&srp_class); + +release_tr: + srp_release_transport(ib_srp_transport_template); + +destroy_wq: + destroy_workqueue(srp_remove_wq); + goto out; } static void __exit srp_cleanup_module(void) @@ -3345,6 +3366,7 @@ static void __exit srp_cleanup_module(void) ib_sa_unregister_client(&srp_sa_client); class_unregister(&srp_class); srp_release_transport(ib_srp_transport_template); + destroy_workqueue(srp_remove_wq); } module_init(srp_init_module); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 8a8311e35cbd..d28a8c284da9 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -198,6 +198,7 @@ static void srpt_event_handler(struct ib_event_handler *handler, case IB_EVENT_PKEY_CHANGE: case IB_EVENT_SM_CHANGE: case IB_EVENT_CLIENT_REREGISTER: + case IB_EVENT_GID_CHANGE: /* Refresh port data asynchronously. */ if (event->element.port_num <= sdev->device->phys_port_cnt) { sport = &sdev->port[event->element.port_num - 1]; diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index c2f5d2d3b932..56d4d100e024 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -890,5 +890,6 @@ void be_roce_dev_remove(struct be_adapter *); */ void be_roce_dev_open(struct be_adapter *); void be_roce_dev_close(struct be_adapter *); +void be_roce_dev_shutdown(struct be_adapter *); #endif /* BE_H */ diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 1e187fb760f8..36ce69ae9a2f 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4958,6 +4958,7 @@ static void be_shutdown(struct pci_dev *pdev) if (!adapter) return; + be_roce_dev_shutdown(adapter); cancel_delayed_work_sync(&adapter->work); cancel_delayed_work_sync(&adapter->func_recovery_work); diff --git a/drivers/net/ethernet/emulex/benet/be_roce.c b/drivers/net/ethernet/emulex/benet/be_roce.c index 5bf16603a3e9..ef4672dc7357 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.c +++ b/drivers/net/ethernet/emulex/benet/be_roce.c @@ -120,7 +120,8 @@ static void _be_roce_dev_open(struct be_adapter *adapter) { if (ocrdma_drv && adapter->ocrdma_dev && ocrdma_drv->state_change_handler) - ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 0); + ocrdma_drv->state_change_handler(adapter->ocrdma_dev, + BE_DEV_UP); } void be_roce_dev_open(struct be_adapter *adapter) @@ -136,7 +137,8 @@ static void _be_roce_dev_close(struct be_adapter *adapter) { if (ocrdma_drv && adapter->ocrdma_dev && ocrdma_drv->state_change_handler) - ocrdma_drv->state_change_handler(adapter->ocrdma_dev, 1); + ocrdma_drv->state_change_handler(adapter->ocrdma_dev, + BE_DEV_DOWN); } void be_roce_dev_close(struct be_adapter *adapter) @@ -148,6 +150,18 @@ void be_roce_dev_close(struct be_adapter *adapter) } } +void be_roce_dev_shutdown(struct be_adapter *adapter) +{ + if (be_roce_supported(adapter)) { + mutex_lock(&be_adapter_list_lock); + if (ocrdma_drv && adapter->ocrdma_dev && + ocrdma_drv->state_change_handler) + ocrdma_drv->state_change_handler(adapter->ocrdma_dev, + BE_DEV_SHUTDOWN); + mutex_unlock(&be_adapter_list_lock); + } +} + int be_roce_register_driver(struct ocrdma_driver *drv) { struct be_adapter *dev; diff --git a/drivers/net/ethernet/emulex/benet/be_roce.h b/drivers/net/ethernet/emulex/benet/be_roce.h index a3d9e96c18eb..e6f7eb1a7d87 100644 --- a/drivers/net/ethernet/emulex/benet/be_roce.h +++ b/drivers/net/ethernet/emulex/benet/be_roce.h @@ -62,7 +62,8 @@ struct ocrdma_driver { enum { BE_DEV_UP = 0, - BE_DEV_DOWN = 1 + BE_DEV_DOWN = 1, + BE_DEV_SHUTDOWN = 2 }; /* APIs for RoCE driver to register callback handlers, diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5d940a26055c..65a4a0f88ea0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1311,6 +1311,15 @@ static struct mlx4_cmd_info cmd_info[] = { .wrapper = mlx4_MAD_IFC_wrapper }, { + .opcode = MLX4_CMD_MAD_DEMUX, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, + { .opcode = MLX4_CMD_QUERY_IF_STAT, .has_inbox = false, .has_outbox = true, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 688e1eabab29..494753e44ae3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -136,7 +136,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [7] = "FSM (MAC anti-spoofing) support", [8] = "Dynamic QP updates support", [9] = "Device managed flow steering IPoIB support", - [10] = "TCP/IP offloads/flow-steering for VXLAN support" + [10] = "TCP/IP offloads/flow-steering for VXLAN support", + [11] = "MAD DEMUX (Secure-Host) support" }; int i; @@ -571,6 +572,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_ICM_SZ_OFFSET 0xa0 #define QUERY_DEV_CAP_FW_REASSIGN_MAC 0x9d #define QUERY_DEV_CAP_VXLAN 0x9e +#define QUERY_DEV_CAP_MAD_DEMUX_OFFSET 0xb0 dev_cap->flags2 = 0; mailbox = mlx4_alloc_cmd_mailbox(dev); @@ -748,6 +750,11 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(dev_cap->max_counters, outbox, QUERY_DEV_CAP_MAX_COUNTERS_OFFSET); + MLX4_GET(field32, outbox, + QUERY_DEV_CAP_MAD_DEMUX_OFFSET); + if (field32 & (1 << 0)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_MAD_DEMUX; + MLX4_GET(field32, outbox, QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET); if (field32 & (1 << 16)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_UPDATE_QP; @@ -2016,3 +2023,85 @@ void mlx4_opreq_action(struct work_struct *work) out: mlx4_free_cmd_mailbox(dev, mailbox); } + +static int mlx4_check_smp_firewall_active(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *mailbox) +{ +#define MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET 0x10 +#define MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET 0x20 +#define MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET 0x40 +#define MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET 0x70 + + u32 set_attr_mask, getresp_attr_mask; + u32 trap_attr_mask, traprepress_attr_mask; + + MLX4_GET(set_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_SET_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall set_attribute_mask = 0x%x\n", + set_attr_mask); + + MLX4_GET(getresp_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_GETRESP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall getresp_attribute_mask = 0x%x\n", + getresp_attr_mask); + + MLX4_GET(trap_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall trap_attribute_mask = 0x%x\n", + trap_attr_mask); + + MLX4_GET(traprepress_attr_mask, mailbox->buf, + MLX4_CMD_MAD_DEMUX_TRAP_REPRESS_ATTR_OFFSET); + mlx4_dbg(dev, "SMP firewall traprepress_attribute_mask = 0x%x\n", + traprepress_attr_mask); + + if (set_attr_mask && getresp_attr_mask && trap_attr_mask && + traprepress_attr_mask) + return 1; + + return 0; +} + +int mlx4_config_mad_demux(struct mlx4_dev *dev) +{ + struct mlx4_cmd_mailbox *mailbox; + int secure_host_active; + int err; + + /* Check if mad_demux is supported */ + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_MAD_DEMUX)) + return 0; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) { + mlx4_warn(dev, "Failed to allocate mailbox for cmd MAD_DEMUX"); + return -ENOMEM; + } + + /* Query mad_demux to find out which MADs are handled by internal sma */ + err = mlx4_cmd_box(dev, 0, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: query restrictions failed (%d)\n", + err); + goto out; + } + + secure_host_active = mlx4_check_smp_firewall_active(dev, mailbox); + + /* Config mad_demux to handle all MADs returned by the query above */ + err = mlx4_cmd(dev, mailbox->dma, 0x01 /* subn mgmt class */, + MLX4_CMD_MAD_DEMUX_CONFIG, MLX4_CMD_MAD_DEMUX, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + if (err) { + mlx4_warn(dev, "MLX4_CMD_MAD_DEMUX: configure failed (%d)\n", err); + goto out; + } + + if (secure_host_active) + mlx4_warn(dev, "HCA operating in secure-host mode. SMP firewall activated.\n"); +out: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 82ab427290c3..f2c8e8ba23fe 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1831,6 +1831,11 @@ static int mlx4_setup_hca(struct mlx4_dev *dev) mlx4_err(dev, "Failed to initialize multicast group table, aborting\n"); goto err_mr_table_free; } + err = mlx4_config_mad_demux(dev); + if (err) { + mlx4_err(dev, "Failed in config_mad_demux, aborting\n"); + goto err_mcg_table_free; + } } err = mlx4_init_eq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 1d8af7336807..3398ff7e67e9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -279,6 +279,8 @@ struct mlx4_icm_table { #define MLX4_MPT_FLAG_PHYSICAL (1 << 9) #define MLX4_MPT_FLAG_REGION (1 << 8) +#define MLX4_MPT_PD_MASK (0x1FFFFUL) +#define MLX4_MPT_PD_VF_MASK (0xFE0000UL) #define MLX4_MPT_PD_FLAG_FAST_REG (1 << 27) #define MLX4_MPT_PD_FLAG_RAE (1 << 28) #define MLX4_MPT_PD_FLAG_EN_INV (3 << 24) @@ -1311,5 +1313,6 @@ void mlx4_init_quotas(struct mlx4_dev *dev); int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); +int mlx4_config_mad_demux(struct mlx4_dev *dev); #endif /* MLX4_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 2839abb878a6..7d717eccb7b0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -298,6 +298,131 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED); } +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry) +{ + int err; + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + struct mlx4_cmd_mailbox *mailbox = NULL; + + /* Make sure that at this point we have single-threaded access only */ + + if (mmr->enabled != MLX4_MPT_EN_HW) + return -EINVAL; + + err = mlx4_HW2SW_MPT(dev, NULL, key); + + if (err) { + mlx4_warn(dev, "HW2SW_MPT failed (%d).", err); + mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n"); + return err; + } + + mmr->enabled = MLX4_MPT_EN_SW; + + if (!mlx4_is_mfunc(dev)) { + **mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + key, NULL); + } else { + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR_OR_NULL(mailbox)) + return PTR_ERR(mailbox); + + err = mlx4_cmd_box(dev, 0, mailbox->dma, key, + 0, MLX4_CMD_QUERY_MPT, + MLX4_CMD_TIME_CLASS_B, + MLX4_CMD_WRAPPED); + + if (err) + goto free_mailbox; + + *mpt_entry = (struct mlx4_mpt_entry **)&mailbox->buf; + } + + if (!(*mpt_entry) || !(**mpt_entry)) { + err = -ENOMEM; + goto free_mailbox; + } + + return 0; + +free_mailbox: + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_get_mpt); + +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry) +{ + int err; + + if (!mlx4_is_mfunc(dev)) { + /* Make sure any changes to this entry are flushed */ + wmb(); + + *(u8 *)(*mpt_entry) = MLX4_MPT_STATUS_HW; + + /* Make sure the new status is written */ + wmb(); + + err = mlx4_SYNC_TPT(dev); + } else { + int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1); + + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + + err = mlx4_SW2HW_MPT(dev, mailbox, key); + } + + mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK; + if (!err) + mmr->enabled = MLX4_MPT_EN_HW; + return err; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt); + +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry) +{ + if (mlx4_is_mfunc(dev)) { + struct mlx4_cmd_mailbox *mailbox = + container_of((void *)mpt_entry, struct mlx4_cmd_mailbox, + buf); + mlx4_free_cmd_mailbox(dev, mailbox); + } +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt); + +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn) +{ + u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags); + /* The wrapper function will put the slave's id here */ + if (mlx4_is_mfunc(dev)) + pd_flags &= ~MLX4_MPT_PD_VF_MASK; + mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) | + (pdn & MLX4_MPT_PD_MASK) + | MLX4_MPT_PD_FLAG_EN_INV); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_pd); + +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access) +{ + u32 flags = (be32_to_cpu(mpt_entry->flags) & ~MLX4_PERM_MASK) | + (access & MLX4_PERM_MASK); + + mpt_entry->flags = cpu_to_be32(flags); + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_hw_change_access); + static int mlx4_mr_alloc_reserved(struct mlx4_dev *dev, u32 mridx, u32 pd, u64 iova, u64 size, u32 access, int npages, int page_shift, struct mlx4_mr *mr) @@ -463,6 +588,41 @@ int mlx4_mr_free(struct mlx4_dev *dev, struct mlx4_mr *mr) } EXPORT_SYMBOL_GPL(mlx4_mr_free); +void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr) +{ + mlx4_mtt_cleanup(dev, &mr->mtt); +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_cleanup); + +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry) +{ + int err; + + mpt_entry->start = cpu_to_be64(mr->iova); + mpt_entry->length = cpu_to_be64(mr->size); + mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift); + + err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt); + if (err) + return err; + + if (mr->mtt.order < 0) { + mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL); + mpt_entry->mtt_addr = 0; + } else { + mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev, + &mr->mtt)); + if (mr->mtt.page_shift == 0) + mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order); + } + mr->enabled = MLX4_MPT_EN_SW; + + return 0; +} +EXPORT_SYMBOL_GPL(mlx4_mr_rereg_mem_write); + int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr) { struct mlx4_cmd_mailbox *mailbox; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 0efc1368e5a8..1089367fed22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2613,12 +2613,34 @@ int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave, if (err) return err; - if (mpt->com.from_state != RES_MPT_HW) { + if (mpt->com.from_state == RES_MPT_MAPPED) { + /* In order to allow rereg in SRIOV, we need to alter the MPT entry. To do + * that, the VF must read the MPT. But since the MPT entry memory is not + * in the VF's virtual memory space, it must use QUERY_MPT to obtain the + * entry contents. To guarantee that the MPT cannot be changed, the driver + * must perform HW2SW_MPT before this query and return the MPT entry to HW + * ownership fofollowing the change. The change here allows the VF to + * perform QUERY_MPT also when the entry is in SW ownership. + */ + struct mlx4_mpt_entry *mpt_entry = mlx4_table_find( + &mlx4_priv(dev)->mr_table.dmpt_table, + mpt->key, NULL); + + if (NULL == mpt_entry || NULL == outbox->buf) { + err = -EINVAL; + goto out; + } + + memcpy(outbox->buf, mpt_entry, sizeof(*mpt_entry)); + + err = 0; + } else if (mpt->com.from_state == RES_MPT_HW) { + err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); + } else { err = -EBUSY; goto out; } - err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); out: put_res(dev, slave, id, RES_MPT); diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c index 13e898332e45..a0c5bfdc5366 100644 --- a/drivers/scsi/scsi_transport_srp.c +++ b/drivers/scsi/scsi_transport_srp.c @@ -473,7 +473,8 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport) if (delay > 0) queue_delayed_work(system_long_wq, &rport->reconnect_work, 1UL * delay * HZ); - if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { + if ((fast_io_fail_tmo >= 0 || dev_loss_tmo >= 0) && + srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) { pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev), rport->state); scsi_target_block(&shost->shost_gendev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index c8450366c130..379c02648ab3 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -116,6 +116,7 @@ enum { /* special QP and management commands */ MLX4_CMD_CONF_SPECIAL_QP = 0x23, MLX4_CMD_MAD_IFC = 0x24, + MLX4_CMD_MAD_DEMUX = 0x203, /* multicast commands */ MLX4_CMD_READ_MCG = 0x25, @@ -186,6 +187,12 @@ enum { }; enum { + MLX4_CMD_MAD_DEMUX_CONFIG = 0, + MLX4_CMD_MAD_DEMUX_QUERY_STATE = 1, + MLX4_CMD_MAD_DEMUX_QUERY_RESTR = 2, /* Query mad demux restrictions */ +}; + +enum { MLX4_CMD_WRAPPED, MLX4_CMD_NATIVE }; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 35b51e7af886..be4ab553a317 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -172,6 +172,7 @@ enum { MLX4_DEV_CAP_FLAG2_UPDATE_QP = 1LL << 8, MLX4_DEV_CAP_FLAG2_DMFS_IPOIB = 1LL << 9, MLX4_DEV_CAP_FLAG2_VXLAN_OFFLOADS = 1LL << 10, + MLX4_DEV_CAP_FLAG2_MAD_DEMUX = 1LL << 11, }; enum { @@ -262,6 +263,7 @@ enum { MLX4_PERM_REMOTE_WRITE = 1 << 13, MLX4_PERM_ATOMIC = 1 << 14, MLX4_PERM_BIND_MW = 1 << 15, + MLX4_PERM_MASK = 0xFC00 }; enum { @@ -1243,4 +1245,19 @@ int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, int enable); +int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry ***mpt_entry); +int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr, + struct mlx4_mpt_entry **mpt_entry); +int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry, + u32 pdn); +int mlx4_mr_hw_change_access(struct mlx4_dev *dev, + struct mlx4_mpt_entry *mpt_entry, + u32 access); +void mlx4_mr_hw_put_mpt(struct mlx4_dev *dev, + struct mlx4_mpt_entry **mpt_entry); +void mlx4_mr_rereg_mem_cleanup(struct mlx4_dev *dev, struct mlx4_mr *mr); +int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, + u64 iova, u64 size, int npages, + int page_shift, struct mlx4_mpt_entry *mpt_entry); #endif /* MLX4_DEVICE_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 7ccef342f724..ed44cc07a7b3 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1097,7 +1097,8 @@ struct ib_mr_attr { enum ib_mr_rereg_flags { IB_MR_REREG_TRANS = 1, IB_MR_REREG_PD = (1<<1), - IB_MR_REREG_ACCESS = (1<<2) + IB_MR_REREG_ACCESS = (1<<2), + IB_MR_REREG_SUPPORTED = ((IB_MR_REREG_ACCESS << 1) - 1) }; /** @@ -1547,6 +1548,13 @@ struct ib_device { u64 virt_addr, int mr_access_flags, struct ib_udata *udata); + int (*rereg_user_mr)(struct ib_mr *mr, + int flags, + u64 start, u64 length, + u64 virt_addr, + int mr_access_flags, + struct ib_pd *pd, + struct ib_udata *udata); int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index cbfdd4ca9510..26daf55ff76e 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -276,6 +276,22 @@ struct ib_uverbs_reg_mr_resp { __u32 rkey; }; +struct ib_uverbs_rereg_mr { + __u64 response; + __u32 mr_handle; + __u32 flags; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; +}; + +struct ib_uverbs_rereg_mr_resp { + __u32 lkey; + __u32 rkey; +}; + struct ib_uverbs_dereg_mr { __u32 mr_handle; }; diff --git a/include/uapi/rdma/rdma_user_cm.h b/include/uapi/rdma/rdma_user_cm.h index 99b80abf360a..3066718eb120 100644 --- a/include/uapi/rdma/rdma_user_cm.h +++ b/include/uapi/rdma/rdma_user_cm.h @@ -34,6 +34,7 @@ #define RDMA_USER_CM_H #include <linux/types.h> +#include <linux/socket.h> #include <linux/in6.h> #include <rdma/ib_user_verbs.h> #include <rdma/ib_user_sa.h> |