diff options
Diffstat (limited to 'drivers/infiniband')
62 files changed, 604 insertions, 391 deletions
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e6dfa1bd3def..5f65a78b27c9 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } if (ndev->flags & IFF_LOOPBACK) { dev_put(ndev); - if (!id_priv->id.device->get_netdev) - return -EOPNOTSUPP; + if (!id_priv->id.device->get_netdev) { + ret = -EOPNOTSUPP; + goto err2; + } ndev = id_priv->id.device->get_netdev(id_priv->id.device, id_priv->id.port_num); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } } route->path_rec->net = &init_net; diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 3a3c5d73bbfc..51c79b2fb0b8 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -106,7 +106,6 @@ struct mcast_group { atomic_t refcount; enum mcast_group_state state; struct ib_sa_query *query; - int query_id; u16 pkey_index; u8 leave_state; int retries; @@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member) member->multicast.comp_mask, 3000, GFP_KERNEL, join_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static int send_leave(struct mcast_group *group, u8 leave_state) @@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state) IB_SA_MCMEMBER_REC_JOIN_STATE, 3000, GFP_KERNEL, leave_handler, group, &group->query); - if (ret >= 0) { - group->query_id = ret; - ret = 0; - } - return ret; + return (ret > 0) ? 0 : ret; } static void join_group(struct mcast_group *group, struct mcast_member *member, diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3aca7f6171b4..80f988984f44 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -333,6 +333,8 @@ static void remove_ep_tid(struct c4iw_ep *ep) spin_lock_irqsave(&ep->com.dev->lock, flags); _remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0); + if (idr_is_empty(&ep->com.dev->hwtid_idr)) + wake_up(&ep->com.dev->wait); spin_unlock_irqrestore(&ep->com.dev->lock, flags); } @@ -1827,8 +1829,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) (ep->mpa_pkt + sizeof(*mpa)); ep->ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); ep->ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) @@ -2113,8 +2119,10 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip, } ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t, n, pdev, rt_tos2priority(tos)); - if (!ep->l2t) + if (!ep->l2t) { + dev_put(pdev); goto out; + } ep->mtu = pdev->mtu; ep->tx_chan = cxgb4_port_chan(pdev); ep->smac_idx = cxgb4_tp_smt_idx(adapter_type, @@ -3136,7 +3144,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { if (conn_param->ord > ep->ird) { if (RELAXED_IRD_NEGOTIATION) { - ep->ord = ep->ird; + conn_param->ord = ep->ird; } else { ep->ird = conn_param->ird; ep->ord = conn_param->ord; diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 812ab7278b8e..ac926c942fee 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1016,15 +1016,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; - int ret; + int ret = 0; unsigned long flag; chp = to_c4iw_cq(ibcq); spin_lock_irqsave(&chp->lock, flag); - ret = t4_arm_cq(&chp->cq, - (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); spin_unlock_irqrestore(&chp->lock, flag); - if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - ret = 0; return ret; } diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 071d7332ec06..3c4b2126e0d1 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -872,9 +872,13 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev) static void c4iw_dealloc(struct uld_ctx *ctx) { c4iw_rdev_close(&ctx->dev->rdev); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr)); idr_destroy(&ctx->dev->cqidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr)); idr_destroy(&ctx->dev->qpidr); + WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr)); idr_destroy(&ctx->dev->mmidr); + wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr)); idr_destroy(&ctx->dev->hwtid_idr); idr_destroy(&ctx->dev->stid_idr); idr_destroy(&ctx->dev->atid_idr); @@ -992,6 +996,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop) mutex_init(&devp->rdev.stats.lock); mutex_init(&devp->db_mutex); INIT_LIST_HEAD(&devp->db_fc_list); + init_waitqueue_head(&devp->wait); devp->avail_ird = devp->rdev.lldi.max_ird_adapter; if (c4iw_debugfs_root) { diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index aa47e0ae80bc..4b83b84f7ddf 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -263,6 +263,7 @@ struct c4iw_dev { struct idr stid_idr; struct list_head db_fc_list; u32 avail_ird; + wait_queue_head_t wait; }; static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev) diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index edb1172b6f54..690435229be7 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -683,7 +683,7 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr, return 0; } -void _free_qp(struct kref *kref) +static void _free_qp(struct kref *kref) { struct c4iw_qp *qhp; diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 6126bbe36095..02173f4315fa 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -634,6 +634,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) return (CQE_GENBIT(cqe) == cq->gen); } +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) { int ret; diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 79575ee873f2..0566393e5aba 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,7 +47,6 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> -#include <linux/cpumask.h> #include "hfi.h" #include "affinity.h" @@ -682,7 +681,7 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, size_t count) { struct hfi1_affinity_node *entry; - struct cpumask mask; + cpumask_var_t mask; int ret, i; spin_lock(&node_affinity.lock); @@ -692,19 +691,24 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (!entry) return -EINVAL; - ret = cpulist_parse(buf, &mask); + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + ret = cpulist_parse(buf, mask); if (ret) - return ret; + goto out; - if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { + if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { dd_dev_warn(dd, "Invalid CPU mask\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } mutex_lock(&sdma_affinity_mutex); /* reset the SDMA interrupt affinity details */ init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, &mask); + cpumask_copy(&entry->def_intr.mask, mask); /* * Reassign the affinity for each SDMA interrupt. */ @@ -720,8 +724,9 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (ret) break; } - mutex_unlock(&sdma_affinity_mutex); +out: + free_cpumask_var(mask); return ret ? ret : strnlen(buf, PAGE_SIZE); } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index b32638d58ae8..cc38004cea42 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -9490,6 +9490,78 @@ static void init_lcb(struct hfi1_devdata *dd) write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00); } +/* + * Perform a test read on the QSFP. Return 0 on success, -ERRNO + * on error. + */ +static int test_qsfp_read(struct hfi1_pportdata *ppd) +{ + int ret; + u8 status; + + /* report success if not a QSFP */ + if (ppd->port_type != PORT_TYPE_QSFP) + return 0; + + /* read byte 2, the status byte */ + ret = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1); + if (ret < 0) + return ret; + if (ret != 1) + return -EIO; + + return 0; /* success */ +} + +/* + * Values for QSFP retry. + * + * Give up after 10s (20 x 500ms). The overall timeout was empirically + * arrived at from experience on a large cluster. + */ +#define MAX_QSFP_RETRIES 20 +#define QSFP_RETRY_WAIT 500 /* msec */ + +/* + * Try a QSFP read. If it fails, schedule a retry for later. + * Called on first link activation after driver load. + */ +static void try_start_link(struct hfi1_pportdata *ppd) +{ + if (test_qsfp_read(ppd)) { + /* read failed */ + if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) { + dd_dev_err(ppd->dd, "QSFP not responding, giving up\n"); + return; + } + dd_dev_info(ppd->dd, + "QSFP not responding, waiting and retrying %d\n", + (int)ppd->qsfp_retry_count); + ppd->qsfp_retry_count++; + queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work, + msecs_to_jiffies(QSFP_RETRY_WAIT)); + return; + } + ppd->qsfp_retry_count = 0; + + /* + * Tune the SerDes to a ballpark setting for optimal signal and bit + * error rate. Needs to be done before starting the link. + */ + tune_serdes(ppd); + start_link(ppd); +} + +/* + * Workqueue function to start the link after a delay. + */ +void handle_start_link(struct work_struct *work) +{ + struct hfi1_pportdata *ppd = container_of(work, struct hfi1_pportdata, + start_link_work.work); + try_start_link(ppd); +} + int bringup_serdes(struct hfi1_pportdata *ppd) { struct hfi1_devdata *dd = ppd->dd; @@ -9525,14 +9597,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd) set_qsfp_int_n(ppd, 1); } - /* - * Tune the SerDes to a ballpark setting for - * optimal signal and bit error rate - * Needs to be done before starting the link - */ - tune_serdes(ppd); - - return start_link(ppd); + try_start_link(ppd); + return 0; } void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) @@ -9549,6 +9615,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd) ppd->driver_link_ready = 0; ppd->link_enabled = 0; + ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */ + flush_delayed_work(&ppd->start_link_work); + cancel_delayed_work_sync(&ppd->start_link_work); + ppd->offline_disabled_reason = HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED); set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0, @@ -12865,7 +12935,7 @@ fail: */ static int set_up_context_variables(struct hfi1_devdata *dd) { - int num_kernel_contexts; + unsigned long num_kernel_contexts; int total_contexts; int ret; unsigned ngroups; @@ -12894,9 +12964,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd) */ if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) { dd_dev_err(dd, - "Reducing # kernel rcv contexts to: %d, from %d\n", + "Reducing # kernel rcv contexts to: %d, from %lu\n", (int)(dd->chip_send_contexts - num_vls - 1), - (int)num_kernel_contexts); + num_kernel_contexts); num_kernel_contexts = dd->chip_send_contexts - num_vls - 1; } /* diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h index ed11107c50fe..e29573769efc 100644 --- a/drivers/infiniband/hw/hfi1/chip.h +++ b/drivers/infiniband/hw/hfi1/chip.h @@ -706,6 +706,7 @@ void handle_link_up(struct work_struct *work); void handle_link_down(struct work_struct *work); void handle_link_downgrade(struct work_struct *work); void handle_link_bounce(struct work_struct *work); +void handle_start_link(struct work_struct *work); void handle_sma_message(struct work_struct *work); void reset_qsfp(struct hfi1_pportdata *ppd); void qsfp_event(struct work_struct *work); diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9cc288..5e9be16f6cd3 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -59,6 +59,40 @@ static struct dentry *hfi1_dbg_root; +/* wrappers to enforce srcu in seq file */ +static ssize_t hfi1_seq_read( + struct file *file, + char __user *buf, + size_t size, + loff_t *ppos) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + ssize_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_read(file, buf, size, ppos); + debugfs_use_file_finish(srcu_idx); + return r; +} + +static loff_t hfi1_seq_lseek( + struct file *file, + loff_t offset, + int whence) +{ + struct dentry *d = file->f_path.dentry; + int srcu_idx; + loff_t r; + + r = debugfs_use_file_start(d, &srcu_idx); + if (likely(!r)) + r = seq_lseek(file, offset, whence); + debugfs_use_file_finish(srcu_idx); + return r; +} + #define private2dd(file) (file_inode(file)->i_private) #define private2ppd(file) (file_inode(file)->i_private) @@ -87,8 +121,8 @@ static int _##name##_open(struct inode *inode, struct file *s) \ static const struct file_operations _##name##_file_ops = { \ .owner = THIS_MODULE, \ .open = _##name##_open, \ - .read = seq_read, \ - .llseek = seq_lseek, \ + .read = hfi1_seq_read, \ + .llseek = hfi1_seq_lseek, \ .release = seq_release \ } @@ -105,11 +139,9 @@ do { \ DEBUGFS_FILE_CREATE(#name, parent, data, &_##name##_file_ops, S_IRUGO) static void *_opcode_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_opcode_stats_perctx *opstats; - rcu_read_lock(); if (*pos >= ARRAY_SIZE(opstats->stats)) return NULL; return pos; @@ -126,9 +158,7 @@ static void *_opcode_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _opcode_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _opcode_stats_seq_show(struct seq_file *s, void *v) @@ -223,28 +253,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats) DEBUGFS_FILE_OPS(ctx_stats); static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) + __acquires(RCU) { struct qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qp_iter *iter = iter_ptr; @@ -259,7 +293,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) -__releases(RCU) + __releases(RCU) { rcu_read_unlock(); } @@ -281,12 +315,10 @@ DEBUGFS_SEQ_FILE_OPEN(qp_stats) DEBUGFS_FILE_OPS(qp_stats); static void *_sdes_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { struct hfi1_ibdev *ibd; struct hfi1_devdata *dd; - rcu_read_lock(); ibd = (struct hfi1_ibdev *)s->private; dd = dd_from_dev(ibd); if (!dd->per_sdma || *pos >= dd->num_sdma) @@ -306,9 +338,7 @@ static void *_sdes_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _sdes_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _sdes_seq_show(struct seq_file *s, void *v) @@ -335,11 +365,9 @@ static ssize_t dev_counters_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -352,11 +380,9 @@ static ssize_t dev_names_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_cntrs(dd, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -379,11 +405,9 @@ static ssize_t portnames_read(struct file *file, char __user *buf, struct hfi1_devdata *dd; ssize_t rval; - rcu_read_lock(); dd = private2dd(file); avail = hfi1_read_portcntrs(dd->pport, &names, NULL); rval = simple_read_from_buffer(buf, count, ppos, names, avail); - rcu_read_unlock(); return rval; } @@ -396,11 +420,9 @@ static ssize_t portcntrs_debugfs_read(struct file *file, char __user *buf, struct hfi1_pportdata *ppd; ssize_t rval; - rcu_read_lock(); ppd = private2ppd(file); avail = hfi1_read_portcntrs(ppd, NULL, &counters); rval = simple_read_from_buffer(buf, count, ppos, counters, avail); - rcu_read_unlock(); return rval; } @@ -430,16 +452,13 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, int used; int i; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; size = PAGE_SIZE; used = 0; tmp = kmalloc(size, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } scratch0 = read_csr(dd, ASIC_CFG_SCRATCH); used += scnprintf(tmp + used, size - used, @@ -466,7 +485,6 @@ static ssize_t asic_flags_read(struct file *file, char __user *buf, used += scnprintf(tmp + used, size - used, "Write bits to clear\n"); ret = simple_read_from_buffer(buf, count, ppos, tmp, used); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -482,15 +500,12 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, u64 scratch0; u64 clear; - rcu_read_lock(); ppd = private2ppd(file); dd = ppd->dd; buff = kmalloc(count + 1, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto do_return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -523,8 +538,6 @@ static ssize_t asic_flags_write(struct file *file, const char __user *buf, do_free: kfree(buff); - do_return: - rcu_read_unlock(); return ret; } @@ -538,18 +551,14 @@ static ssize_t qsfp_debugfs_dump(struct file *file, char __user *buf, char *tmp; int ret; - rcu_read_lock(); ppd = private2ppd(file); tmp = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!tmp) { - rcu_read_unlock(); + if (!tmp) return -ENOMEM; - } ret = qsfp_dump(ppd, tmp, PAGE_SIZE); if (ret > 0) ret = simple_read_from_buffer(buf, count, ppos, tmp, ret); - rcu_read_unlock(); kfree(tmp); return ret; } @@ -565,7 +574,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, int offset; int total_written; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -573,16 +581,12 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { @@ -602,8 +606,6 @@ static ssize_t __i2c_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -632,7 +634,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, int offset; int total_read; - rcu_read_lock(); ppd = private2ppd(file); /* byte offset format: [offsetSize][i2cAddr][offsetHigh][offsetLow] */ @@ -640,16 +641,12 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, offset = *ppos & 0xffff; /* explicitly reject invalid address 0 to catch cp and cat */ - if (i2c_addr == 0) { - ret = -EINVAL; - goto _return; - } + if (i2c_addr == 0) + return -EINVAL; buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; total_read = i2c_read(ppd, target, i2c_addr, offset, buff, count); if (total_read < 0) { @@ -669,8 +666,6 @@ static ssize_t __i2c_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -697,26 +692,20 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, int ret; int total_written; - rcu_read_lock(); - if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ - ret = -EINVAL; - goto _return; - } + if (*ppos + count > QSFP_PAGESIZE * 4) /* base page + page00-page03 */ + return -EINVAL; ppd = private2ppd(file); buff = kmalloc(count, GFP_KERNEL); - if (!buff) { - ret = -ENOMEM; - goto _return; - } + if (!buff) + return -ENOMEM; ret = copy_from_user(buff, buf, count); if (ret > 0) { ret = -EFAULT; goto _free; } - total_written = qsfp_write(ppd, target, *ppos, buff, count); if (total_written < 0) { ret = total_written; @@ -729,8 +718,6 @@ static ssize_t __qsfp_debugfs_write(struct file *file, const char __user *buf, _free: kfree(buff); - _return: - rcu_read_unlock(); return ret; } @@ -757,7 +744,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, int ret; int total_read; - rcu_read_lock(); if (*ppos + count > QSFP_PAGESIZE * 4) { /* base page + page00-page03 */ ret = -EINVAL; goto _return; @@ -790,7 +776,6 @@ static ssize_t __qsfp_debugfs_read(struct file *file, char __user *buf, _free: kfree(buff); _return: - rcu_read_unlock(); return ret; } @@ -1006,7 +991,6 @@ void hfi1_dbg_ibdev_exit(struct hfi1_ibdev *ibd) debugfs_remove_recursive(ibd->hfi1_ibdev_dbg); out: ibd->hfi1_ibdev_dbg = NULL; - synchronize_rcu(); } /* @@ -1031,9 +1015,7 @@ static const char * const hfi1_statnames[] = { }; static void *_driver_stats_names_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1051,9 +1033,7 @@ static void *_driver_stats_names_seq_next( } static void _driver_stats_names_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static int _driver_stats_names_seq_show(struct seq_file *s, void *v) @@ -1069,9 +1049,7 @@ DEBUGFS_SEQ_FILE_OPEN(driver_stats_names) DEBUGFS_FILE_OPS(driver_stats_names); static void *_driver_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) { - rcu_read_lock(); if (*pos >= ARRAY_SIZE(hfi1_statnames)) return NULL; return pos; @@ -1086,9 +1064,7 @@ static void *_driver_stats_seq_next(struct seq_file *s, void *v, loff_t *pos) } static void _driver_stats_seq_stop(struct seq_file *s, void *v) -__releases(RCU) { - rcu_read_unlock(); } static u64 hfi1_sps_ints(void) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 8246dc7d0573..303f10555729 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -888,14 +888,15 @@ void set_all_slowpath(struct hfi1_devdata *dd) } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, - struct hfi1_packet packet, + struct hfi1_packet *packet, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd, - packet.rhf_addr); + struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); + u8 etype = rhf_rcv_type(packet->rhf); - if (hdr2sc(hdr, packet.rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -979,7 +980,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) /* Auto activate link on non-SC15 packet receive */ if (unlikely(rcd->ppd->host_link_state == HLS_UP_ARMED) && - set_armed_to_active(rcd, packet, dd)) + set_armed_to_active(rcd, &packet, dd)) goto bail; last = process_rcv_packet(&packet, thread); } diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1ecbec192358..7e03ccd2554d 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -183,6 +183,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) if (fd) { fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; + atomic_inc(&fd->mm->mm_count); } fp->private_data = fd; @@ -222,7 +223,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ret = assign_ctxt(fp, &uinfo); if (ret < 0) return ret; - setup_ctxt(fp); + ret = setup_ctxt(fp); if (ret) return ret; ret = user_init(fp); @@ -779,6 +780,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + mmdrop(fdata->mm); kobject_put(&dd->kobj); kfree(fdata); return 0; diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1000e0fd96d9..325ec211370f 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -605,6 +605,7 @@ struct hfi1_pportdata { struct work_struct freeze_work; struct work_struct link_downgrade_work; struct work_struct link_bounce_work; + struct delayed_work start_link_work; /* host link state variables */ struct mutex hls_lock; u32 host_link_state; @@ -659,6 +660,7 @@ struct hfi1_pportdata { u8 linkinit_reason; u8 local_tx_rate; /* rate given to 8051 firmware */ u8 last_pstate; /* info only */ + u8 qsfp_retry_count; /* placeholders for IB MAD packet settings */ u8 overrun_threshold; @@ -1272,9 +1274,26 @@ static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) ((!!(rhf_dc_info(rhf))) << 4); } +#define HFI1_JKEY_WIDTH 16 +#define HFI1_JKEY_MASK (BIT(16) - 1) +#define HFI1_ADMIN_JKEY_RANGE 32 + +/* + * J_KEYs are split and allocated in the following groups: + * 0 - 31 - users with administrator privileges + * 32 - 63 - kernel protocols using KDETH packets + * 64 - 65535 - all other users using KDETH packets + */ static inline u16 generate_jkey(kuid_t uid) { - return from_kuid(current_user_ns(), uid) & 0xffff; + u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK; + + if (capable(CAP_SYS_ADMIN)) + jkey &= HFI1_ADMIN_JKEY_RANGE - 1; + else if (jkey < 64) + jkey |= BIT(HFI1_JKEY_WIDTH - 1); + + return jkey; } /* @@ -1656,7 +1675,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, const struct pci_device_id *); void hfi1_free_devdata(struct hfi1_devdata *); -void cc_state_reclaim(struct rcu_head *rcu); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ @@ -1788,7 +1806,7 @@ extern unsigned int hfi1_max_mtu; extern unsigned int hfi1_cu; extern unsigned int user_credit_return_threshold; extern int num_user_contexts; -extern unsigned n_krcvqs; +extern unsigned long n_krcvqs; extern uint krcvqs[]; extern int krcvqsset; extern uint kdeth_qp; diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a358d23ecd54..384b43d2fd49 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -94,7 +94,7 @@ module_param_array(krcvqs, uint, &krcvqsset, S_IRUGO); MODULE_PARM_DESC(krcvqs, "Array of the number of non-control kernel receive queues by VL"); /* computed based on above array */ -unsigned n_krcvqs; +unsigned long n_krcvqs; static unsigned hfi1_rcvarr_split = 25; module_param_named(rcvarr_split, hfi1_rcvarr_split, uint, S_IRUGO); @@ -500,6 +500,7 @@ void hfi1_init_pportdata(struct pci_dev *pdev, struct hfi1_pportdata *ppd, INIT_WORK(&ppd->link_downgrade_work, handle_link_downgrade); INIT_WORK(&ppd->sma_message_work, handle_sma_message); INIT_WORK(&ppd->link_bounce_work, handle_link_bounce); + INIT_DELAYED_WORK(&ppd->start_link_work, handle_start_link); INIT_WORK(&ppd->linkstate_active_work, receive_interrupt_work); INIT_WORK(&ppd->qsfp_info.qsfp_work, qsfp_event); @@ -1333,7 +1334,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_unlock(&ppd->cc_state_lock); if (cc_state) - call_rcu(&cc_state->rcu, cc_state_reclaim); + kfree_rcu(cc_state, rcu); } free_credit_return(dd); diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1263abe01999..7ffc14f21523 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1819,6 +1819,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, u32 len = OPA_AM_CI_LEN(am) + 1; int ret; + if (dd->pport->port_type != PORT_TYPE_QSFP) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */ #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1) #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK) @@ -2599,7 +2604,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, u8 lq, num_vls; u8 res_lli, res_ler; u64 port_mask; - unsigned long port_num; + u8 port_num; unsigned long vl; u32 vl_select_mask; int vfi; @@ -2633,9 +2638,9 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); - if ((u8)port_num != port) { + if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; return reply((struct ib_mad_hdr *)pmp); } @@ -2837,7 +2842,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3010,7 +3015,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3247,7 +3252,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, */ port_mask = be64_to_cpu(req->port_select_mask[3]); port_num = find_first_bit((unsigned long *)&port_mask, - sizeof(port_mask)); + sizeof(port_mask) * 8); if (port_num != port) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3398,7 +3403,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + kfree_rcu(old_cc_state, rcu); } static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, @@ -3553,13 +3558,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } -void cc_state_reclaim(struct rcu_head *rcu) -{ - struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu); - - kfree(cc_state); -} - static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) diff --git a/drivers/infiniband/hw/hfi1/pio_copy.c b/drivers/infiniband/hw/hfi1/pio_copy.c index 8c25e1b58849..3a1ef3056282 100644 --- a/drivers/infiniband/hw/hfi1/pio_copy.c +++ b/drivers/infiniband/hw/hfi1/pio_copy.c @@ -771,6 +771,9 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_extra_bytes(pbuf, from, to_fill); from += to_fill; nbytes -= to_fill; + /* may not be enough valid bytes left to align */ + if (extra > nbytes) + extra = nbytes; /* ...now write carry */ dest = pbuf->start + (pbuf->qw_written * sizeof(u64)); @@ -798,6 +801,15 @@ void seg_pio_copy_mid(struct pio_buf *pbuf, const void *from, size_t nbytes) read_low_bytes(pbuf, from, extra); from += extra; nbytes -= extra; + /* + * If no bytes are left, return early - we are done. + * NOTE: This short-circuit is *required* because + * "extra" may have been reduced in size and "from" + * is not aligned, as required when leaving this + * if block. + */ + if (nbytes == 0) + return; } /* at this point, from is QW aligned */ diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index a5aa3517e7d5..4e4d8317c281 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) iter->dev = dev; iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; - if (qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index a207717ade2a..4e95ad810847 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, u8 *data) { struct hfi1_pportdata *ppd; - u32 excess_len = 0; - int ret = 0; + u32 excess_len = len; + int ret = 0, offset = 0; if (port_num > dd->num_pports || port_num < 1) { dd_dev_info(dd, "%s: Invalid port number %d\n", @@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, } memcpy(data, &ppd->qsfp_info.cache[addr], len); + + if (addr <= QSFP_MONITOR_VAL_END && + (addr + len) >= QSFP_MONITOR_VAL_START) { + /* Overlap with the dynamic channel monitor range */ + if (addr < QSFP_MONITOR_VAL_START) { + if (addr + len <= QSFP_MONITOR_VAL_END) + len = addr + len - QSFP_MONITOR_VAL_START; + else + len = QSFP_MONITOR_RANGE; + offset = QSFP_MONITOR_VAL_START - addr; + addr = QSFP_MONITOR_VAL_START; + } else if (addr == QSFP_MONITOR_VAL_START) { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_RANGE; + } else { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_VAL_END - addr + 1; + } + /* Refresh the values of the dynamic monitors from the cable */ + ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len); + if (ret != len) { + ret = -EAGAIN; + goto set_zeroes; + } + } + return 0; set_zeroes: diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index 69275ebd9597..36cf52359848 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -74,6 +74,9 @@ /* Defined fields that Intel requires of qualified cables */ /* Byte 0 is Identifier, not checked */ /* Byte 1 is reserved "status MSB" */ +#define QSFP_MONITOR_VAL_START 22 +#define QSFP_MONITOR_VAL_END 81 +#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1) #define QSFP_TX_CTRL_BYTE_OFFS 86 #define QSFP_PWR_CTRL_BYTE_OFFS 93 #define QSFP_CDR_CTRL_BYTE_OFFS 98 diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 0ecf27903dc2..1694037d1eee 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -114,6 +114,8 @@ MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 12 #define KDETH_HCRC_LOWER_SHIFT 24 #define KDETH_HCRC_LOWER_MASK 0xff +#define AHG_KDETH_INTR_SHIFT 12 + #define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4) #define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff) @@ -1480,7 +1482,8 @@ static int set_txreq_header_ahg(struct user_sdma_request *req, /* Clear KDETH.SH on last packet */ if (unlikely(tx->flags & TXREQ_FLAGS_REQ_LAST_PKT)) { val |= cpu_to_le16(KDETH_GET(hdr->kdeth.ver_tid_offset, - INTR) >> 16); + INTR) << + AHG_KDETH_INTR_SHIFT); val &= cpu_to_le16(~(1U << 13)); AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val); } else { diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index b738acdb9b02..8ec09e470f84 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -232,7 +232,7 @@ struct i40iw_device { struct i40e_client *client; struct i40iw_hw hw; struct i40iw_cm_core cm_core; - unsigned long *mem_resources; + u8 *mem_resources; unsigned long *allocated_qps; unsigned long *allocated_cqs; unsigned long *allocated_mrs; @@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev, *next = resource_num + 1; if (*next == max_resources) *next = 0; - spin_unlock_irqrestore(&iwdev->resource_lock, flags); *req_resource_num = resource_num; + spin_unlock_irqrestore(&iwdev->resource_lock, flags); return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5026dc79978a..7ca0638579c0 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, buf += hdr_len; } - if (pd_len) - memcpy(buf, pdata->addr, pd_len); + if (pdata && pdata->addr) + memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); @@ -3347,26 +3347,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp) } /** - * i40iw_loopback_nop - Send a nop - * @qp: associated hw qp - */ -static void i40iw_loopback_nop(struct i40iw_sc_qp *qp) -{ - u64 *wqe; - u64 header; - - wqe = qp->qp_uk.sq_base->elem; - set_64bit_val(wqe, 0, 0); - set_64bit_val(wqe, 8, 0); - set_64bit_val(wqe, 16, 0); - - header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) | - LS_64(0, I40IWQPSQ_SIGCOMPL) | - LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, header); -} - -/** * i40iw_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection */ @@ -3638,7 +3618,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - i40iw_loopback_nop(&iwqp->sc_qp); + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0); } if (iwqp->page) diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c index 3ee0cad96bc6..0c92a40b3e86 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_hw.c +++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c @@ -265,6 +265,7 @@ void i40iw_next_iw_state(struct i40iw_qp *iwqp, info.dont_send_fin = false; if (iwqp->sc_qp.term_flags && (state == I40IW_QP_STATE_ERROR)) info.reset_tcp_conn = true; + iwqp->hw_iwarp_state = state; i40iw_hw_modify_qp(iwqp->iwdev, iwqp, &info, 0); } diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 6e9081380a27..445e230d5ff8 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -100,7 +100,7 @@ static struct notifier_block i40iw_net_notifier = { .notifier_call = i40iw_net_event }; -static int i40iw_notifiers_registered; +static atomic_t i40iw_notifiers_registered; /** * i40iw_find_i40e_handler - find a handler given a client info @@ -1342,12 +1342,11 @@ exit: */ static void i40iw_register_notifiers(void) { - if (!i40iw_notifiers_registered) { + if (atomic_inc_return(&i40iw_notifiers_registered) == 1) { register_inetaddr_notifier(&i40iw_inetaddr_notifier); register_inet6addr_notifier(&i40iw_inetaddr6_notifier); register_netevent_notifier(&i40iw_net_notifier); } - i40iw_notifiers_registered++; } /** @@ -1429,8 +1428,7 @@ static void i40iw_deinit_device(struct i40iw_device *iwdev, bool reset, bool del i40iw_del_macip_entry(iwdev, (u8)iwdev->mac_ip_table_idx); /* fallthrough */ case INET_NOTIFIER: - if (i40iw_notifiers_registered > 0) { - i40iw_notifiers_registered--; + if (!atomic_dec_return(&i40iw_notifiers_registered)) { unregister_netevent_notifier(&i40iw_net_notifier); unregister_inetaddr_notifier(&i40iw_inetaddr_notifier); unregister_inet6addr_notifier(&i40iw_inetaddr6_notifier); @@ -1558,6 +1556,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) enum i40iw_status_code status; struct i40iw_handler *hdl; + hdl = i40iw_find_netdev(ldev->netdev); + if (hdl) + return 0; + hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0e8db0a35141..6fd043b1d714 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, { if (!mem) return I40IW_ERR_PARAM; + /* + * mem->va points to the parent of mem, so both mem and mem->va + * can not be touched once mem->va is freed + */ kfree(mem->va); - mem->va = NULL; return 0; } diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2360338877bf..6329c971c22f 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return &iwqp->ibqp; error: i40iw_free_qp_resources(iwdev, iwqp, qp_num); - kfree(mem); return ERR_PTR(err_code); } @@ -1926,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) } if (iwpbl->pbl_allocated) i40iw_free_pble(iwdev->pble_rsrc, palloc); - kfree(iwpbl->iwmr); - iwpbl->iwmr = NULL; + kfree(iwmr); return 0; } diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index d6fc8a6e8c33..5df63dacaaa3 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) checksum == cpu_to_be16(0xffff); } -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe, int is_eth) +static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); } - - return 0; } static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, @@ -689,12 +687,6 @@ repoll: is_error = (cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_ERROR; - if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && - is_send)) { - pr_warn("Completion for NOP opcode detected!\n"); - return -EINVAL; - } - /* Resize CQ in progress */ if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_CQE_OPCODE_RESIZE)) { if (cq->resize_buf) { @@ -720,12 +712,6 @@ repoll: */ mqp = __mlx4_qp_lookup(to_mdev(cq->ibcq.device)->dev, be32_to_cpu(cqe->vlan_my_qpn)); - if (unlikely(!mqp)) { - pr_warn("CQ %06x with entry for unknown QPN %06x\n", - cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -738,11 +724,6 @@ repoll: /* SRQ is also in the radix tree */ msrq = mlx4_srq_lookup(to_mdev(cq->ibcq.device)->dev, srq_num); - if (unlikely(!msrq)) { - pr_warn("CQ %06x with entry for unknown SRQN %06x\n", - cq->mcq.cqn, srq_num); - return -EINVAL; - } } if (is_send) { @@ -852,9 +833,11 @@ repoll: if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, - cqe, is_eth); + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + use_tunnel_data(*cur_qp, cq, wc, tail, cqe, + is_eth); + return 0; + } } wc->slid = be16_to_cpu(cqe->rlid); @@ -891,7 +874,6 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) struct mlx4_ib_qp *cur_qp = NULL; unsigned long flags; int npolled; - int err = 0; struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); spin_lock_irqsave(&cq->lock, flags); @@ -901,8 +883,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) } for (npolled = 0; npolled < num_entries; ++npolled) { - err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); - if (err) + if (mlx4_ib_poll_one(cq, &cur_qp, wc + npolled)) break; } @@ -911,10 +892,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return npolled; - else - return err; + return npolled; } int mlx4_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2e53d28f98..0f21c3a25552 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1128,6 +1128,27 @@ void handle_port_mgmt_change_event(struct work_struct *work) /* Generate GUID changed event */ if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) { + if (mlx4_is_master(dev->dev)) { + union ib_gid gid; + int err = 0; + + if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix) + err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1); + else + gid.global.subnet_prefix = + eqe->event.port_mgmt_change.params.port_info.gid_prefix; + if (err) { + pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n", + port, err); + } else { + pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n", + port, + (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix), + be64_to_cpu(gid.global.subnet_prefix)); + atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); + } + } mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE); /*if master, notify all slaves*/ if (mlx4_is_master(dev->dev)) @@ -2202,6 +2223,8 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) if (err) goto demux_err; dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id; + atomic64_set(&dev->sriov.demux[i].subnet_prefix, + be64_to_cpu(gid.global.subnet_prefix)); err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, &dev->sriov.sqps[i]); if (err) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2af44c2de262..87ba9bca4181 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2202,6 +2202,9 @@ static int mlx4_ib_alloc_diag_counters(struct mlx4_ib_dev *ibdev) bool per_port = !!(ibdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT); + if (mlx4_is_slave(ibdev->dev)) + return 0; + for (i = 0; i < MLX4_DIAG_COUNTERS_TYPES; i++) { /* i == 1 means we are building port counters */ if (i && !per_port) diff --git a/drivers/infiniband/hw/mlx4/mcg.c b/drivers/infiniband/hw/mlx4/mcg.c index 8f7ad07915b0..097bfcc4ee99 100644 --- a/drivers/infiniband/hw/mlx4/mcg.c +++ b/drivers/infiniband/hw/mlx4/mcg.c @@ -489,7 +489,7 @@ static u8 get_leave_state(struct mcast_group *group) if (!group->members[i]) leave_state |= (1 << i); - return leave_state & (group->rec.scope_join_state & 7); + return leave_state & (group->rec.scope_join_state & 0xf); } static int join_group(struct mcast_group *group, int slave, u8 join_mask) @@ -564,8 +564,8 @@ static void mlx4_ib_mcg_timeout_handler(struct work_struct *work) } else mcg_warn_group(group, "DRIVER BUG\n"); } else if (group->state == MCAST_LEAVE_SENT) { - if (group->rec.scope_join_state & 7) - group->rec.scope_join_state &= 0xf8; + if (group->rec.scope_join_state & 0xf) + group->rec.scope_join_state &= 0xf0; group->state = MCAST_IDLE; mutex_unlock(&group->lock); if (release_group(group, 1)) @@ -605,7 +605,7 @@ static int handle_leave_req(struct mcast_group *group, u8 leave_mask, static int handle_join_req(struct mcast_group *group, u8 join_mask, struct mcast_req *req) { - u8 group_join_state = group->rec.scope_join_state & 7; + u8 group_join_state = group->rec.scope_join_state & 0xf; int ref = 0; u16 status; struct ib_sa_mcmember_data *sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; @@ -690,8 +690,8 @@ static void mlx4_ib_mcg_work_handler(struct work_struct *work) u8 cur_join_state; resp_join_state = ((struct ib_sa_mcmember_data *) - group->response_sa_mad.data)->scope_join_state & 7; - cur_join_state = group->rec.scope_join_state & 7; + group->response_sa_mad.data)->scope_join_state & 0xf; + cur_join_state = group->rec.scope_join_state & 0xf; if (method == IB_MGMT_METHOD_GET_RESP) { /* successfull join */ @@ -710,7 +710,7 @@ process_requests: req = list_first_entry(&group->pending_list, struct mcast_req, group_list); sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data; - req_join_state = sa_data->scope_join_state & 0x7; + req_join_state = sa_data->scope_join_state & 0xf; /* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 7c5832ede4bd..686ab48ff644 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -448,7 +448,7 @@ struct mlx4_ib_demux_ctx { struct workqueue_struct *wq; struct workqueue_struct *ud_wq; spinlock_t ud_lock; - __be64 subnet_prefix; + atomic64_t subnet_prefix; __be64 guid_cache[128]; struct mlx4_ib_dev *dev; /* the following lock protects both mcg_table and mcg_mgid0_list */ diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 768085f59566..7fb9629bd12b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -2493,24 +2493,27 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - if (is_eth) + if (is_eth) { memcpy(sqp->ud_header.grh.source_gid.raw, sgid.raw, 16); - else { - if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { - /* When multi-function is enabled, the ib_core gid - * indexes don't necessarily match the hw ones, so - * we must use our own cache */ - sqp->ud_header.grh.source_gid.global.subnet_prefix = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - subnet_prefix; - sqp->ud_header.grh.source_gid.global.interface_id = - to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. - guid_cache[ah->av.ib.gid_index]; - } else - ib_get_cached_gid(ib_dev, - be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, - &sqp->ud_header.grh.source_gid, NULL); + } else { + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache + */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + cpu_to_be64(atomic64_read(&(to_mdev(ib_dev)->sriov. + demux[sqp->qp.port - 1]. + subnet_prefix))); + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid, NULL); + } } memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 308a358e5b46..e4fac9292e4a 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -553,12 +553,6 @@ repoll: * from the table. */ mqp = __mlx5_qp_lookup(dev->mdev, qpn); - if (unlikely(!mqp)) { - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n", - cq->mcq.cqn, qpn); - return -EINVAL; - } - *cur_qp = to_mibqp(mqp); } @@ -619,13 +613,6 @@ repoll: read_lock(&dev->mdev->priv.mkey_table.lock); mmkey = __mlx5_mr_lookup(dev->mdev, mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey))); - if (unlikely(!mmkey)) { - read_unlock(&dev->mdev->priv.mkey_table.lock); - mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n", - cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey)); - return -EINVAL; - } - mr = to_mibmr(mmkey); get_sig_err_item(sig_err_cqe, &mr->sig->err_item); mr->sig->sig_err_exists = true; @@ -676,7 +663,6 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int soft_polled = 0; int npolled; - int err = 0; spin_lock_irqsave(&cq->lock, flags); if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { @@ -688,8 +674,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) soft_polled = poll_soft_wc(cq, num_entries, wc); for (npolled = 0; npolled < num_entries - soft_polled; npolled++) { - err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled); - if (err) + if (mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled)) break; } @@ -698,10 +683,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) out: spin_unlock_irqrestore(&cq->lock, flags); - if (err == 0 || err == -EAGAIN) - return soft_polled + npolled; - else - return err; + return soft_polled + npolled; } int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a84bb766fc62..e19537cf44ab 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -37,7 +37,6 @@ #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/slab.h> -#include <linux/io-mapping.h> #if defined(CONFIG_X86) #include <asm/pat.h> #endif @@ -289,7 +288,9 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num, static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev) { - return !MLX5_CAP_GEN(dev->mdev, ib_virt); + if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB) + return !MLX5_CAP_GEN(dev->mdev, ib_virt); + return 0; } enum { @@ -1429,6 +1430,13 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, dmac_47_16), ib_spec->eth.val.dst_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + smac_47_16), + ib_spec->eth.mask.src_mac); + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + smac_47_16), + ib_spec->eth.val.src_mac); + if (ib_spec->eth.mask.vlan_tag) { MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, vlan_tag, 1); @@ -1850,6 +1858,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, int domain) { struct mlx5_ib_dev *dev = to_mdev(qp->device); + struct mlx5_ib_qp *mqp = to_mqp(qp); struct mlx5_ib_flow_handler *handler = NULL; struct mlx5_flow_destination *dst = NULL; struct mlx5_ib_flow_prio *ft_prio; @@ -1876,7 +1885,10 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp, } dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR; - dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn; + if (mqp->flags & MLX5_IB_QP_RSS) + dst->tir_num = mqp->rss_qp.tirn; + else + dst->tir_num = mqp->raw_packet_qp.rq.tirn; if (flow_attr->type == IB_FLOW_ATTR_NORMAL) { if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) { diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 40df2cca0609..996b54e366b0 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -71,7 +71,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, addr = addr >> page_shift; tmp = (unsigned long)addr; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; i = 0; @@ -81,7 +81,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, for (k = 0; k < len; k++) { if (!(i & mask)) { tmp = (unsigned long)pfn; - m = min_t(unsigned long, m, find_first_bit(&tmp, sizeof(tmp))); + m = min_t(unsigned long, m, find_first_bit(&tmp, BITS_PER_LONG)); skip = 1 << m; mask = skip - 1; base = pfn; @@ -89,7 +89,7 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, } else { if (base + p != pfn) { tmp = (unsigned long)p; - m = find_first_bit(&tmp, sizeof(tmp)); + m = find_first_bit(&tmp, BITS_PER_LONG); skip = 1 << m; mask = skip - 1; base = pfn; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 372385d0f993..95146f4aa3e3 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -402,6 +402,7 @@ enum mlx5_ib_qp_flags { /* QP uses 1 as its source QP number */ MLX5_IB_QP_SQPN_QP1 = 1 << 6, MLX5_IB_QP_CAP_SCATTER_FCS = 1 << 7, + MLX5_IB_QP_RSS = 1 << 8, }; struct mlx5_umr_wr { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0dd7d93cac95..affc3f6598ca 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1449,6 +1449,7 @@ create_tir: kvfree(in); /* qpn is reserved for that QP */ qp->trans_qp.base.mqp.qpn = 0; + qp->flags |= MLX5_IB_QP_RSS; return 0; err: @@ -3658,12 +3659,8 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, struct ib_send_wr *wr, unsigned *idx, int *size, int nreq) { - int err = 0; - - if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) { - err = -ENOMEM; - return err; - } + if (unlikely(mlx5_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq))) + return -ENOMEM; *idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1); *seg = mlx5_get_send_wqe(qp, *idx); @@ -3679,7 +3676,7 @@ static int begin_wqe(struct mlx5_ib_qp *qp, void **seg, *seg += sizeof(**ctrl); *size = sizeof(**ctrl) / 16; - return err; + return 0; } static void finish_wqe(struct mlx5_ib_qp *qp, @@ -3758,7 +3755,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, num_sge = wr->num_sge; if (unlikely(num_sge > qp->sq.max_gs)) { mlx5_ib_warn(dev, "\n"); - err = -ENOMEM; + err = -EINVAL; *bad_wr = wr; goto out; } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 16740dcb876b..67fc0b6857e1 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_srq = (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; - attr->max_send_sge = ((rsp->max_write_send_sge & + attr->max_send_sge = ((rsp->max_recv_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); - attr->max_recv_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT; + attr->max_recv_sge = (rsp->max_recv_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT; attr->max_srq_sge = (rsp->max_srq_rqe_sge & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET; - attr->max_rdma_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT; + attr->max_rdma_sge = (rsp->max_wr_rd_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT; attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 0efc9662c6d8..37df4481bb8f 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -554,9 +554,9 @@ enum { OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF << - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF, @@ -612,6 +612,8 @@ enum { OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0, OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF << OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT = 0, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK = 0xFFFF, }; struct ocrdma_mbx_query_config { @@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config { struct ocrdma_mbx_rsp rsp; u32 qp_srq_cq_ird_ord; u32 max_pd_ca_ack_delay; - u32 max_write_send_sge; + u32 max_recv_send_sge; u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; @@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config { u32 max_wqes_rqes_per_q; u32 max_cq_cqes_per_cq; u32 max_srq_rqe_sge; + u32 max_wr_rd_sge; + u32 ird_pgsz_num_pages; }; struct ocrdma_fw_ver_rsp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b1a3d91fe8b9..0aa854737e74 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = dev->attr.max_send_sge; - attr->max_sge_rd = attr->max_sge; + attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5e75b43c596b..5bad8e3b40bb 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) DEBUGFS_FILE(ctx_stats) static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) + __acquires(RCU) { struct qib_qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qib_qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qib_qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qib_qp_iter *iter = iter_ptr; @@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) + __releases(RCU) { rcu_read_unlock(); } diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fcdf37913a26..c3edc033f7c4 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf, pos = *ppos; - if (pos != 0) { - ret = -EINVAL; - goto bail; - } - - if (count != sizeof(struct qib_flash)) { - ret = -EINVAL; - goto bail; - } - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } + if (pos != 0 || count != sizeof(struct qib_flash)) + return -EINVAL; - if (copy_from_user(tmp, buf, count)) { - ret = -EFAULT; - goto bail_tmp; - } + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); dd = private2dd(file); if (qib_eeprom_write(dd, pos, tmp, count)) { @@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf, bail_tmp: kfree(tmp); - -bail: return ret; } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 9cc0aae1d781..f9b8cd2354d1 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -573,10 +573,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) return NULL; iter->dev = dev; - if (qib_qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c229b9f4a52d..0a89a955550b 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -664,7 +664,8 @@ static int __init usnic_ib_init(void) return err; } - if (pci_register_driver(&usnic_ib_pci_driver)) { + err = pci_register_driver(&usnic_ib_pci_driver); + if (err) { usnic_err("Unable to register with PCI\n"); goto out_umem_fini; } diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c index 80c4b6b401b8..46b64970058e 100644 --- a/drivers/infiniband/sw/rdmavt/mr.c +++ b/drivers/infiniband/sw/rdmavt/mr.c @@ -294,7 +294,7 @@ static void __rvt_free_mr(struct rvt_mr *mr) { rvt_deinit_mregion(&mr->mr); rvt_free_lkey(&mr->mr); - vfree(mr); + kfree(mr); } /** diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bdb540f25a88..870b4f212fbc 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -873,7 +873,8 @@ bail_qpn: free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - vfree(qp->r_rq.wq); + if (!qp->ip) + vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 55f0e8f0ca79..ddd59270ff6d 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -362,15 +362,34 @@ static int __init rxe_module_init(void) return err; } - err = rxe_net_init(); + err = rxe_net_ipv4_init(); if (err) { - pr_err("rxe: unable to init\n"); + pr_err("rxe: unable to init ipv4 tunnel\n"); rxe_cache_exit(); - return err; + goto exit; + } + + err = rxe_net_ipv6_init(); + if (err) { + pr_err("rxe: unable to init ipv6 tunnel\n"); + rxe_cache_exit(); + goto exit; } + + err = register_netdevice_notifier(&rxe_net_notifier); + if (err) { + pr_err("rxe: Failed to rigister netdev notifier\n"); + goto exit; + } + pr_info("rxe: loaded\n"); return 0; + +exit: + rxe_release_udp_tunnel(recv_sockets.sk4); + rxe_release_udp_tunnel(recv_sockets.sk6); + return err; } static void __exit rxe_module_exit(void) diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 36f67de44095..1c59ef2c67aa 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -689,7 +689,14 @@ int rxe_completer(void *arg) qp->req.need_retry = 1; rxe_run_task(&qp->req.task, 1); } + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; + } else { wqe->status = IB_WC_RETRY_EXC_ERR; state = COMPST_ERROR; @@ -716,6 +723,12 @@ int rxe_completer(void *arg) case COMPST_ERROR: do_complete(qp, wqe); rxe_qp_error(qp); + + if (pkt) { + rxe_drop_ref(pkt->qp); + kfree_skb(skb); + } + goto exit; } } diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 0b8d2ea8b41d..eedf2f1cafdf 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -275,9 +275,10 @@ static struct socket *rxe_setup_udp_tunnel(struct net *net, __be16 port, return sock; } -static void rxe_release_udp_tunnel(struct socket *sk) +void rxe_release_udp_tunnel(struct socket *sk) { - udp_tunnel_sock_release(sk); + if (sk) + udp_tunnel_sock_release(sk); } static void prepare_udp_hdr(struct sk_buff *skb, __be16 src_port, @@ -658,51 +659,45 @@ out: return NOTIFY_OK; } -static struct notifier_block rxe_net_notifier = { +struct notifier_block rxe_net_notifier = { .notifier_call = rxe_notify, }; -int rxe_net_init(void) +int rxe_net_ipv4_init(void) { - int err; - spin_lock_init(&dev_list_lock); - recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), true); - if (IS_ERR(recv_sockets.sk6)) { - recv_sockets.sk6 = NULL; - pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); - return -1; - } - recv_sockets.sk4 = rxe_setup_udp_tunnel(&init_net, - htons(ROCE_V2_UDP_DPORT), false); + htons(ROCE_V2_UDP_DPORT), false); if (IS_ERR(recv_sockets.sk4)) { - rxe_release_udp_tunnel(recv_sockets.sk6); recv_sockets.sk4 = NULL; - recv_sockets.sk6 = NULL; pr_err("rxe: Failed to create IPv4 UDP tunnel\n"); return -1; } - err = register_netdevice_notifier(&rxe_net_notifier); - if (err) { - rxe_release_udp_tunnel(recv_sockets.sk6); - rxe_release_udp_tunnel(recv_sockets.sk4); - pr_err("rxe: Failed to rigister netdev notifier\n"); - } - - return err; + return 0; } -void rxe_net_exit(void) +int rxe_net_ipv6_init(void) { - if (recv_sockets.sk6) - rxe_release_udp_tunnel(recv_sockets.sk6); +#if IS_ENABLED(CONFIG_IPV6) - if (recv_sockets.sk4) - rxe_release_udp_tunnel(recv_sockets.sk4); + spin_lock_init(&dev_list_lock); + recv_sockets.sk6 = rxe_setup_udp_tunnel(&init_net, + htons(ROCE_V2_UDP_DPORT), true); + if (IS_ERR(recv_sockets.sk6)) { + recv_sockets.sk6 = NULL; + pr_err("rxe: Failed to create IPv6 UDP tunnel\n"); + return -1; + } +#endif + return 0; +} + +void rxe_net_exit(void) +{ + rxe_release_udp_tunnel(recv_sockets.sk6); + rxe_release_udp_tunnel(recv_sockets.sk4); unregister_netdevice_notifier(&rxe_net_notifier); } diff --git a/drivers/infiniband/sw/rxe/rxe_net.h b/drivers/infiniband/sw/rxe/rxe_net.h index 7b06f76d16cc..0daf7f09e5b5 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.h +++ b/drivers/infiniband/sw/rxe/rxe_net.h @@ -44,10 +44,13 @@ struct rxe_recv_sockets { }; extern struct rxe_recv_sockets recv_sockets; +extern struct notifier_block rxe_net_notifier; +void rxe_release_udp_tunnel(struct socket *sk); struct rxe_dev *rxe_net_add(struct net_device *ndev); -int rxe_net_init(void); +int rxe_net_ipv4_init(void); +int rxe_net_ipv6_init(void); void rxe_net_exit(void); #endif /* RXE_NET_H */ diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index 3d464c23e08b..144d2f129fcd 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -312,7 +312,7 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) * make a copy of the skb to post to the next qp */ skb_copy = (mce->qp_list.next != &mcg->qp_list) ? - skb_clone(skb, GFP_KERNEL) : NULL; + skb_clone(skb, GFP_ATOMIC) : NULL; pkt->qp = qp; rxe_add_ref(qp); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 33b2d9d77021..13a848a518e8 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -511,24 +511,21 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } static void update_wqe_state(struct rxe_qp *qp, - struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, - enum wqe_state *prev_state) + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt) { - enum wqe_state prev_state_ = wqe->state; - if (pkt->mask & RXE_END_MASK) { if (qp_type(qp) == IB_QPT_RC) wqe->state = wqe_state_pending; } else { wqe->state = wqe_state_processing; } - - *prev_state = prev_state_; } -static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, - struct rxe_pkt_info *pkt, int payload) +static void update_wqe_psn(struct rxe_qp *qp, + struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, + int payload) { /* number of packets left to send including current one */ int num_pkt = (wqe->dma.resid + payload + qp->mtu - 1) / qp->mtu; @@ -546,9 +543,34 @@ static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, qp->req.psn = (wqe->first_psn + num_pkt) & BTH_PSN_MASK; else qp->req.psn = (qp->req.psn + 1) & BTH_PSN_MASK; +} - qp->req.opcode = pkt->opcode; +static void save_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + rollback_wqe->state = wqe->state; + rollback_wqe->first_psn = wqe->first_psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_qp->req.psn = qp->req.psn; +} +static void rollback_state(struct rxe_send_wqe *wqe, + struct rxe_qp *qp, + struct rxe_send_wqe *rollback_wqe, + struct rxe_qp *rollback_qp) +{ + wqe->state = rollback_wqe->state; + wqe->first_psn = rollback_wqe->first_psn; + wqe->last_psn = rollback_wqe->last_psn; + qp->req.psn = rollback_qp->req.psn; +} + +static void update_state(struct rxe_qp *qp, struct rxe_send_wqe *wqe, + struct rxe_pkt_info *pkt, int payload) +{ + qp->req.opcode = pkt->opcode; if (pkt->mask & RXE_END_MASK) qp->req.wqe_index = next_index(qp->sq.queue, qp->req.wqe_index); @@ -571,7 +593,8 @@ int rxe_requester(void *arg) int mtu; int opcode; int ret; - enum wqe_state prev_state; + struct rxe_qp rollback_qp; + struct rxe_send_wqe rollback_wqe; next_wqe: if (unlikely(!qp->valid || qp->req.state == QP_STATE_ERROR)) @@ -688,13 +711,21 @@ next_wqe: goto err; } - update_wqe_state(qp, wqe, &pkt, &prev_state); + /* + * To prevent a race on wqe access between requester and completer, + * wqe members state and psn need to be set before calling + * rxe_xmit_packet(). + * Otherwise, completer might initiate an unjustified retry flow. + */ + save_state(wqe, qp, &rollback_wqe, &rollback_qp); + update_wqe_state(qp, wqe, &pkt); + update_wqe_psn(qp, wqe, &pkt, payload); ret = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, &pkt, skb); if (ret) { qp->need_req_skb = 1; kfree_skb(skb); - wqe->state = prev_state; + rollback_state(wqe, qp, &rollback_wqe, &rollback_qp); if (ret == -EAGAIN) { rxe_run_task(&qp->req.task, 1); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index ebb03b46e2ad..3e0f0f2baace 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -972,11 +972,13 @@ static int send_atomic_ack(struct rxe_qp *qp, struct rxe_pkt_info *pkt, free_rd_atomic_resource(qp, res); rxe_advance_resp_resource(qp); + memcpy(SKB_TO_PKT(skb), &ack_pkt, sizeof(skb->cb)); + res->type = RXE_ATOMIC_MASK; res->atomic.skb = skb; - res->first_psn = qp->resp.psn; - res->last_psn = qp->resp.psn; - res->cur_psn = qp->resp.psn; + res->first_psn = ack_pkt.psn; + res->last_psn = ack_pkt.psn; + res->cur_psn = ack_pkt.psn; rc = rxe_xmit_packet(rxe, qp, &ack_pkt, skb_copy); if (rc) { @@ -1116,8 +1118,7 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, rc = RESPST_CLEANUP; goto out; } - bth_set_psn(SKB_TO_PKT(skb_copy), - qp->resp.psn - 1); + /* Resend the result. */ rc = rxe_xmit_packet(to_rdev(qp->ibqp.device), qp, pkt, skb_copy); diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 4f7d9b48df64..9dbfcc0ab577 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -478,6 +478,7 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_ah *address, u32 qpn); void ipoib_reap_ah(struct work_struct *work); +struct ipoib_path *__path_find(struct net_device *dev, void *gid); void ipoib_mark_paths_invalid(struct net_device *dev); void ipoib_flush_paths(struct net_device *dev); int ipoib_check_sm_sendonly_fullmember_support(struct ipoib_dev_priv *priv); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 951d9abcca8b..4ad297d3de89 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1318,6 +1318,8 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx) } } +#define QPN_AND_OPTIONS_OFFSET 4 + static void ipoib_cm_tx_start(struct work_struct *work) { struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv, @@ -1326,6 +1328,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) struct ipoib_neigh *neigh; struct ipoib_cm_tx *p; unsigned long flags; + struct ipoib_path *path; int ret; struct ib_sa_path_rec pathrec; @@ -1338,7 +1341,19 @@ static void ipoib_cm_tx_start(struct work_struct *work) p = list_entry(priv->cm.start_list.next, typeof(*p), list); list_del_init(&p->list); neigh = p->neigh; + qpn = IPOIB_QPN(neigh->daddr); + /* + * As long as the search is with these 2 locks, + * path existence indicates its validity. + */ + path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET); + if (!path) { + pr_info("%s ignore not valid path %pI6\n", + __func__, + neigh->daddr + QPN_AND_OPTIONS_OFFSET); + goto free_neigh; + } memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); spin_unlock_irqrestore(&priv->lock, flags); @@ -1350,6 +1365,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) spin_lock_irqsave(&priv->lock, flags); if (ret) { +free_neigh: neigh = p->neigh; if (neigh) { neigh->cm = NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index dc6d241b9406..be11d5d5b8c1 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -1161,8 +1161,17 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv, } if (level == IPOIB_FLUSH_LIGHT) { + int oper_up; ipoib_mark_paths_invalid(dev); + /* Set IPoIB operation as down to prevent races between: + * the flush flow which leaves MCG and on the fly joins + * which can happen during that time. mcast restart task + * should deal with join requests we missed. + */ + oper_up = test_and_clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_mcast_dev_flush(dev); + if (oper_up) + set_bit(IPOIB_FLAG_OPER_UP, &priv->flags); ipoib_flush_ah(dev); } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 74bcaa064226..cc1c1b062ea5 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -485,7 +485,7 @@ int ipoib_set_mode(struct net_device *dev, const char *buf) return -EINVAL; } -static struct ipoib_path *__path_find(struct net_device *dev, void *gid) +struct ipoib_path *__path_find(struct net_device *dev, void *gid) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct rb_node *n = priv->path_tree.rb_node; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ba6be060a476..cae9bbcc27e7 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -403,6 +403,7 @@ isert_init_conn(struct isert_conn *isert_conn) INIT_LIST_HEAD(&isert_conn->node); init_completion(&isert_conn->login_comp); init_completion(&isert_conn->login_req_comp); + init_waitqueue_head(&isert_conn->rem_wait); kref_init(&isert_conn->kref); mutex_init(&isert_conn->mutex); INIT_WORK(&isert_conn->release_work, isert_release_work); @@ -448,7 +449,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { - isert_err("Unable to allocate isert_conn->login_rspbuf\n"); + ret = -ENOMEM; goto out_unmap_login_req_buf; } @@ -578,7 +579,8 @@ isert_connect_release(struct isert_conn *isert_conn) BUG_ON(!device); isert_free_rx_descriptors(isert_conn); - if (isert_conn->cm_id) + if (isert_conn->cm_id && + !isert_conn->dev_removed) rdma_destroy_id(isert_conn->cm_id); if (isert_conn->qp) { @@ -593,7 +595,10 @@ isert_connect_release(struct isert_conn *isert_conn) isert_device_put(device); - kfree(isert_conn); + if (isert_conn->dev_removed) + wake_up_interruptible(&isert_conn->rem_wait); + else + kfree(isert_conn); } static void @@ -753,6 +758,7 @@ static int isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) { struct isert_np *isert_np = cma_id->context; + struct isert_conn *isert_conn; int ret = 0; isert_info("%s (%d): status %d id %p np %p\n", @@ -773,10 +779,21 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_ADDR_CHANGE: /* FALLTHRU */ case RDMA_CM_EVENT_DISCONNECTED: /* FALLTHRU */ - case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */ case RDMA_CM_EVENT_TIMEWAIT_EXIT: /* FALLTHRU */ ret = isert_disconnected_handler(cma_id, event->event); break; + case RDMA_CM_EVENT_DEVICE_REMOVAL: + isert_conn = cma_id->qp->qp_context; + isert_conn->dev_removed = true; + isert_disconnected_handler(cma_id, event->event); + wait_event_interruptible(isert_conn->rem_wait, + isert_conn->state == ISER_CONN_DOWN); + kfree(isert_conn); + /* + * return non-zero from the callback to destroy + * the rdma cm id + */ + return 1; case RDMA_CM_EVENT_REJECTED: /* FALLTHRU */ case RDMA_CM_EVENT_UNREACHABLE: /* FALLTHRU */ case RDMA_CM_EVENT_CONNECT_ERROR: diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index fc791efe3a10..c02ada57d7f5 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -158,6 +158,8 @@ struct isert_conn { struct work_struct release_work; bool logout_posted; bool snd_w_inv; + wait_queue_head_t rem_wait; + bool dev_removed; }; #define ISERT_MAX_CQ 64 diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dfa23b075a88..883bbfe08e0e 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) goto err_query_port; + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device) sdev->device->name, i); goto err_ring; } - snprintf(sport->port_guid, sizeof(sport->port_guid), - "0x%016llx%016llx", - be64_to_cpu(sport->gid.global.subnet_prefix), - be64_to_cpu(sport->gid.global.interface_id)); } spin_lock(&srpt_dev_lock); |