diff options
Diffstat (limited to 'drivers/infiniband/ulp')
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib.h | 32 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_cm.c | 81 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 2 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_fs.c | 6 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_ib.c | 15 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_main.c | 444 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 23 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 8 | ||||
-rw-r--r-- | drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 261 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iscsi_iser.c | 16 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_memory.c | 5 | ||||
-rw-r--r-- | drivers/infiniband/ulp/iser/iser_verbs.c | 14 | ||||
-rw-r--r-- | drivers/infiniband/ulp/isert/ib_isert.c | 26 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srp/ib_srp.c | 27 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.c | 71 | ||||
-rw-r--r-- | drivers/infiniband/ulp/srpt/ib_srpt.h | 4 |
17 files changed, 561 insertions, 481 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index a50b062ed13e..1abe3c62f106 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -91,11 +91,9 @@ enum { IPOIB_STOP_REAPER = 7, IPOIB_FLAG_ADMIN_CM = 9, IPOIB_FLAG_UMCAST = 10, - IPOIB_STOP_NEIGH_GC = 11, IPOIB_NEIGH_TBL_FLUSH = 12, IPOIB_FLAG_DEV_ADDR_SET = 13, IPOIB_FLAG_DEV_ADDR_CTRL = 14, - IPOIB_FLAG_GOING_DOWN = 15, IPOIB_MAX_BACKOFF_SECONDS = 16, @@ -252,11 +250,11 @@ struct ipoib_cm_tx { struct ipoib_neigh *neigh; struct ipoib_path *path; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; unsigned long flags; u32 mtu; - unsigned max_send_sge; + unsigned int max_send_sge; }; struct ipoib_cm_rx_buf { @@ -325,15 +323,22 @@ struct ipoib_dev_priv { spinlock_t lock; struct net_device *dev; + void (*next_priv_destructor)(struct net_device *dev); struct napi_struct send_napi; struct napi_struct recv_napi; unsigned long flags; + /* + * This protects access to the child_intfs list. + * To READ from child_intfs the RTNL or vlan_rwsem read side must be + * held. To WRITE RTNL and the vlan_rwsem write side must be held (in + * that order) This lock exists because we have a few contexts where + * we need the child_intfs, but do not want to grab the RTNL. + */ struct rw_semaphore vlan_rwsem; struct mutex mcast_mutex; - struct mutex sysfs_mutex; struct rb_root path_tree; struct list_head path_list; @@ -373,8 +378,8 @@ struct ipoib_dev_priv { struct ipoib_rx_buf *rx_ring; struct ipoib_tx_buf *tx_ring; - unsigned tx_head; - unsigned tx_tail; + unsigned int tx_head; + unsigned int tx_tail; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_ud_wr tx_wr; struct ib_wc send_wc[MAX_SEND_CQE]; @@ -404,7 +409,7 @@ struct ipoib_dev_priv { #endif u64 hca_caps; struct ipoib_ethtool_st ethtool; - unsigned max_send_sge; + unsigned int max_send_sge; bool sm_fullmember_sendonly_support; const struct net_device_ops *rn_ops; }; @@ -414,7 +419,7 @@ struct ipoib_ah { struct ib_ah *ah; struct list_head list; struct kref ref; - unsigned last_send; + unsigned int last_send; int valid; }; @@ -483,6 +488,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah) kref_put(&ah->ref, ipoib_free_ah); } int ipoib_open(struct net_device *dev); +void ipoib_intf_free(struct net_device *dev); int ipoib_add_pkey_attr(struct net_device *dev); int ipoib_add_umcast_attr(struct net_device *dev); @@ -510,9 +516,6 @@ void ipoib_ib_dev_down(struct net_device *dev); int ipoib_ib_dev_stop_default(struct net_device *dev); void ipoib_pkey_dev_check_presence(struct net_device *dev); -int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port); -void ipoib_dev_cleanup(struct net_device *dev); - void ipoib_mcast_join_task(struct work_struct *work); void ipoib_mcast_carrier_on_task(struct work_struct *work); void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb); @@ -600,7 +603,6 @@ void ipoib_pkey_open(struct ipoib_dev_priv *priv); void ipoib_drain_cq(struct net_device *dev); void ipoib_set_ethtool_ops(struct net_device *dev); -void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca); #define IPOIB_FLAGS_RC 0x80 #define IPOIB_FLAGS_UC 0x40 @@ -729,7 +731,7 @@ void ipoib_cm_dev_stop(struct net_device *dev) static inline int ipoib_cm_dev_init(struct net_device *dev) { - return -ENOSYS; + return -EOPNOTSUPP; } static inline diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 6535d9beb24d..ea01b8dd2be6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -78,7 +78,7 @@ static struct ib_send_wr ipoib_cm_rx_drain_wr = { }; static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event); + const struct ib_cm_event *event); static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, u64 mapping[IPOIB_CM_RX_SG]) @@ -94,7 +94,6 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags, static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int i, ret; priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; @@ -102,7 +101,7 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id) for (i = 0; i < priv->cm.num_frags; ++i) priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i]; - ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr); + ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1, @@ -120,7 +119,6 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, struct ib_sge *sge, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int i, ret; wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV; @@ -128,7 +126,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev, for (i = 0; i < IPOIB_CM_RX_SG; ++i) sge[i].addr = rx->rx_ring[id].mapping[i]; - ret = ib_post_recv(rx->qp, wr, &bad_wr); + ret = ib_post_recv(rx->qp, wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret); ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1, @@ -212,7 +210,6 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev, static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) { - struct ib_send_wr *bad_wr; struct ipoib_cm_rx *p; /* We only reserved 1 extra slot in CQ for drain WRs, so @@ -227,7 +224,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv) */ p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list); ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID; - if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr)) + if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL)) ipoib_warn(priv, "failed to post drain wr\n"); list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list); @@ -275,7 +272,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev, static int ipoib_cm_modify_rx_qp(struct net_device *dev, struct ib_cm_id *cm_id, struct ib_qp *qp, - unsigned psn) + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ib_qp_attr qp_attr; @@ -363,7 +360,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i if (!rx->rx_ring) return -ENOMEM; - t = kmalloc(sizeof *t, GFP_KERNEL); + t = kmalloc(sizeof(*t), GFP_KERNEL); if (!t) { ret = -ENOMEM; goto err_free_1; @@ -421,8 +418,9 @@ err_free_1: } static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, - struct ib_qp *qp, struct ib_cm_req_event_param *req, - unsigned psn) + struct ib_qp *qp, + const struct ib_cm_req_event_param *req, + unsigned int psn) { struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_data data = {}; @@ -432,7 +430,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE); rep.private_data = &data; - rep.private_data_len = sizeof data; + rep.private_data_len = sizeof(data); rep.flow_control = 0; rep.rnr_retry_count = req->rnr_retry_count; rep.srq = ipoib_cm_has_srq(dev); @@ -441,16 +439,17 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id, return ib_send_cm_rep(cm_id, &rep); } -static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct net_device *dev = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_rx *p; - unsigned psn; + unsigned int psn; int ret; ipoib_dbg(priv, "REQ arrived\n"); - p = kzalloc(sizeof *p, GFP_KERNEL); + p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) return -ENOMEM; p->dev = dev; @@ -503,7 +502,7 @@ err_qp: } static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ipoib_cm_rx *p; struct ipoib_dev_priv *priv; @@ -547,7 +546,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, 0, PAGE_SIZE); --skb_shinfo(skb)->nr_frags; } else { - size = min(length, (unsigned) PAGE_SIZE); + size = min_t(unsigned int, length, PAGE_SIZE); skb_frag_size_set(frag, size); skb->data_len += size; @@ -641,8 +640,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } } - frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len, - (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE; + frags = PAGE_ALIGN(wc->byte_len - + min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) / + PAGE_SIZE; newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags, mapping, GFP_ATOMIC); @@ -657,7 +657,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping); - memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping); + memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping)); ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n", wc->byte_len, wc->slid); @@ -698,13 +698,11 @@ static inline int post_send(struct ipoib_dev_priv *priv, unsigned int wr_id, struct ipoib_tx_buf *tx_req) { - struct ib_send_wr *bad_wr; - ipoib_build_sge(priv, tx_req); priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM; - return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr); + return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL); } void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx) @@ -712,7 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_tx_buf *tx_req; int rc; - unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb); if (unlikely(skb->len > tx->mtu)) { ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n", @@ -982,7 +980,8 @@ void ipoib_cm_dev_stop(struct net_device *dev) cancel_delayed_work(&priv->cm.stale_task); } -static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct ipoib_cm_tx *p = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(p->dev); @@ -1068,8 +1067,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ struct ib_qp *tx_qp; if (dev->features & NETIF_F_SG) - attr.cap.max_send_sge = - min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1); + attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, + MAX_SKB_FRAGS + 1); tx_qp = ib_create_qp(priv->pd, &attr); tx->max_send_sge = attr.cap.max_send_sge; @@ -1094,7 +1093,7 @@ static int ipoib_cm_send_req(struct net_device *dev, req.qp_num = qp->qp_num; req.qp_type = qp->qp_type; req.private_data = &data; - req.private_data_len = sizeof data; + req.private_data_len = sizeof(data); req.flow_control = 0; req.starting_psn = 0; /* FIXME */ @@ -1152,7 +1151,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn, ret = -ENOMEM; goto err_tx; } - memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring); + memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring)); p->qp = ipoib_cm_create_tx_qp(p->dev, p); memalloc_noio_restore(noio_flag); @@ -1248,7 +1247,7 @@ timeout: } static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event) + const struct ib_cm_event *event) { struct ipoib_cm_tx *tx = cm_id->context; struct ipoib_dev_priv *priv = ipoib_priv(tx->dev); @@ -1305,7 +1304,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path struct ipoib_dev_priv *priv = ipoib_priv(dev); struct ipoib_cm_tx *tx; - tx = kzalloc(sizeof *tx, GFP_ATOMIC); + tx = kzalloc(sizeof(*tx), GFP_ATOMIC); if (!tx) return NULL; @@ -1370,7 +1369,7 @@ static void ipoib_cm_tx_start(struct work_struct *work) neigh->daddr + QPN_AND_OPTIONS_OFFSET); goto free_neigh; } - memcpy(&pathrec, &p->path->pathrec, sizeof pathrec); + memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec)); spin_unlock_irqrestore(&priv->lock, flags); netif_tx_unlock_bh(dev); @@ -1428,7 +1427,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work) struct net_device *dev = priv->dev; struct sk_buff *skb; unsigned long flags; - unsigned mtu = priv->mcast_mtu; + unsigned int mtu = priv->mcast_mtu; netif_tx_lock_bh(dev); spin_lock_irqsave(&priv->lock, flags); @@ -1518,19 +1517,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, { struct net_device *dev = to_net_dev(d); int ret; - struct ipoib_dev_priv *priv = ipoib_priv(dev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags)) - return -EPERM; - - if (!mutex_trylock(&priv->sysfs_mutex)) - return restart_syscall(); if (!rtnl_trylock()) { - mutex_unlock(&priv->sysfs_mutex); return restart_syscall(); } + if (dev->reg_state != NETREG_REGISTERED) { + rtnl_unlock(); + return -EPERM; + } + ret = ipoib_set_mode(dev, buf); /* The assumption is that the function ipoib_set_mode returned @@ -1539,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr, */ if (ret != -EBUSY) rtnl_unlock(); - mutex_unlock(&priv->sysfs_mutex); return (!ret || ret == -EBUSY) ? count : ret; } @@ -1564,7 +1559,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge) priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr); if (IS_ERR(priv->cm.srq)) { - if (PTR_ERR(priv->cm.srq) != -ENOSYS) + if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP) pr_warn("%s: failed to allocate SRQ, error %ld\n", priv->ca->name, PTR_ERR(priv->cm.srq)); priv->cm.srq = NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index 2706bf26cbac..83429925dfc6 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -102,7 +102,7 @@ static int ipoib_set_coalesce(struct net_device *dev, ret = rdma_set_cq_moderation(priv->recv_cq, coal->rx_max_coalesced_frames, coal->rx_coalesce_usecs); - if (ret && ret != -ENOSYS) { + if (ret && ret != -EOPNOTSUPP) { ipoib_warn(priv, "failed modifying CQ (%d)\n", ret); return ret; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c index ea302b054601..178488028734 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c @@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = { void ipoib_create_debug_files(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - char name[IFNAMSIZ + sizeof "_path"]; + char name[IFNAMSIZ + sizeof("_path")]; - snprintf(name, sizeof name, "%s_mcg", dev->name); + snprintf(name, sizeof(name), "%s_mcg", dev->name); priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_mcg_fops); if (!priv->mcg_dentry) ipoib_warn(priv, "failed to create mcg debug file\n"); - snprintf(name, sizeof name, "%s_path", dev->name); + snprintf(name, sizeof(name), "%s_path", dev->name); priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO, ipoib_root, dev, &ipoib_path_fops); if (!priv->path_dentry) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index f47f9ace1f48..9006a13af1de 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -40,6 +40,7 @@ #include <linux/ip.h> #include <linux/tcp.h> +#include <rdma/ib_cache.h> #include "ipoib.h" @@ -57,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev, struct ipoib_ah *ah; struct ib_ah *vah; - ah = kmalloc(sizeof *ah, GFP_KERNEL); + ah = kmalloc(sizeof(*ah), GFP_KERNEL); if (!ah) return ERR_PTR(-ENOMEM); @@ -100,7 +101,6 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv, static int ipoib_ib_post_receive(struct net_device *dev, int id) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - struct ib_recv_wr *bad_wr; int ret; priv->rx_wr.wr_id = id | IPOIB_OP_RECV; @@ -108,7 +108,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id) priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1]; - ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr); + ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL); if (unlikely(ret)) { ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret); ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping); @@ -202,7 +202,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc) } memcpy(mapping, priv->rx_ring[wr_id].mapping, - IPOIB_UD_RX_SG * sizeof *mapping); + IPOIB_UD_RX_SG * sizeof(*mapping)); /* * If we can't allocate a new RX buffer, dump @@ -541,7 +541,6 @@ static inline int post_send(struct ipoib_dev_priv *priv, struct ipoib_tx_buf *tx_req, void *head, int hlen) { - struct ib_send_wr *bad_wr; struct sk_buff *skb = tx_req->skb; ipoib_build_sge(priv, tx_req); @@ -558,7 +557,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, } else priv->tx_wr.wr.opcode = IB_WR_SEND; - return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr); + return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL); } int ipoib_send(struct net_device *dev, struct sk_buff *skb, @@ -568,7 +567,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_tx_buf *tx_req; int hlen, rc; void *phead; - unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb); + unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb); if (skb_is_gso(skb)) { hlen = skb_transport_offset(skb) + tcp_hdrlen(skb); @@ -1069,7 +1068,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv) bool ret = false; netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4); - if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL)) + if (rdma_query_gid(priv->ca, priv->port, 0, &gid0)) return false; netif_addr_lock_bh(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 26cde95bc0f3..e3d28f9ad9c0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -215,11 +215,6 @@ static int ipoib_stop(struct net_device *dev) return 0; } -static void ipoib_uninit(struct net_device *dev) -{ - ipoib_dev_cleanup(dev); -} - static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -634,7 +629,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev) { struct ipoib_path_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; @@ -770,8 +765,10 @@ static void path_rec_completion(int status, struct rdma_ah_attr av; if (!ib_init_ah_attr_from_path(priv->ca, priv->port, - pathrec, &av)) + pathrec, &av, NULL)) { ah = ipoib_create_ah(dev, priv->pd, &av); + rdma_destroy_ah_attr(&av); + } } spin_lock_irqsave(&priv->lock, flags); @@ -883,7 +880,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid) if (!priv->broadcast) return NULL; - path = kzalloc(sizeof *path, GFP_ATOMIC); + path = kzalloc(sizeof(*path), GFP_ATOMIC); if (!path) return NULL; @@ -1199,11 +1196,13 @@ static void ipoib_timeout(struct net_device *dev) static int ipoib_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - const void *daddr, const void *saddr, unsigned len) + const void *daddr, + const void *saddr, + unsigned int len) { struct ipoib_header *header; - header = skb_push(skb, sizeof *header); + header = skb_push(skb, sizeof(*header)); header->proto = htons(type); header->reserved = 0; @@ -1306,9 +1305,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) int i; LIST_HEAD(remove_list); - if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - return; - spin_lock_irqsave(&priv->lock, flags); htbl = rcu_dereference_protected(ntbl->htbl, @@ -1320,9 +1316,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv) /* neigh is obsolete if it was idle for two GC periods */ dt = 2 * arp_tbl.gc_interval; neigh_obsolete = jiffies - dt; - /* handle possible race condition */ - if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - goto out_unlock; for (i = 0; i < htbl->size; i++) { struct ipoib_neigh *neigh; @@ -1360,9 +1353,8 @@ static void ipoib_reap_neigh(struct work_struct *work) __ipoib_reap_neigh(priv); - if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags)) - queue_delayed_work(priv->wq, &priv->neigh_reap_task, - arp_tbl.gc_interval); + queue_delayed_work(priv->wq, &priv->neigh_reap_task, + arp_tbl.gc_interval); } @@ -1371,7 +1363,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr, { struct ipoib_neigh *neigh; - neigh = kzalloc(sizeof *neigh, GFP_ATOMIC); + neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC); if (!neigh) return NULL; @@ -1524,9 +1516,8 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) htbl = kzalloc(sizeof(*htbl), GFP_KERNEL); if (!htbl) return -ENOMEM; - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); size = roundup_pow_of_two(arp_tbl.gc_thresh3); - buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL); + buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL); if (!buckets) { kfree(htbl); return -ENOMEM; @@ -1539,7 +1530,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv) atomic_set(&ntbl->entries, 0); /* start garbage collection */ - clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); queue_delayed_work(priv->wq, &priv->neigh_reap_task, arp_tbl.gc_interval); @@ -1554,7 +1544,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head) struct ipoib_neigh __rcu **buckets = htbl->buckets; struct ipoib_neigh_table *ntbl = htbl->ntbl; - kfree(buckets); + kvfree(buckets); kfree(htbl); complete(&ntbl->deleted); } @@ -1649,15 +1639,11 @@ out_unlock: static void ipoib_neigh_hash_uninit(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); - int stopped; ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n"); init_completion(&priv->ntbl.deleted); - /* Stop GC if called at init fail need to cancel work */ - stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - if (!stopped) - cancel_delayed_work(&priv->neigh_reap_task); + cancel_delayed_work_sync(&priv->neigh_reap_task); ipoib_flush_neighs(priv); @@ -1755,13 +1741,11 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr, return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd); } -int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) +static int ipoib_dev_init(struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); int ret = -ENOMEM; - priv->ca = ca; - priv->port = port; priv->qp = NULL; /* @@ -1777,7 +1761,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) /* create pd, which used both for control and datapath*/ priv->pd = ib_alloc_pd(priv->ca, 0); if (IS_ERR(priv->pd)) { - pr_warn("%s: failed to allocate PD\n", ca->name); + pr_warn("%s: failed to allocate PD\n", priv->ca->name); goto clean_wq; } @@ -1787,7 +1771,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) goto out_free_pd; } - if (ipoib_neigh_hash_init(priv) < 0) { + ret = ipoib_neigh_hash_init(priv); + if (ret) { pr_warn("%s failed to init neigh hash\n", dev->name); goto out_dev_uninit; } @@ -1796,12 +1781,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port) if (ipoib_ib_dev_open(dev)) { pr_warn("%s failed to open device\n", dev->name); ret = -ENODEV; - goto out_dev_uninit; + goto out_hash_uninit; } } return 0; +out_hash_uninit: + ipoib_neigh_hash_uninit(dev); + out_dev_uninit: ipoib_ib_dev_cleanup(dev); @@ -1821,21 +1809,151 @@ out: return ret; } -void ipoib_dev_cleanup(struct net_device *dev) +/* + * This must be called before doing an unregister_netdev on a parent device to + * shutdown the IB event handler. + */ +static void ipoib_parent_unregister_pre(struct net_device *ndev) { - struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv; - LIST_HEAD(head); + struct ipoib_dev_priv *priv = ipoib_priv(ndev); - ASSERT_RTNL(); + /* + * ipoib_set_mac checks netif_running before pushing work, clearing + * running ensures the it will not add more work. + */ + rtnl_lock(); + dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); + rtnl_unlock(); - /* Delete any child interfaces first */ - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { - /* Stop GC on child */ - set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags); - cancel_delayed_work(&cpriv->neigh_reap_task); - unregister_netdevice_queue(cpriv->dev, &head); + /* ipoib_event() cannot be running once this returns */ + ib_unregister_event_handler(&priv->event_handler); + + /* + * Work on the queue grabs the rtnl lock, so this cannot be done while + * also holding it. + */ + flush_workqueue(ipoib_workqueue); +} + +static void ipoib_set_dev_features(struct ipoib_dev_priv *priv) +{ + priv->hca_caps = priv->ca->attrs.device_cap_flags; + + if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { + priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; + + if (priv->hca_caps & IB_DEVICE_UD_TSO) + priv->dev->hw_features |= NETIF_F_TSO; + + priv->dev->features |= priv->dev->hw_features; + } +} + +static int ipoib_parent_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + struct ib_port_attr attr; + int result; + + result = ib_query_port(priv->ca, priv->port, &attr); + if (result) { + pr_warn("%s: ib_query_port %d failed\n", priv->ca->name, + priv->port); + return result; + } + priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); + + result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey); + if (result) { + pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", + priv->ca->name, priv->port, result); + return result; } - unregister_netdevice_many(&head); + + result = rdma_query_gid(priv->ca, priv->port, 0, &priv->local_gid); + if (result) { + pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n", + priv->ca->name, priv->port, result); + return result; + } + memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, + sizeof(union ib_gid)); + + SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent); + priv->dev->dev_id = priv->port - 1; + + return 0; +} + +static void ipoib_child_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + dev_hold(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_add_tail(&priv->list, &ppriv->child_intfs); + up_write(&ppriv->vlan_rwsem); + + priv->max_ib_mtu = ppriv->max_ib_mtu; + set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); + memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); + memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); +} + +static int ipoib_ndo_init(struct net_device *ndev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(ndev); + int rc; + + if (priv->parent) { + ipoib_child_init(ndev); + } else { + rc = ipoib_parent_init(ndev); + if (rc) + return rc; + } + + /* MTU will be reset when mcast join happens */ + ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); + priv->mcast_mtu = priv->admin_mtu = ndev->mtu; + ndev->max_mtu = IPOIB_CM_MTU; + + ndev->neigh_priv_len = sizeof(struct ipoib_neigh); + + /* + * Set the full membership bit, so that we join the right + * broadcast group, etc. + */ + priv->pkey |= 0x8000; + + ndev->broadcast[8] = priv->pkey >> 8; + ndev->broadcast[9] = priv->pkey & 0xff; + set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); + + ipoib_set_dev_features(priv); + + rc = ipoib_dev_init(ndev); + if (rc) { + pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n", + priv->ca->name, priv->dev->name, priv->port, rc); + } + + return 0; +} + +static void ipoib_ndo_uninit(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + + ASSERT_RTNL(); + + /* + * ipoib_remove_one guarantees the children are removed before the + * parent, and that is the only place where a parent can be removed. + */ + WARN_ON(!list_empty(&priv->child_intfs)); ipoib_neigh_hash_uninit(dev); @@ -1847,6 +1965,16 @@ void ipoib_dev_cleanup(struct net_device *dev) destroy_workqueue(priv->wq); priv->wq = NULL; } + + if (priv->parent) { + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); + + down_write(&ppriv->vlan_rwsem); + list_del(&priv->list); + up_write(&ppriv->vlan_rwsem); + + dev_put(priv->parent); + } } static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state) @@ -1894,7 +2022,8 @@ static const struct header_ops ipoib_header_ops = { }; static const struct net_device_ops ipoib_netdev_ops_pf = { - .ndo_uninit = ipoib_uninit, + .ndo_init = ipoib_ndo_init, + .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -1913,7 +2042,8 @@ static const struct net_device_ops ipoib_netdev_ops_pf = { }; static const struct net_device_ops ipoib_netdev_ops_vf = { - .ndo_uninit = ipoib_uninit, + .ndo_init = ipoib_ndo_init, + .ndo_uninit = ipoib_ndo_uninit, .ndo_open = ipoib_open, .ndo_stop = ipoib_stop, .ndo_change_mtu = ipoib_change_mtu, @@ -1945,6 +2075,13 @@ void ipoib_setup_common(struct net_device *dev) netif_keep_dst(dev); memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN); + + /* + * unregister_netdev always frees the netdev, we use this mode + * consistently to unify all the various unregister paths, including + * those connected to rtnl_link_ops which require it. + */ + dev->needs_free_netdev = true; } static void ipoib_build_priv(struct net_device *dev) @@ -1955,7 +2092,6 @@ static void ipoib_build_priv(struct net_device *dev) spin_lock_init(&priv->lock); init_rwsem(&priv->vlan_rwsem); mutex_init(&priv->mcast_mutex); - mutex_init(&priv->sysfs_mutex); INIT_LIST_HEAD(&priv->path_list); INIT_LIST_HEAD(&priv->child_intfs); @@ -1999,9 +2135,7 @@ static struct net_device rn->send = ipoib_send; rn->attach_mcast = ipoib_mcast_attach; rn->detach_mcast = ipoib_mcast_detach; - rn->free_rdma_netdev = free_netdev; rn->hca = hca; - dev->netdev_ops = &ipoib_netdev_default_pf; return dev; @@ -2039,6 +2173,9 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, if (!priv) return NULL; + priv->ca = hca; + priv->port = port; + dev = ipoib_get_netdev(hca, port, name); if (!dev) goto free_priv; @@ -2053,6 +2190,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port, rn = netdev_priv(dev); rn->clnt_priv = priv; + + /* + * Only the child register_netdev flows can handle priv_destructor + * being set, so we force it to NULL here and handle manually until it + * is safe to turn on. + */ + priv->next_priv_destructor = dev->priv_destructor; + dev->priv_destructor = NULL; + ipoib_build_priv(dev); return priv; @@ -2061,6 +2207,27 @@ free_priv: return NULL; } +void ipoib_intf_free(struct net_device *dev) +{ + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct rdma_netdev *rn = netdev_priv(dev); + + dev->priv_destructor = priv->next_priv_destructor; + if (dev->priv_destructor) + dev->priv_destructor(dev); + + /* + * There are some error flows around register_netdev failing that may + * attempt to call priv_destructor twice, prevent that from happening. + */ + dev->priv_destructor = NULL; + + /* unregister/destroy is very complicated. Make bugs more obvious. */ + rn->clnt_priv = NULL; + + kfree(priv); +} + static ssize_t show_pkey(struct device *dev, struct device_attribute *attr, char *buf) { @@ -2186,12 +2353,6 @@ static ssize_t create_child(struct device *dev, if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000) return -EINVAL; - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - pkey |= 0x8000; - ret = ipoib_vlan_add(to_net_dev(dev), pkey); return ret ? ret : count; @@ -2223,87 +2384,19 @@ int ipoib_add_pkey_attr(struct net_device *dev) return device_create_file(&dev->dev, &dev_attr_pkey); } -void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca) -{ - priv->hca_caps = hca->attrs.device_cap_flags; - - if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) { - priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM; - - if (priv->hca_caps & IB_DEVICE_UD_TSO) - priv->dev->hw_features |= NETIF_F_TSO; - - priv->dev->features |= priv->dev->hw_features; - } -} - static struct net_device *ipoib_add_port(const char *format, struct ib_device *hca, u8 port) { struct ipoib_dev_priv *priv; - struct ib_port_attr attr; - struct rdma_netdev *rn; - int result = -ENOMEM; + struct net_device *ndev; + int result; priv = ipoib_intf_alloc(hca, port, format); if (!priv) { pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port); - goto alloc_mem_failed; - } - - SET_NETDEV_DEV(priv->dev, hca->dev.parent); - priv->dev->dev_id = port - 1; - - result = ib_query_port(hca, port, &attr); - if (result) { - pr_warn("%s: ib_query_port %d failed\n", hca->name, port); - goto device_init_failed; - } - - priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu); - - /* MTU will be reset when mcast join happens */ - priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); - priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; - priv->dev->max_mtu = IPOIB_CM_MTU; - - priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh); - - result = ib_query_pkey(hca, port, 0, &priv->pkey); - if (result) { - pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; - } - - ipoib_set_dev_features(priv, hca); - - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - priv->pkey |= 0x8000; - - priv->dev->broadcast[8] = priv->pkey >> 8; - priv->dev->broadcast[9] = priv->pkey & 0xff; - - result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL); - if (result) { - pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; - } - - memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, - sizeof(union ib_gid)); - set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - - result = ipoib_dev_init(priv->dev, hca, port); - if (result) { - pr_warn("%s: failed to initialize port %d (ret = %d)\n", - hca->name, port, result); - goto device_init_failed; + return ERR_PTR(-ENOMEM); } + ndev = priv->dev; INIT_IB_EVENT_HANDLER(&priv->event_handler, priv->ca, ipoib_event); @@ -2312,46 +2405,43 @@ static struct net_device *ipoib_add_port(const char *format, /* call event handler to ensure pkey in sync */ queue_work(ipoib_workqueue, &priv->flush_heavy); - result = register_netdev(priv->dev); + result = register_netdev(ndev); if (result) { pr_warn("%s: couldn't register ipoib port %d; error %d\n", hca->name, port, result); - goto register_failed; + + ipoib_parent_unregister_pre(ndev); + ipoib_intf_free(ndev); + free_netdev(ndev); + + return ERR_PTR(result); } - result = -ENOMEM; - if (ipoib_cm_add_mode_attr(priv->dev)) + /* + * We cannot set priv_destructor before register_netdev because we + * need priv to be always valid during the error flow to execute + * ipoib_parent_unregister_pre(). Instead handle it manually and only + * enter priv_destructor mode once we are completely registered. + */ + ndev->priv_destructor = ipoib_intf_free; + + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_create_child)) + if (device_create_file(&ndev->dev, &dev_attr_create_child)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_delete_child)) + if (device_create_file(&ndev->dev, &dev_attr_delete_child)) goto sysfs_failed; - return priv->dev; + return ndev; sysfs_failed: - unregister_netdev(priv->dev); - -register_failed: - ib_unregister_event_handler(&priv->event_handler); - flush_workqueue(ipoib_workqueue); - /* Stop GC if started before flush */ - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - cancel_delayed_work(&priv->neigh_reap_task); - flush_workqueue(priv->wq); - ipoib_dev_cleanup(priv->dev); - -device_init_failed: - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - -alloc_mem_failed: - return ERR_PTR(result); + ipoib_parent_unregister_pre(ndev); + unregister_netdev(ndev); + return ERR_PTR(-ENOMEM); } static void ipoib_add_one(struct ib_device *device) @@ -2362,7 +2452,7 @@ static void ipoib_add_one(struct ib_device *device) int p; int count = 0; - dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL); + dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL); if (!dev_list) return; @@ -2396,39 +2486,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data) return; list_for_each_entry_safe(priv, tmp, dev_list, list) { - struct rdma_netdev *parent_rn = netdev_priv(priv->dev); - - ib_unregister_event_handler(&priv->event_handler); - flush_workqueue(ipoib_workqueue); - - /* mark interface in the middle of destruction */ - set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags); + LIST_HEAD(head); + ipoib_parent_unregister_pre(priv->dev); rtnl_lock(); - dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP); - rtnl_unlock(); - - /* Stop GC */ - set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags); - cancel_delayed_work(&priv->neigh_reap_task); - flush_workqueue(priv->wq); - - /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */ - mutex_lock(&priv->sysfs_mutex); - unregister_netdev(priv->dev); - mutex_unlock(&priv->sysfs_mutex); - - parent_rn->free_rdma_netdev(priv->dev); - list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) { - struct rdma_netdev *child_rn; + list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, + list) + unregister_netdevice_queue(cpriv->dev, &head); + unregister_netdevice_queue(priv->dev, &head); + unregister_netdevice_many(&head); - child_rn = netdev_priv(cpriv->dev); - child_rn->free_rdma_netdev(cpriv->dev); - kfree(cpriv); - } - - kfree(priv); + rtnl_unlock(); } kfree(dev_list); @@ -2476,8 +2545,7 @@ static int __init ipoib_init_module(void) * its private workqueue, and we only queue up flush events * on our global flush workqueue. This avoids the deadlocks. */ - ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", - WQ_MEM_RECLAIM); + ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0); if (!ipoib_workqueue) { ret = -ENOMEM; goto err_fs; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 6709328d90f8..b9e9562f5034 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, { struct ipoib_mcast *mcast; - mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC); + mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC); if (!mcast) return NULL; @@ -822,6 +822,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb) if (neigh && list_empty(&neigh->list)) { kref_get(&mcast->ah->ref); neigh->ah = mcast->ah; + neigh->ah->valid = 1; list_add_tail(&neigh->list, &mcast->neigh_list); } } @@ -917,7 +918,7 @@ void ipoib_mcast_restart_task(struct work_struct *work) if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast)) continue; - memcpy(mgid.raw, ha->addr + 4, sizeof mgid); + memcpy(mgid.raw, ha->addr + 4, sizeof(mgid)); mcast = __ipoib_mcast_find(dev, &mgid); if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) { @@ -997,7 +998,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev) { struct ipoib_mcast_iter *iter; - iter = kmalloc(sizeof *iter, GFP_KERNEL); + iter = kmalloc(sizeof(*iter), GFP_KERNEL); if (!iter) return NULL; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index 3e44087935ae..d4d553a51fa9 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -122,15 +122,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, } else child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]); - if (child_pkey == 0 || child_pkey == 0x8000) - return -EINVAL; - - /* - * Set the full membership bit, so that we join the right - * broadcast group, etc. - */ - child_pkey |= 0x8000; - err = __ipoib_vlan_add(ppriv, ipoib_priv(dev), child_pkey, IPOIB_RTNL_CHILD); @@ -139,19 +130,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev, return err; } -static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head) -{ - struct ipoib_dev_priv *priv, *ppriv; - - priv = ipoib_priv(dev); - ppriv = ipoib_priv(priv->parent); - - down_write(&ppriv->vlan_rwsem); - unregister_netdevice_queue(dev, head); - list_del(&priv->list); - up_write(&ppriv->vlan_rwsem); -} - static size_t ipoib_get_size(const struct net_device *dev) { return nla_total_size(2) + /* IFLA_IPOIB_PKEY */ @@ -167,7 +145,6 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .setup = ipoib_setup_common, .newlink = ipoib_new_child_link, .changelink = ipoib_changelink, - .dellink = ipoib_unregister_child_dev, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, }; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index 984a88096f39..9f36ca786df8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca, if (set_qkey) { ret = -ENOMEM; - qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL); + qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL); if (!qp_attr) goto out; @@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) .cap = { .max_send_wr = ipoib_sendq_size, .max_recv_wr = ipoib_recvq_size, - .max_send_sge = min_t(u32, priv->ca->attrs.max_sge, + .max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge, MAX_SKB_FRAGS + 1), .max_recv_sge = IPOIB_UD_RX_SG }, @@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) else size += ipoib_recvq_size * ipoib_max_conn_qp; } else - if (ret != -ENOSYS) - return -ENODEV; + if (ret != -EOPNOTSUPP) + return ret; req_vec = (priv->port - 1) * 2; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index 55a9b71ed05a..341753fbda54 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -50,68 +50,112 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr, } static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL); +static bool is_child_unique(struct ipoib_dev_priv *ppriv, + struct ipoib_dev_priv *priv) +{ + struct ipoib_dev_priv *tpriv; + + ASSERT_RTNL(); + + /* + * Since the legacy sysfs interface uses pkey for deletion it cannot + * support more than one interface with the same pkey, it creates + * ambiguity. The RTNL interface deletes using the netdev so it does + * not have a problem to support duplicated pkeys. + */ + if (priv->child_type != IPOIB_LEGACY_CHILD) + return true; + + /* + * First ensure this isn't a duplicate. We check the parent device and + * then all of the legacy child interfaces to make sure the Pkey + * doesn't match. + */ + if (ppriv->pkey == priv->pkey) + return false; + + list_for_each_entry(tpriv, &ppriv->child_intfs, list) { + if (tpriv->pkey == priv->pkey && + tpriv->child_type == IPOIB_LEGACY_CHILD) + return false; + } + + return true; +} + +/* + * NOTE: If this function fails then the priv->dev will remain valid, however + * priv can have been freed and must not be touched by caller in the error + * case. + * + * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to + * free the net_device (just as rtnl_newlink does) otherwise the net_device + * will be freed when the rtnl is unlocked. + */ int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv, u16 pkey, int type) { + struct net_device *ndev = priv->dev; int result; - priv->max_ib_mtu = ppriv->max_ib_mtu; - /* MTU will be reset when mcast join happens */ - priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu); - priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu; - priv->parent = ppriv->dev; - set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags); + ASSERT_RTNL(); + + /* + * Racing with unregister of the parent must be prevented by the + * caller. + */ + WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED); - ipoib_set_dev_features(priv, ppriv->ca); + if (pkey == 0 || pkey == 0x8000) { + result = -EINVAL; + goto out_early; + } + priv->parent = ppriv->dev; priv->pkey = pkey; + priv->child_type = type; - memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN); - memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid)); - set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags); - priv->dev->broadcast[8] = pkey >> 8; - priv->dev->broadcast[9] = pkey & 0xff; - - result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port); - if (result < 0) { - ipoib_warn(ppriv, "failed to initialize subinterface: " - "device %s, port %d", - ppriv->ca->name, ppriv->port); - goto err; + if (!is_child_unique(ppriv, priv)) { + result = -ENOTUNIQ; + goto out_early; } - result = register_netdevice(priv->dev); + /* We do not need to touch priv if register_netdevice fails */ + ndev->priv_destructor = ipoib_intf_free; + + result = register_netdevice(ndev); if (result) { ipoib_warn(priv, "failed to initialize; error %i", result); - goto register_failed; + + /* + * register_netdevice sometimes calls priv_destructor, + * sometimes not. Make sure it was done. + */ + goto out_early; } /* RTNL childs don't need proprietary sysfs entries */ if (type == IPOIB_LEGACY_CHILD) { - if (ipoib_cm_add_mode_attr(priv->dev)) + if (ipoib_cm_add_mode_attr(ndev)) goto sysfs_failed; - if (ipoib_add_pkey_attr(priv->dev)) + if (ipoib_add_pkey_attr(ndev)) goto sysfs_failed; - if (ipoib_add_umcast_attr(priv->dev)) + if (ipoib_add_umcast_attr(ndev)) goto sysfs_failed; - if (device_create_file(&priv->dev->dev, &dev_attr_parent)) + if (device_create_file(&ndev->dev, &dev_attr_parent)) goto sysfs_failed; } - priv->child_type = type; - list_add_tail(&priv->list, &ppriv->child_intfs); - return 0; sysfs_failed: - result = -ENOMEM; unregister_netdevice(priv->dev); + return -ENOMEM; -register_failed: - ipoib_dev_cleanup(priv->dev); - -err: +out_early: + if (ndev->priv_destructor) + ndev->priv_destructor(ndev); return result; } @@ -119,129 +163,124 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv; char intf_name[IFNAMSIZ]; - struct ipoib_dev_priv *tpriv; + struct net_device *ndev; int result; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - - snprintf(intf_name, sizeof intf_name, "%s.%04x", - ppriv->dev->name, pkey); - - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - - if (!down_write_trylock(&ppriv->vlan_rwsem)) { + if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); + return -EPERM; } + ppriv = ipoib_priv(pdev); + + snprintf(intf_name, sizeof(intf_name), "%s.%04x", + ppriv->dev->name, pkey); + priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name); if (!priv) { result = -ENOMEM; goto out; } - - /* - * First ensure this isn't a duplicate. We check the parent device and - * then all of the legacy child interfaces to make sure the Pkey - * doesn't match. - */ - if (ppriv->pkey == pkey) { - result = -ENOTUNIQ; - goto out; - } - - list_for_each_entry(tpriv, &ppriv->child_intfs, list) { - if (tpriv->pkey == pkey && - tpriv->child_type == IPOIB_LEGACY_CHILD) { - result = -ENOTUNIQ; - goto out; - } - } + ndev = priv->dev; result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD); + if (result && ndev->reg_state == NETREG_UNINITIALIZED) + free_netdev(ndev); + out: - up_write(&ppriv->vlan_rwsem); rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - if (result && priv) { - struct rdma_netdev *rn; + return result; +} + +struct ipoib_vlan_delete_work { + struct work_struct work; + struct net_device *dev; +}; + +/* + * sysfs callbacks of a netdevice cannot obtain the rtnl lock as + * unregister_netdev ultimately deletes the sysfs files while holding the rtnl + * lock. This deadlocks the system. + * + * A callback can use rtnl_trylock to avoid the deadlock but it cannot call + * unregister_netdev as that internally takes and releases the rtnl_lock. So + * instead we find the netdev to unregister and then do the actual unregister + * from the global work queue where we can obtain the rtnl_lock safely. + */ +static void ipoib_vlan_delete_task(struct work_struct *work) +{ + struct ipoib_vlan_delete_work *pwork = + container_of(work, struct ipoib_vlan_delete_work, work); + struct net_device *dev = pwork->dev; + + rtnl_lock(); + + /* Unregistering tasks can race with another task or parent removal */ + if (dev->reg_state == NETREG_REGISTERED) { + struct ipoib_dev_priv *priv = ipoib_priv(dev); + struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent); - rn = netdev_priv(priv->dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); + ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); + unregister_netdevice(dev); } - return result; + rtnl_unlock(); + + kfree(pwork); } int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey) { struct ipoib_dev_priv *ppriv, *priv, *tpriv; - struct net_device *dev = NULL; + int rc; if (!capable(CAP_NET_ADMIN)) return -EPERM; - ppriv = ipoib_priv(pdev); - - if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags)) - return -EPERM; - - if (!mutex_trylock(&ppriv->sysfs_mutex)) + if (!rtnl_trylock()) return restart_syscall(); - if (!rtnl_trylock()) { - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); - } - - if (!down_write_trylock(&ppriv->vlan_rwsem)) { + if (pdev->reg_state != NETREG_REGISTERED) { rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - return restart_syscall(); + return -EPERM; } + ppriv = ipoib_priv(pdev); + + rc = -ENODEV; list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) { if (priv->pkey == pkey && priv->child_type == IPOIB_LEGACY_CHILD) { - list_del(&priv->list); - dev = priv->dev; + struct ipoib_vlan_delete_work *work; + + work = kmalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + rc = -ENOMEM; + goto out; + } + + down_write(&ppriv->vlan_rwsem); + list_del_init(&priv->list); + up_write(&ppriv->vlan_rwsem); + work->dev = priv->dev; + INIT_WORK(&work->work, ipoib_vlan_delete_task); + queue_work(ipoib_workqueue, &work->work); + + rc = 0; break; } } - up_write(&ppriv->vlan_rwsem); - - if (dev) { - ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name); - unregister_netdevice(dev); - } +out: rtnl_unlock(); - mutex_unlock(&ppriv->sysfs_mutex); - - if (dev) { - struct rdma_netdev *rn; - - rn = netdev_priv(dev); - rn->free_rdma_netdev(priv->dev); - kfree(priv); - return 0; - } - return -ENODEV; + return rc; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 9a6434c31db2..3fecd87c9f2b 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -610,12 +610,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, uint32_t initial_cmdsn) { struct iscsi_cls_session *cls_session; - struct iscsi_session *session; struct Scsi_Host *shost; struct iser_conn *iser_conn = NULL; struct ib_conn *ib_conn; u32 max_fr_sectors; - u16 max_cmds; shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0); if (!shost) @@ -633,8 +631,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, */ if (ep) { iser_conn = ep->dd_data; - max_cmds = iser_conn->max_cmds; shost->sg_tablesize = iser_conn->scsi_sg_tablesize; + shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds); mutex_lock(&iser_conn->state_mutex); if (iser_conn->state != ISER_CONN_UP) { @@ -660,7 +658,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, } mutex_unlock(&iser_conn->state_mutex); } else { - max_cmds = ISER_DEF_XMIT_CMDS_MAX; + shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX); if (iscsi_host_add(shost, NULL)) goto free_host; } @@ -676,21 +674,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, iser_warn("max_sectors was reduced from %u to %u\n", iser_max_sectors, shost->max_sectors); - if (cmds_max > max_cmds) { - iser_info("cmds_max changed from %u to %u\n", - cmds_max, max_cmds); - cmds_max = max_cmds; - } - cls_session = iscsi_session_setup(&iscsi_iser_transport, shost, - cmds_max, 0, + shost->can_queue, 0, sizeof(struct iscsi_iser_task), initial_cmdsn, 0); if (!cls_session) goto remove_host; - session = cls_session->dd_data; - shost->can_queue = session->scsi_cmds_max; return cls_session; remove_host: diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 130bf163f066..009be8889d71 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -405,7 +405,8 @@ iser_reg_sig_mr(struct iscsi_iser_task *iser_task, ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); - wr = sig_handover_wr(iser_tx_next_wr(tx_desc)); + wr = container_of(iser_tx_next_wr(tx_desc), struct ib_sig_handover_wr, + wr); wr->wr.opcode = IB_WR_REG_SIG_MR; wr->wr.wr_cqe = cqe; wr->wr.sg_list = &data_reg->sge; @@ -457,7 +458,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return n < 0 ? n : -EINVAL; } - wr = reg_wr(iser_tx_next_wr(tx_desc)); + wr = container_of(iser_tx_next_wr(tx_desc), struct ib_reg_wr, wr); wr->wr.opcode = IB_WR_REG_MR; wr->wr.wr_cqe = cqe; wr->wr.send_flags = 0; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 616d978cbf2b..b686a4aaffe8 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1022,7 +1022,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) { struct ib_conn *ib_conn = &iser_conn->ib_conn; struct iser_login_desc *desc = &iser_conn->login_desc; - struct ib_recv_wr wr, *wr_failed; + struct ib_recv_wr wr; int ib_ret; desc->sge.addr = desc->rsp_dma; @@ -1036,7 +1036,7 @@ int iser_post_recvl(struct iser_conn *iser_conn) wr.next = NULL; ib_conn->post_recv_buf_count++; - ib_ret = ib_post_recv(ib_conn->qp, &wr, &wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, &wr, NULL); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count--; @@ -1050,7 +1050,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) struct ib_conn *ib_conn = &iser_conn->ib_conn; unsigned int my_rx_head = iser_conn->rx_desc_head; struct iser_rx_desc *rx_desc; - struct ib_recv_wr *wr, *wr_failed; + struct ib_recv_wr *wr; int i, ib_ret; for (wr = ib_conn->rx_wr, i = 0; i < count; i++, wr++) { @@ -1067,7 +1067,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) wr->next = NULL; /* mark end of work requests list */ ib_conn->post_recv_buf_count += count; - ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &wr_failed); + ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, NULL); if (ib_ret) { iser_err("ib_post_recv failed ret=%d\n", ib_ret); ib_conn->post_recv_buf_count -= count; @@ -1086,7 +1086,7 @@ int iser_post_recvm(struct iser_conn *iser_conn, int count) int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, bool signal) { - struct ib_send_wr *bad_wr, *wr = iser_tx_next_wr(tx_desc); + struct ib_send_wr *wr = iser_tx_next_wr(tx_desc); int ib_ret; ib_dma_sync_single_for_device(ib_conn->device->ib_device, @@ -1100,10 +1100,10 @@ int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc, wr->opcode = IB_WR_SEND; wr->send_flags = signal ? IB_SEND_SIGNALED : 0; - ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, &bad_wr); + ib_ret = ib_post_send(ib_conn->qp, &tx_desc->wrs[0].send, NULL); if (ib_ret) iser_err("ib_post_send failed, ret:%d opcode:%d\n", - ib_ret, bad_wr->opcode); + ib_ret, wr->opcode); return ib_ret; } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index cccbcf0eb035..f39670c5c25c 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn, attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1; attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1; attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX; - attr.cap.max_send_sge = device->ib_device->attrs.max_sge; + attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge; attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; @@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device) struct ib_device *ib_dev = device->ib_device; int ret; - isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge); + isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n", + ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge); isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd); ret = isert_alloc_comps(device); @@ -809,7 +810,7 @@ isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) static int isert_post_recvm(struct isert_conn *isert_conn, u32 count) { - struct ib_recv_wr *rx_wr, *rx_wr_failed; + struct ib_recv_wr *rx_wr; int i, ret; struct iser_rx_desc *rx_desc; @@ -825,8 +826,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) rx_wr--; rx_wr->next = NULL; /* mark end of work requests list */ - ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, - &rx_wr_failed); + ret = ib_post_recv(isert_conn->qp, isert_conn->rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); @@ -836,7 +836,7 @@ isert_post_recvm(struct isert_conn *isert_conn, u32 count) static int isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) { - struct ib_recv_wr *rx_wr_failed, rx_wr; + struct ib_recv_wr rx_wr; int ret; if (!rx_desc->in_use) { @@ -853,7 +853,7 @@ isert_post_recv(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc) rx_wr.num_sge = 1; rx_wr.next = NULL; - ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_failed); + ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed with ret: %d\n", ret); @@ -864,7 +864,7 @@ static int isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_desc) { struct ib_device *ib_dev = isert_conn->cm_id->device; - struct ib_send_wr send_wr, *send_wr_failed; + struct ib_send_wr send_wr; int ret; ib_dma_sync_single_for_device(ib_dev, tx_desc->dma_addr, @@ -879,7 +879,7 @@ isert_login_post_send(struct isert_conn *isert_conn, struct iser_tx_desc *tx_des send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(isert_conn->qp, &send_wr, &send_wr_failed); + ret = ib_post_send(isert_conn->qp, &send_wr, NULL); if (ret) isert_err("ib_post_send() failed, ret: %d\n", ret); @@ -967,7 +967,7 @@ isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd, static int isert_login_post_recv(struct isert_conn *isert_conn) { - struct ib_recv_wr rx_wr, *rx_wr_fail; + struct ib_recv_wr rx_wr; struct ib_sge sge; int ret; @@ -986,7 +986,7 @@ isert_login_post_recv(struct isert_conn *isert_conn) rx_wr.sg_list = &sge; rx_wr.num_sge = 1; - ret = ib_post_recv(isert_conn->qp, &rx_wr, &rx_wr_fail); + ret = ib_post_recv(isert_conn->qp, &rx_wr, NULL); if (ret) isert_err("ib_post_recv() failed: %d\n", ret); @@ -1829,7 +1829,6 @@ isert_send_done(struct ib_cq *cq, struct ib_wc *wc) static int isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) { - struct ib_send_wr *wr_failed; int ret; ret = isert_post_recv(isert_conn, isert_cmd->rx_desc); @@ -1838,8 +1837,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd) return ret; } - ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, - &wr_failed); + ret = ib_post_send(isert_conn->qp, &isert_cmd->tx_desc.send_wr, NULL); if (ret) { isert_err("ib_post_send failed with %d\n", ret); return ret; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9786b24b956f..444d16520506 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -57,13 +57,10 @@ #define DRV_NAME "ib_srp" #define PFX DRV_NAME ": " -#define DRV_VERSION "2.0" -#define DRV_RELDATE "July 26, 2015" MODULE_AUTHOR("Roland Dreier"); MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator"); MODULE_LICENSE("Dual BSD/GPL"); -MODULE_INFO(release_date, DRV_RELDATE); #if !defined(CONFIG_DYNAMIC_DEBUG) #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt) @@ -145,7 +142,8 @@ static void srp_remove_one(struct ib_device *device, void *client_data); static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc, const char *opname); -static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event); +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event); static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); @@ -1211,7 +1209,6 @@ static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc) static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, u32 rkey) { - struct ib_send_wr *bad_wr; struct ib_send_wr wr = { .opcode = IB_WR_LOCAL_INV, .next = NULL, @@ -1222,7 +1219,7 @@ static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch, wr.wr_cqe = &req->reg_cqe; req->reg_cqe.done = srp_inv_rkey_err_done; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr, NULL); } static void srp_unmap_data(struct scsi_cmnd *scmnd, @@ -1503,7 +1500,6 @@ static int srp_map_finish_fr(struct srp_map_state *state, { struct srp_target_port *target = ch->target; struct srp_device *dev = target->srp_host->srp_dev; - struct ib_send_wr *bad_wr; struct ib_reg_wr wr; struct srp_fr_desc *desc; u32 rkey; @@ -1567,7 +1563,7 @@ static int srp_map_finish_fr(struct srp_map_state *state, srp_map_desc(state, desc->mr->iova, desc->mr->length, desc->mr->rkey); - err = ib_post_send(ch->qp, &wr.wr, &bad_wr); + err = ib_post_send(ch->qp, &wr.wr, NULL); if (unlikely(err)) { WARN_ON_ONCE(err == -ENOMEM); return err; @@ -2018,7 +2014,7 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) { struct srp_target_port *target = ch->target; struct ib_sge list; - struct ib_send_wr wr, *bad_wr; + struct ib_send_wr wr; list.addr = iu->dma; list.length = len; @@ -2033,13 +2029,13 @@ static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len) wr.opcode = IB_WR_SEND; wr.send_flags = IB_SEND_SIGNALED; - return ib_post_send(ch->qp, &wr, &bad_wr); + return ib_post_send(ch->qp, &wr, NULL); } static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) { struct srp_target_port *target = ch->target; - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; struct ib_sge list; list.addr = iu->dma; @@ -2053,7 +2049,7 @@ static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu) wr.sg_list = &list; wr.num_sge = 1; - return ib_post_recv(ch->qp, &wr, &bad_wr); + return ib_post_recv(ch->qp, &wr, NULL); } static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp) @@ -2558,7 +2554,7 @@ error: } static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, - struct ib_cm_event *event, + const struct ib_cm_event *event, struct srp_rdma_ch *ch) { struct srp_target_port *target = ch->target; @@ -2643,7 +2639,8 @@ static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id, } } -static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int srp_ib_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct srp_rdma_ch *ch = cm_id->context; struct srp_target_port *target = ch->target; @@ -3843,7 +3840,7 @@ static ssize_t srp_create_target(struct device *dev, INIT_WORK(&target->tl_err_work, srp_tl_err_work); INIT_WORK(&target->remove_work, srp_remove_work); spin_lock_init(&target->lock); - ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL); + ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid); if (ret) goto out; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 1ae638b58b63..f37cbad022a2 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport) sport->sm_lid = port_attr.sm_lid; sport->lid = port_attr.lid; - ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid, - NULL); + ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid); if (ret) goto err_query_port; @@ -720,7 +719,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx) && ioctx_size != sizeof(struct srpt_send_ioctx)); - ring = kmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); + ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL); if (!ring) goto out; for (i = 0; i < ring_size; ++i) { @@ -734,7 +733,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev, err: while (--i >= 0) srpt_free_ioctx(sdev, ring[i], dma_size, dir); - kfree(ring); + kvfree(ring); ring = NULL; out: return ring; @@ -759,7 +758,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring, for (i = 0; i < ring_size; ++i) srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir); - kfree(ioctx_ring); + kvfree(ioctx_ring); } /** @@ -817,7 +816,7 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, struct srpt_recv_ioctx *ioctx) { struct ib_sge list; - struct ib_recv_wr wr, *bad_wr; + struct ib_recv_wr wr; BUG_ON(!sdev); list.addr = ioctx->ioctx.dma; @@ -831,9 +830,9 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, wr.num_sge = 1; if (sdev->use_srq) - return ib_post_srq_recv(sdev->srq, &wr, &bad_wr); + return ib_post_srq_recv(sdev->srq, &wr, NULL); else - return ib_post_recv(ch->qp, &wr, &bad_wr); + return ib_post_recv(ch->qp, &wr, NULL); } /** @@ -847,7 +846,6 @@ static int srpt_post_recv(struct srpt_device *sdev, struct srpt_rdma_ch *ch, */ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) { - struct ib_send_wr *bad_wr; struct ib_rdma_wr wr = { .wr = { .next = NULL, @@ -860,7 +858,7 @@ static int srpt_zerolength_write(struct srpt_rdma_ch *ch) pr_debug("%s-%d: queued zerolength write\n", ch->sess_name, ch->qp->qp_num); - return ib_post_send(ch->qp, &wr.wr, &bad_wr); + return ib_post_send(ch->qp, &wr.wr, NULL); } static void srpt_zerolength_write_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1754,13 +1752,15 @@ retry: */ qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr); qp_init->cap.max_rdma_ctxs = sq_size / 2; - qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE); + qp_init->cap.max_send_sge = min(attrs->max_send_sge, + SRPT_MAX_SG_PER_WQE); qp_init->port_num = ch->sport->port; if (sdev->use_srq) { qp_init->srq = sdev->srq; } else { qp_init->cap.max_recv_wr = ch->rq_size; - qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge; + qp_init->cap.max_recv_sge = min(attrs->max_recv_sge, + SRPT_MAX_SG_PER_WQE); } if (ch->using_rdma_cm) { @@ -1833,8 +1833,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch) int ret; if (!srpt_set_ch_state(ch, CH_DRAINING)) { - pr_debug("%s-%d: already closed\n", ch->sess_name, - ch->qp->qp_num); + pr_debug("%s: already closed\n", ch->sess_name); return false; } @@ -1940,8 +1939,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport) list_for_each_entry(nexus, &sport->nexus_list, entry) { list_for_each_entry(ch, &nexus->ch_list, list) { if (srpt_disconnect_ch(ch) >= 0) - pr_info("Closing channel %s-%d because target %s_%d has been disabled\n", - ch->sess_name, ch->qp->qp_num, + pr_info("Closing channel %s because target %s_%d has been disabled\n", + ch->sess_name, sport->sdev->device->name, sport->port); srpt_close_ch(ch); } @@ -2086,7 +2085,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, struct rdma_conn_param rdma_cm; struct ib_cm_rep_param ib_cm; } *rep_param = NULL; - struct srpt_rdma_ch *ch; + struct srpt_rdma_ch *ch = NULL; char i_port_id[36]; u32 it_iu_len; int i, ret; @@ -2233,13 +2232,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); if (IS_ERR_OR_NULL(ch->sess)) { + WARN_ON_ONCE(ch->sess == NULL); ret = PTR_ERR(ch->sess); + ch->sess = NULL; pr_info("Rejected login for initiator %s: ret = %d.\n", ch->sess_name, ret); rej->reason = cpu_to_be32(ret == -ENOMEM ? SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES : SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED); - goto reject; + goto destroy_ib; } mutex_lock(&sport->mutex); @@ -2278,7 +2279,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES); pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n", ret); - goto destroy_ib; + goto reject; } pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess, @@ -2357,8 +2358,11 @@ free_ring: srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring, ch->sport->sdev, ch->rq_size, ch->max_rsp_size, DMA_TO_DEVICE); + free_ch: - if (ib_cm_id) + if (rdma_cm_id) + rdma_cm_id->context = NULL; + else ib_cm_id->context = NULL; kfree(ch); ch = NULL; @@ -2378,6 +2382,15 @@ reject: ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0, rej, sizeof(*rej)); + if (ch && ch->sess) { + srpt_close_ch(ch); + /* + * Tell the caller not to free cm_id since + * srpt_release_channel_work() will do that. + */ + ret = 0; + } + out: kfree(rep_param); kfree(rsp); @@ -2387,7 +2400,7 @@ out: } static int srpt_ib_cm_req_recv(struct ib_cm_id *cm_id, - struct ib_cm_req_event_param *param, + const struct ib_cm_req_event_param *param, void *private_data) { char sguid[40]; @@ -2499,7 +2512,8 @@ static void srpt_cm_rtu_recv(struct srpt_rdma_ch *ch) * a non-zero value in any other case will trigger a race with the * ib_destroy_cm_id() call in srpt_release_channel(). */ -static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event) +static int srpt_cm_handler(struct ib_cm_id *cm_id, + const struct ib_cm_event *event) { struct srpt_rdma_ch *ch = cm_id->context; int ret; @@ -2609,7 +2623,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) struct srpt_send_ioctx *ioctx = container_of(se_cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; - struct ib_send_wr *first_wr = NULL, *bad_wr; + struct ib_send_wr *first_wr = NULL; struct ib_cqe *cqe = &ioctx->rdma_cqe; enum srpt_command_state new_state; int ret, i; @@ -2633,7 +2647,7 @@ static int srpt_write_pending(struct se_cmd *se_cmd) cqe = NULL; } - ret = ib_post_send(ch->qp, first_wr, &bad_wr); + ret = ib_post_send(ch->qp, first_wr, NULL); if (ret) { pr_err("%s: ib_post_send() returned %d for %d (avail: %d)\n", __func__, ret, ioctx->n_rdma, @@ -2671,7 +2685,7 @@ static void srpt_queue_response(struct se_cmd *cmd) container_of(cmd, struct srpt_send_ioctx, cmd); struct srpt_rdma_ch *ch = ioctx->ch; struct srpt_device *sdev = ch->sport->sdev; - struct ib_send_wr send_wr, *first_wr = &send_wr, *bad_wr; + struct ib_send_wr send_wr, *first_wr = &send_wr; struct ib_sge sge; enum srpt_command_state state; int resp_len, ret, i; @@ -2744,7 +2758,7 @@ static void srpt_queue_response(struct se_cmd *cmd) send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - ret = ib_post_send(ch->qp, first_wr, &bad_wr); + ret = ib_post_send(ch->qp, first_wr, NULL); if (ret < 0) { pr_err("%s: sending cmd response failed for tag %llu (%d)\n", __func__, ioctx->cmd.tag, ret); @@ -2968,7 +2982,8 @@ static void srpt_add_one(struct ib_device *device) pr_debug("device = %p\n", device); - sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); + sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt), + GFP_KERNEL); if (!sdev) goto err; @@ -3022,8 +3037,6 @@ static void srpt_add_one(struct ib_device *device) srpt_event_handler); ib_register_event_handler(&sdev->event_handler); - WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port)); - for (i = 1; i <= sdev->device->phys_port_cnt; i++) { sport = &sdev->port[i - 1]; INIT_LIST_HEAD(&sport->nexus_list); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 2361483476a0..444dfd7281b5 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -396,9 +396,9 @@ struct srpt_port { * @sdev_mutex: Serializes use_srq changes. * @use_srq: Whether or not to use SRQ. * @ioctx_ring: Per-HCA SRQ. - * @port: Information about the ports owned by this HCA. * @event_handler: Per-HCA asynchronous IB event handler. * @list: Node in srpt_dev_list. + * @port: Information about the ports owned by this HCA. */ struct srpt_device { struct ib_device *device; @@ -410,9 +410,9 @@ struct srpt_device { struct mutex sdev_mutex; bool use_srq; struct srpt_recv_ioctx **ioctx_ring; - struct srpt_port port[2]; struct ib_event_handler event_handler; struct list_head list; + struct srpt_port port[]; }; #endif /* IB_SRPT_H */ |