summaryrefslogtreecommitdiff
path: root/drivers/infiniband/ulp/ipoib
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/ulp/ipoib')
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h32
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c81
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ethtool.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c15
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c444
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_netlink.c23
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c261
10 files changed, 480 insertions, 399 deletions
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a50b062ed13e..1abe3c62f106 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -91,11 +91,9 @@ enum {
IPOIB_STOP_REAPER = 7,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
- IPOIB_STOP_NEIGH_GC = 11,
IPOIB_NEIGH_TBL_FLUSH = 12,
IPOIB_FLAG_DEV_ADDR_SET = 13,
IPOIB_FLAG_DEV_ADDR_CTRL = 14,
- IPOIB_FLAG_GOING_DOWN = 15,
IPOIB_MAX_BACKOFF_SECONDS = 16,
@@ -252,11 +250,11 @@ struct ipoib_cm_tx {
struct ipoib_neigh *neigh;
struct ipoib_path *path;
struct ipoib_tx_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
+ unsigned int tx_head;
+ unsigned int tx_tail;
unsigned long flags;
u32 mtu;
- unsigned max_send_sge;
+ unsigned int max_send_sge;
};
struct ipoib_cm_rx_buf {
@@ -325,15 +323,22 @@ struct ipoib_dev_priv {
spinlock_t lock;
struct net_device *dev;
+ void (*next_priv_destructor)(struct net_device *dev);
struct napi_struct send_napi;
struct napi_struct recv_napi;
unsigned long flags;
+ /*
+ * This protects access to the child_intfs list.
+ * To READ from child_intfs the RTNL or vlan_rwsem read side must be
+ * held. To WRITE, both RTNL and the vlan_rwsem write side must be held
+ * (in that order). This lock exists because we have a few contexts
+ * where we need the child_intfs, but do not want to grab the RTNL.
+ */
struct rw_semaphore vlan_rwsem;
struct mutex mcast_mutex;
- struct mutex sysfs_mutex;
struct rb_root path_tree;
struct list_head path_list;
@@ -373,8 +378,8 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring;
struct ipoib_tx_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
+ unsigned int tx_head;
+ unsigned int tx_tail;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_ud_wr tx_wr;
struct ib_wc send_wc[MAX_SEND_CQE];
@@ -404,7 +409,7 @@ struct ipoib_dev_priv {
#endif
u64 hca_caps;
struct ipoib_ethtool_st ethtool;
- unsigned max_send_sge;
+ unsigned int max_send_sge;
bool sm_fullmember_sendonly_support;
const struct net_device_ops *rn_ops;
};
@@ -414,7 +419,7 @@ struct ipoib_ah {
struct ib_ah *ah;
struct list_head list;
struct kref ref;
- unsigned last_send;
+ unsigned int last_send;
int valid;
};
@@ -483,6 +488,7 @@ static inline void ipoib_put_ah(struct ipoib_ah *ah)
kref_put(&ah->ref, ipoib_free_ah);
}
int ipoib_open(struct net_device *dev);
+void ipoib_intf_free(struct net_device *dev);
int ipoib_add_pkey_attr(struct net_device *dev);
int ipoib_add_umcast_attr(struct net_device *dev);
@@ -510,9 +516,6 @@ void ipoib_ib_dev_down(struct net_device *dev);
int ipoib_ib_dev_stop_default(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
-void ipoib_dev_cleanup(struct net_device *dev);
-
void ipoib_mcast_join_task(struct work_struct *work);
void ipoib_mcast_carrier_on_task(struct work_struct *work);
void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
@@ -600,7 +603,6 @@ void ipoib_pkey_open(struct ipoib_dev_priv *priv);
void ipoib_drain_cq(struct net_device *dev);
void ipoib_set_ethtool_ops(struct net_device *dev);
-void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca);
#define IPOIB_FLAGS_RC 0x80
#define IPOIB_FLAGS_UC 0x40
@@ -729,7 +731,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
static inline
int ipoib_cm_dev_init(struct net_device *dev)
{
- return -ENOSYS;
+ return -EOPNOTSUPP;
}
static inline
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6535d9beb24d..ea01b8dd2be6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -78,7 +78,7 @@ static struct ib_send_wr ipoib_cm_rx_drain_wr = {
};
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *event);
+ const struct ib_cm_event *event);
static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
u64 mapping[IPOIB_CM_RX_SG])
@@ -94,7 +94,6 @@ static void ipoib_cm_dma_unmap_rx(struct ipoib_dev_priv *priv, int frags,
static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- struct ib_recv_wr *bad_wr;
int i, ret;
priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
@@ -102,7 +101,7 @@ static int ipoib_cm_post_receive_srq(struct net_device *dev, int id)
for (i = 0; i < priv->cm.num_frags; ++i)
priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
- ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
+ ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, NULL);
if (unlikely(ret)) {
ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
@@ -120,7 +119,6 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
struct ib_sge *sge, int id)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- struct ib_recv_wr *bad_wr;
int i, ret;
wr->wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
@@ -128,7 +126,7 @@ static int ipoib_cm_post_receive_nonsrq(struct net_device *dev,
for (i = 0; i < IPOIB_CM_RX_SG; ++i)
sge[i].addr = rx->rx_ring[id].mapping[i];
- ret = ib_post_recv(rx->qp, wr, &bad_wr);
+ ret = ib_post_recv(rx->qp, wr, NULL);
if (unlikely(ret)) {
ipoib_warn(priv, "post recv failed for buf %d (%d)\n", id, ret);
ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
@@ -212,7 +210,6 @@ static void ipoib_cm_free_rx_ring(struct net_device *dev,
static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
{
- struct ib_send_wr *bad_wr;
struct ipoib_cm_rx *p;
/* We only reserved 1 extra slot in CQ for drain WRs, so
@@ -227,7 +224,7 @@ static void ipoib_cm_start_rx_drain(struct ipoib_dev_priv *priv)
*/
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
- if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, &bad_wr))
+ if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
ipoib_warn(priv, "failed to post drain wr\n");
list_splice_init(&priv->cm.rx_flush_list, &priv->cm.rx_drain_list);
@@ -275,7 +272,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
static int ipoib_cm_modify_rx_qp(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp,
- unsigned psn)
+ unsigned int psn)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
@@ -363,7 +360,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
if (!rx->rx_ring)
return -ENOMEM;
- t = kmalloc(sizeof *t, GFP_KERNEL);
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
if (!t) {
ret = -ENOMEM;
goto err_free_1;
@@ -421,8 +418,9 @@ err_free_1:
}
static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
- struct ib_qp *qp, struct ib_cm_req_event_param *req,
- unsigned psn)
+ struct ib_qp *qp,
+ const struct ib_cm_req_event_param *req,
+ unsigned int psn)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
@@ -432,7 +430,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
rep.private_data = &data;
- rep.private_data_len = sizeof data;
+ rep.private_data_len = sizeof(data);
rep.flow_control = 0;
rep.rnr_retry_count = req->rnr_retry_count;
rep.srq = ipoib_cm_has_srq(dev);
@@ -441,16 +439,17 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
return ib_send_cm_rep(cm_id, &rep);
}
-static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int ipoib_cm_req_handler(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *event)
{
struct net_device *dev = cm_id->context;
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
- unsigned psn;
+ unsigned int psn;
int ret;
ipoib_dbg(priv, "REQ arrived\n");
- p = kzalloc(sizeof *p, GFP_KERNEL);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
p->dev = dev;
@@ -503,7 +502,7 @@ err_qp:
}
static int ipoib_cm_rx_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *event)
+ const struct ib_cm_event *event)
{
struct ipoib_cm_rx *p;
struct ipoib_dev_priv *priv;
@@ -547,7 +546,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
0, PAGE_SIZE);
--skb_shinfo(skb)->nr_frags;
} else {
- size = min(length, (unsigned) PAGE_SIZE);
+ size = min_t(unsigned int, length, PAGE_SIZE);
skb_frag_size_set(frag, size);
skb->data_len += size;
@@ -641,8 +640,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
}
- frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
- (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+ frags = PAGE_ALIGN(wc->byte_len -
+ min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
+ PAGE_SIZE;
newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
mapping, GFP_ATOMIC);
@@ -657,7 +657,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
- memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+ memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
@@ -698,13 +698,11 @@ static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
struct ipoib_tx_buf *tx_req)
{
- struct ib_send_wr *bad_wr;
-
ipoib_build_sge(priv, tx_req);
priv->tx_wr.wr.wr_id = wr_id | IPOIB_OP_CM;
- return ib_post_send(tx->qp, &priv->tx_wr.wr, &bad_wr);
+ return ib_post_send(tx->qp, &priv->tx_wr.wr, NULL);
}
void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_tx *tx)
@@ -712,7 +710,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
- unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
+ unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb);
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -982,7 +980,8 @@ void ipoib_cm_dev_stop(struct net_device *dev)
cancel_delayed_work(&priv->cm.stale_task);
}
-static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
+static int ipoib_cm_rep_handler(struct ib_cm_id *cm_id,
+ const struct ib_cm_event *event)
{
struct ipoib_cm_tx *p = cm_id->context;
struct ipoib_dev_priv *priv = ipoib_priv(p->dev);
@@ -1068,8 +1067,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG)
- attr.cap.max_send_sge =
- min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
+ attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
+ MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr);
tx->max_send_sge = attr.cap.max_send_sge;
@@ -1094,7 +1093,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
req.qp_num = qp->qp_num;
req.qp_type = qp->qp_type;
req.private_data = &data;
- req.private_data_len = sizeof data;
+ req.private_data_len = sizeof(data);
req.flow_control = 0;
req.starting_psn = 0; /* FIXME */
@@ -1152,7 +1151,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
ret = -ENOMEM;
goto err_tx;
}
- memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
+ memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring));
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
memalloc_noio_restore(noio_flag);
@@ -1248,7 +1247,7 @@ timeout:
}
static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
- struct ib_cm_event *event)
+ const struct ib_cm_event *event)
{
struct ipoib_cm_tx *tx = cm_id->context;
struct ipoib_dev_priv *priv = ipoib_priv(tx->dev);
@@ -1305,7 +1304,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx;
- tx = kzalloc(sizeof *tx, GFP_ATOMIC);
+ tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
if (!tx)
return NULL;
@@ -1370,7 +1369,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
neigh->daddr + QPN_AND_OPTIONS_OFFSET);
goto free_neigh;
}
- memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
+ memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
@@ -1428,7 +1427,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
struct net_device *dev = priv->dev;
struct sk_buff *skb;
unsigned long flags;
- unsigned mtu = priv->mcast_mtu;
+ unsigned int mtu = priv->mcast_mtu;
netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);
@@ -1518,19 +1517,16 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
{
struct net_device *dev = to_net_dev(d);
int ret;
- struct ipoib_dev_priv *priv = ipoib_priv(dev);
-
- if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
- return -EPERM;
-
- if (!mutex_trylock(&priv->sysfs_mutex))
- return restart_syscall();
if (!rtnl_trylock()) {
- mutex_unlock(&priv->sysfs_mutex);
return restart_syscall();
}
+ if (dev->reg_state != NETREG_REGISTERED) {
+ rtnl_unlock();
+ return -EPERM;
+ }
+
ret = ipoib_set_mode(dev, buf);
/* The assumption is that the function ipoib_set_mode returned
@@ -1539,7 +1535,6 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
*/
if (ret != -EBUSY)
rtnl_unlock();
- mutex_unlock(&priv->sysfs_mutex);
return (!ret || ret == -EBUSY) ? count : ret;
}
@@ -1564,7 +1559,7 @@ static void ipoib_cm_create_srq(struct net_device *dev, int max_sge)
priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
if (IS_ERR(priv->cm.srq)) {
- if (PTR_ERR(priv->cm.srq) != -ENOSYS)
+ if (PTR_ERR(priv->cm.srq) != -EOPNOTSUPP)
pr_warn("%s: failed to allocate SRQ, error %ld\n",
priv->ca->name, PTR_ERR(priv->cm.srq));
priv->cm.srq = NULL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
index 2706bf26cbac..83429925dfc6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c
@@ -102,7 +102,7 @@ static int ipoib_set_coalesce(struct net_device *dev,
ret = rdma_set_cq_moderation(priv->recv_cq,
coal->rx_max_coalesced_frames,
coal->rx_coalesce_usecs);
- if (ret && ret != -ENOSYS) {
+ if (ret && ret != -EOPNOTSUPP) {
ipoib_warn(priv, "failed modifying CQ (%d)\n", ret);
return ret;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index ea302b054601..178488028734 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = {
void ipoib_create_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- char name[IFNAMSIZ + sizeof "_path"];
+ char name[IFNAMSIZ + sizeof("_path")];
- snprintf(name, sizeof name, "%s_mcg", dev->name);
+ snprintf(name, sizeof(name), "%s_mcg", dev->name);
priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
ipoib_root, dev, &ipoib_mcg_fops);
if (!priv->mcg_dentry)
ipoib_warn(priv, "failed to create mcg debug file\n");
- snprintf(name, sizeof name, "%s_path", dev->name);
+ snprintf(name, sizeof(name), "%s_path", dev->name);
priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
ipoib_root, dev, &ipoib_path_fops);
if (!priv->path_dentry)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f47f9ace1f48..9006a13af1de 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -40,6 +40,7 @@
#include <linux/ip.h>
#include <linux/tcp.h>
+#include <rdma/ib_cache.h>
#include "ipoib.h"
@@ -57,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ipoib_ah *ah;
struct ib_ah *vah;
- ah = kmalloc(sizeof *ah, GFP_KERNEL);
+ ah = kmalloc(sizeof(*ah), GFP_KERNEL);
if (!ah)
return ERR_PTR(-ENOMEM);
@@ -100,7 +101,6 @@ static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- struct ib_recv_wr *bad_wr;
int ret;
priv->rx_wr.wr_id = id | IPOIB_OP_RECV;
@@ -108,7 +108,7 @@ static int ipoib_ib_post_receive(struct net_device *dev, int id)
priv->rx_sge[1].addr = priv->rx_ring[id].mapping[1];
- ret = ib_post_recv(priv->qp, &priv->rx_wr, &bad_wr);
+ ret = ib_post_recv(priv->qp, &priv->rx_wr, NULL);
if (unlikely(ret)) {
ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
ipoib_ud_dma_unmap_rx(priv, priv->rx_ring[id].mapping);
@@ -202,7 +202,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
memcpy(mapping, priv->rx_ring[wr_id].mapping,
- IPOIB_UD_RX_SG * sizeof *mapping);
+ IPOIB_UD_RX_SG * sizeof(*mapping));
/*
* If we can't allocate a new RX buffer, dump
@@ -541,7 +541,6 @@ static inline int post_send(struct ipoib_dev_priv *priv,
struct ipoib_tx_buf *tx_req,
void *head, int hlen)
{
- struct ib_send_wr *bad_wr;
struct sk_buff *skb = tx_req->skb;
ipoib_build_sge(priv, tx_req);
@@ -558,7 +557,7 @@ static inline int post_send(struct ipoib_dev_priv *priv,
} else
priv->tx_wr.wr.opcode = IB_WR_SEND;
- return ib_post_send(priv->qp, &priv->tx_wr.wr, &bad_wr);
+ return ib_post_send(priv->qp, &priv->tx_wr.wr, NULL);
}
int ipoib_send(struct net_device *dev, struct sk_buff *skb,
@@ -568,7 +567,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
- unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
+ unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb);
if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -1069,7 +1068,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
bool ret = false;
netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
- if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
+ if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
return false;
netif_addr_lock_bh(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95bc0f3..e3d28f9ad9c0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -215,11 +215,6 @@ static int ipoib_stop(struct net_device *dev)
return 0;
}
-static void ipoib_uninit(struct net_device *dev)
-{
- ipoib_dev_cleanup(dev);
-}
-
static netdev_features_t ipoib_fix_features(struct net_device *dev, netdev_features_t features)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -634,7 +629,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
{
struct ipoib_path_iter *iter;
- iter = kmalloc(sizeof *iter, GFP_KERNEL);
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return NULL;
@@ -770,8 +765,10 @@ static void path_rec_completion(int status,
struct rdma_ah_attr av;
if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
- pathrec, &av))
+ pathrec, &av, NULL)) {
ah = ipoib_create_ah(dev, priv->pd, &av);
+ rdma_destroy_ah_attr(&av);
+ }
}
spin_lock_irqsave(&priv->lock, flags);
@@ -883,7 +880,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
if (!priv->broadcast)
return NULL;
- path = kzalloc(sizeof *path, GFP_ATOMIC);
+ path = kzalloc(sizeof(*path), GFP_ATOMIC);
if (!path)
return NULL;
@@ -1199,11 +1196,13 @@ static void ipoib_timeout(struct net_device *dev)
static int ipoib_hard_header(struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
- const void *daddr, const void *saddr, unsigned len)
+ const void *daddr,
+ const void *saddr,
+ unsigned int len)
{
struct ipoib_header *header;
- header = skb_push(skb, sizeof *header);
+ header = skb_push(skb, sizeof(*header));
header->proto = htons(type);
header->reserved = 0;
@@ -1306,9 +1305,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
int i;
LIST_HEAD(remove_list);
- if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
- return;
-
spin_lock_irqsave(&priv->lock, flags);
htbl = rcu_dereference_protected(ntbl->htbl,
@@ -1320,9 +1316,6 @@ static void __ipoib_reap_neigh(struct ipoib_dev_priv *priv)
/* neigh is obsolete if it was idle for two GC periods */
dt = 2 * arp_tbl.gc_interval;
neigh_obsolete = jiffies - dt;
- /* handle possible race condition */
- if (test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
- goto out_unlock;
for (i = 0; i < htbl->size; i++) {
struct ipoib_neigh *neigh;
@@ -1360,9 +1353,8 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv);
- if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
- queue_delayed_work(priv->wq, &priv->neigh_reap_task,
- arp_tbl.gc_interval);
+ queue_delayed_work(priv->wq, &priv->neigh_reap_task,
+ arp_tbl.gc_interval);
}
@@ -1371,7 +1363,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
{
struct ipoib_neigh *neigh;
- neigh = kzalloc(sizeof *neigh, GFP_ATOMIC);
+ neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC);
if (!neigh)
return NULL;
@@ -1524,9 +1516,8 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
htbl = kzalloc(sizeof(*htbl), GFP_KERNEL);
if (!htbl)
return -ENOMEM;
- set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
size = roundup_pow_of_two(arp_tbl.gc_thresh3);
- buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL);
+ buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL);
if (!buckets) {
kfree(htbl);
return -ENOMEM;
@@ -1539,7 +1530,6 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
atomic_set(&ntbl->entries, 0);
/* start garbage collection */
- clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
@@ -1554,7 +1544,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct ipoib_neigh __rcu **buckets = htbl->buckets;
struct ipoib_neigh_table *ntbl = htbl->ntbl;
- kfree(buckets);
+ kvfree(buckets);
kfree(htbl);
complete(&ntbl->deleted);
}
@@ -1649,15 +1639,11 @@ out_unlock:
static void ipoib_neigh_hash_uninit(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- int stopped;
ipoib_dbg(priv, "ipoib_neigh_hash_uninit\n");
init_completion(&priv->ntbl.deleted);
- /* Stop GC if called at init fail need to cancel work */
- stopped = test_and_set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
- if (!stopped)
- cancel_delayed_work(&priv->neigh_reap_task);
+ cancel_delayed_work_sync(&priv->neigh_reap_task);
ipoib_flush_neighs(priv);
@@ -1755,13 +1741,11 @@ static int ipoib_ioctl(struct net_device *dev, struct ifreq *ifr,
return priv->rn_ops->ndo_do_ioctl(dev, ifr, cmd);
}
-int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
+static int ipoib_dev_init(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
int ret = -ENOMEM;
- priv->ca = ca;
- priv->port = port;
priv->qp = NULL;
/*
@@ -1777,7 +1761,7 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
/* create pd, which used both for control and datapath*/
priv->pd = ib_alloc_pd(priv->ca, 0);
if (IS_ERR(priv->pd)) {
- pr_warn("%s: failed to allocate PD\n", ca->name);
+ pr_warn("%s: failed to allocate PD\n", priv->ca->name);
goto clean_wq;
}
@@ -1787,7 +1771,8 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
goto out_free_pd;
}
- if (ipoib_neigh_hash_init(priv) < 0) {
+ ret = ipoib_neigh_hash_init(priv);
+ if (ret) {
pr_warn("%s failed to init neigh hash\n", dev->name);
goto out_dev_uninit;
}
@@ -1796,12 +1781,15 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_open(dev)) {
pr_warn("%s failed to open device\n", dev->name);
ret = -ENODEV;
- goto out_dev_uninit;
+ goto out_hash_uninit;
}
}
return 0;
+out_hash_uninit:
+ ipoib_neigh_hash_uninit(dev);
+
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
@@ -1821,21 +1809,151 @@ out:
return ret;
}
-void ipoib_dev_cleanup(struct net_device *dev)
+/*
+ * This must be called before doing an unregister_netdev on a parent device to
+ * shutdown the IB event handler.
+ */
+static void ipoib_parent_unregister_pre(struct net_device *ndev)
{
- struct ipoib_dev_priv *priv = ipoib_priv(dev), *cpriv, *tcpriv;
- LIST_HEAD(head);
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
- ASSERT_RTNL();
+ /*
+ * ipoib_set_mac checks netif_running before pushing work; clearing
+ * running ensures that it will not add more work.
+ */
+ rtnl_lock();
+ dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
+ rtnl_unlock();
- /* Delete any child interfaces first */
- list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
- /* Stop GC on child */
- set_bit(IPOIB_STOP_NEIGH_GC, &cpriv->flags);
- cancel_delayed_work(&cpriv->neigh_reap_task);
- unregister_netdevice_queue(cpriv->dev, &head);
+ /* ipoib_event() cannot be running once this returns */
+ ib_unregister_event_handler(&priv->event_handler);
+
+ /*
+ * Work on the queue grabs the rtnl lock, so this cannot be done while
+ * also holding it.
+ */
+ flush_workqueue(ipoib_workqueue);
+}
+
+/*
+ * Cache the HCA capability flags in priv->hca_caps and advertise the
+ * offloads they allow on the net_device. Checksum offload is the gate:
+ * without IB_DEVICE_UD_IP_CSUM no feature bits are touched at all, and
+ * TSO is only added on top of checksum offload.
+ */
+static void ipoib_set_dev_features(struct ipoib_dev_priv *priv)
+{
+	struct net_device *ndev = priv->dev;
+
+	priv->hca_caps = priv->ca->attrs.device_cap_flags;
+
+	/* No UD checksum offload: leave hw_features and features alone. */
+	if (!(priv->hca_caps & IB_DEVICE_UD_IP_CSUM))
+		return;
+
+	ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
+	if (priv->hca_caps & IB_DEVICE_UD_TSO)
+		ndev->hw_features |= NETIF_F_TSO;
+
+	/* Everything the hardware can do is enabled by default. */
+	ndev->features |= ndev->hw_features;
+}
+
+static int ipoib_parent_init(struct net_device *ndev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(ndev);
+ struct ib_port_attr attr;
+ int result;
+
+ result = ib_query_port(priv->ca, priv->port, &attr);
+ if (result) {
+ pr_warn("%s: ib_query_port %d failed\n", priv->ca->name,
+ priv->port);
+ return result;
+ }
+ priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+
+ result = ib_query_pkey(priv->ca, priv->port, 0, &priv->pkey);
+ if (result) {
+ pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
+ priv->ca->name, priv->port, result);
+ return result;
}
- unregister_netdevice_many(&head);
+
+ result = rdma_query_gid(priv->ca, priv->port, 0, &priv->local_gid);
+ if (result) {
+ pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n",
+ priv->ca->name, priv->port, result);
+ return result;
+ }
+ memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
+ sizeof(union ib_gid));
+
+ SET_NETDEV_DEV(priv->dev, priv->ca->dev.parent);
+ priv->dev->dev_id = priv->port - 1;
+
+ return 0;
+}
+
+/*
+ * Copy the attributes a child (P_Key) interface inherits from its parent:
+ * the maximum IB MTU, the hardware address and the local GID, and flag the
+ * interface as a sub-interface.
+ *
+ * Taking the parent reference and linking into the parent's child_intfs
+ * list is deliberately NOT done here; ipoib_ndo_init() does it only after
+ * every step that can fail has succeeded.
+ */
+static void ipoib_child_init(struct net_device *ndev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
+	struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+	priv->max_ib_mtu = ppriv->max_ib_mtu;
+	set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
+	memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
+	memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
+}
+
+/*
+ * ndo_init callback shared by parent and child interfaces.
+ *
+ * Fills in the port/parent derived attributes, sets the MTU limits, forces
+ * the full-membership bit in the P_Key and mirrors it into the broadcast
+ * address, selects the offload features and finally runs ipoib_dev_init().
+ *
+ * Returns 0 on success or the negative errno from ipoib_parent_init() or
+ * ipoib_dev_init(). The ipoib_dev_init() error used to be logged and then
+ * dropped, which let registration continue for a device whose
+ * initialization had failed.
+ */
+static int ipoib_ndo_init(struct net_device *ndev)
+{
+	struct ipoib_dev_priv *priv = ipoib_priv(ndev);
+	int rc;
+
+	if (priv->parent) {
+		ipoib_child_init(ndev);
+	} else {
+		rc = ipoib_parent_init(ndev);
+		if (rc)
+			return rc;
+	}
+
+	/* MTU will be reset when mcast join happens */
+	ndev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
+	priv->mcast_mtu = priv->admin_mtu = ndev->mtu;
+	ndev->max_mtu = IPOIB_CM_MTU;
+
+	ndev->neigh_priv_len = sizeof(struct ipoib_neigh);
+
+	/*
+	 * Set the full membership bit, so that we join the right
+	 * broadcast group, etc.
+	 */
+	priv->pkey |= 0x8000;
+
+	ndev->broadcast[8] = priv->pkey >> 8;
+	ndev->broadcast[9] = priv->pkey & 0xff;
+	set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
+
+	ipoib_set_dev_features(priv);
+
+	rc = ipoib_dev_init(ndev);
+	if (rc) {
+		pr_warn("%s: failed to initialize device: %s port %d (ret = %d)\n",
+			priv->ca->name, priv->dev->name, priv->port, rc);
+		return rc;
+	}
+
+	/*
+	 * Link the child to its parent only now that nothing can fail any
+	 * more. ipoib_ndo_uninit() is what drops this reference and list
+	 * entry, so an earlier error return must not leave them behind.
+	 * NOTE(review): this relies on register_netdevice() not calling
+	 * ndo_uninit when ndo_init fails -- confirm against net/core/dev.c.
+	 */
+	if (priv->parent) {
+		struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+		dev_hold(priv->parent);
+
+		down_write(&ppriv->vlan_rwsem);
+		list_add_tail(&priv->list, &ppriv->child_intfs);
+		up_write(&ppriv->vlan_rwsem);
+	}
+
+	return 0;
+}
+
+static void ipoib_ndo_uninit(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+
+ ASSERT_RTNL();
+
+ /*
+ * ipoib_remove_one guarantees the children are removed before the
+ * parent, and that is the only place where a parent can be removed.
+ */
+ WARN_ON(!list_empty(&priv->child_intfs));
ipoib_neigh_hash_uninit(dev);
@@ -1847,6 +1965,16 @@ void ipoib_dev_cleanup(struct net_device *dev)
destroy_workqueue(priv->wq);
priv->wq = NULL;
}
+
+ if (priv->parent) {
+ struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
+
+ down_write(&ppriv->vlan_rwsem);
+ list_del(&priv->list);
+ up_write(&ppriv->vlan_rwsem);
+
+ dev_put(priv->parent);
+ }
}
static int ipoib_set_vf_link_state(struct net_device *dev, int vf, int link_state)
@@ -1894,7 +2022,8 @@ static const struct header_ops ipoib_header_ops = {
};
static const struct net_device_ops ipoib_netdev_ops_pf = {
- .ndo_uninit = ipoib_uninit,
+ .ndo_init = ipoib_ndo_init,
+ .ndo_uninit = ipoib_ndo_uninit,
.ndo_open = ipoib_open,
.ndo_stop = ipoib_stop,
.ndo_change_mtu = ipoib_change_mtu,
@@ -1913,7 +2042,8 @@ static const struct net_device_ops ipoib_netdev_ops_pf = {
};
static const struct net_device_ops ipoib_netdev_ops_vf = {
- .ndo_uninit = ipoib_uninit,
+ .ndo_init = ipoib_ndo_init,
+ .ndo_uninit = ipoib_ndo_uninit,
.ndo_open = ipoib_open,
.ndo_stop = ipoib_stop,
.ndo_change_mtu = ipoib_change_mtu,
@@ -1945,6 +2075,13 @@ void ipoib_setup_common(struct net_device *dev)
netif_keep_dst(dev);
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
+
+ /*
+ * unregister_netdev always frees the netdev, we use this mode
+ * consistently to unify all the various unregister paths, including
+ * those connected to rtnl_link_ops which require it.
+ */
+ dev->needs_free_netdev = true;
}
static void ipoib_build_priv(struct net_device *dev)
@@ -1955,7 +2092,6 @@ static void ipoib_build_priv(struct net_device *dev)
spin_lock_init(&priv->lock);
init_rwsem(&priv->vlan_rwsem);
mutex_init(&priv->mcast_mutex);
- mutex_init(&priv->sysfs_mutex);
INIT_LIST_HEAD(&priv->path_list);
INIT_LIST_HEAD(&priv->child_intfs);
@@ -1999,9 +2135,7 @@ static struct net_device
rn->send = ipoib_send;
rn->attach_mcast = ipoib_mcast_attach;
rn->detach_mcast = ipoib_mcast_detach;
- rn->free_rdma_netdev = free_netdev;
rn->hca = hca;
-
dev->netdev_ops = &ipoib_netdev_default_pf;
return dev;
@@ -2039,6 +2173,9 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
if (!priv)
return NULL;
+ priv->ca = hca;
+ priv->port = port;
+
dev = ipoib_get_netdev(hca, port, name);
if (!dev)
goto free_priv;
@@ -2053,6 +2190,15 @@ struct ipoib_dev_priv *ipoib_intf_alloc(struct ib_device *hca, u8 port,
rn = netdev_priv(dev);
rn->clnt_priv = priv;
+
+ /*
+ * Only the child register_netdev flows can handle priv_destructor
+ * being set, so we force it to NULL here and handle manually until it
+ * is safe to turn on.
+ */
+ priv->next_priv_destructor = dev->priv_destructor;
+ dev->priv_destructor = NULL;
+
ipoib_build_priv(dev);
return priv;
@@ -2061,6 +2207,27 @@ free_priv:
return NULL;
}
+/*
+ * Release the ipoib private state attached to @dev.
+ *
+ * Restores the priv_destructor that ipoib_intf_alloc() stashed in
+ * priv->next_priv_destructor, invokes it if one was set, then clears it,
+ * drops the rdma_netdev back-pointer to priv and frees priv itself.
+ * The net_device is not freed here.
+ */
+void ipoib_intf_free(struct net_device *dev)
+{
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct rdma_netdev *rn = netdev_priv(dev);
+
+ dev->priv_destructor = priv->next_priv_destructor;
+ if (dev->priv_destructor)
+ dev->priv_destructor(dev);
+
+ /*
+ * There are some error flows around register_netdev failing that may
+ * attempt to call priv_destructor twice, prevent that from happening.
+ */
+ dev->priv_destructor = NULL;
+
+ /* unregister/destroy is very complicated. Make bugs more obvious. */
+ rn->clnt_priv = NULL;
+
+ kfree(priv);
+}
+
static ssize_t show_pkey(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -2186,12 +2353,6 @@ static ssize_t create_child(struct device *dev,
if (pkey <= 0 || pkey > 0xffff || pkey == 0x8000)
return -EINVAL;
- /*
- * Set the full membership bit, so that we join the right
- * broadcast group, etc.
- */
- pkey |= 0x8000;
-
ret = ipoib_vlan_add(to_net_dev(dev), pkey);
return ret ? ret : count;
@@ -2223,87 +2384,19 @@ int ipoib_add_pkey_attr(struct net_device *dev)
return device_create_file(&dev->dev, &dev_attr_pkey);
}
-void ipoib_set_dev_features(struct ipoib_dev_priv *priv, struct ib_device *hca)
-{
- priv->hca_caps = hca->attrs.device_cap_flags;
-
- if (priv->hca_caps & IB_DEVICE_UD_IP_CSUM) {
- priv->dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM;
-
- if (priv->hca_caps & IB_DEVICE_UD_TSO)
- priv->dev->hw_features |= NETIF_F_TSO;
-
- priv->dev->features |= priv->dev->hw_features;
- }
-}
-
static struct net_device *ipoib_add_port(const char *format,
struct ib_device *hca, u8 port)
{
struct ipoib_dev_priv *priv;
- struct ib_port_attr attr;
- struct rdma_netdev *rn;
- int result = -ENOMEM;
+ struct net_device *ndev;
+ int result;
priv = ipoib_intf_alloc(hca, port, format);
if (!priv) {
pr_warn("%s, %d: ipoib_intf_alloc failed\n", hca->name, port);
- goto alloc_mem_failed;
- }
-
- SET_NETDEV_DEV(priv->dev, hca->dev.parent);
- priv->dev->dev_id = port - 1;
-
- result = ib_query_port(hca, port, &attr);
- if (result) {
- pr_warn("%s: ib_query_port %d failed\n", hca->name, port);
- goto device_init_failed;
- }
-
- priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
-
- /* MTU will be reset when mcast join happens */
- priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
- priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
- priv->dev->max_mtu = IPOIB_CM_MTU;
-
- priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
-
- result = ib_query_pkey(hca, port, 0, &priv->pkey);
- if (result) {
- pr_warn("%s: ib_query_pkey port %d failed (ret = %d)\n",
- hca->name, port, result);
- goto device_init_failed;
- }
-
- ipoib_set_dev_features(priv, hca);
-
- /*
- * Set the full membership bit, so that we join the right
- * broadcast group, etc.
- */
- priv->pkey |= 0x8000;
-
- priv->dev->broadcast[8] = priv->pkey >> 8;
- priv->dev->broadcast[9] = priv->pkey & 0xff;
-
- result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
- if (result) {
- pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
- hca->name, port, result);
- goto device_init_failed;
- }
-
- memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw,
- sizeof(union ib_gid));
- set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
-
- result = ipoib_dev_init(priv->dev, hca, port);
- if (result) {
- pr_warn("%s: failed to initialize port %d (ret = %d)\n",
- hca->name, port, result);
- goto device_init_failed;
+ return ERR_PTR(-ENOMEM);
}
+ ndev = priv->dev;
INIT_IB_EVENT_HANDLER(&priv->event_handler,
priv->ca, ipoib_event);
@@ -2312,46 +2405,43 @@ static struct net_device *ipoib_add_port(const char *format,
/* call event handler to ensure pkey in sync */
queue_work(ipoib_workqueue, &priv->flush_heavy);
- result = register_netdev(priv->dev);
+ result = register_netdev(ndev);
if (result) {
pr_warn("%s: couldn't register ipoib port %d; error %d\n",
hca->name, port, result);
- goto register_failed;
+
+ ipoib_parent_unregister_pre(ndev);
+ ipoib_intf_free(ndev);
+ free_netdev(ndev);
+
+ return ERR_PTR(result);
}
- result = -ENOMEM;
- if (ipoib_cm_add_mode_attr(priv->dev))
+ /*
+ * We cannot set priv_destructor before register_netdev because we
+ * need priv to be always valid during the error flow to execute
+ * ipoib_parent_unregister_pre(). Instead handle it manually and only
+ * enter priv_destructor mode once we are completely registered.
+ */
+ ndev->priv_destructor = ipoib_intf_free;
+
+ if (ipoib_cm_add_mode_attr(ndev))
goto sysfs_failed;
- if (ipoib_add_pkey_attr(priv->dev))
+ if (ipoib_add_pkey_attr(ndev))
goto sysfs_failed;
- if (ipoib_add_umcast_attr(priv->dev))
+ if (ipoib_add_umcast_attr(ndev))
goto sysfs_failed;
- if (device_create_file(&priv->dev->dev, &dev_attr_create_child))
+ if (device_create_file(&ndev->dev, &dev_attr_create_child))
goto sysfs_failed;
- if (device_create_file(&priv->dev->dev, &dev_attr_delete_child))
+ if (device_create_file(&ndev->dev, &dev_attr_delete_child))
goto sysfs_failed;
- return priv->dev;
+ return ndev;
sysfs_failed:
- unregister_netdev(priv->dev);
-
-register_failed:
- ib_unregister_event_handler(&priv->event_handler);
- flush_workqueue(ipoib_workqueue);
- /* Stop GC if started before flush */
- set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
- cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(priv->wq);
- ipoib_dev_cleanup(priv->dev);
-
-device_init_failed:
- rn = netdev_priv(priv->dev);
- rn->free_rdma_netdev(priv->dev);
- kfree(priv);
-
-alloc_mem_failed:
- return ERR_PTR(result);
+ ipoib_parent_unregister_pre(ndev);
+ unregister_netdev(ndev);
+ return ERR_PTR(-ENOMEM);
}
static void ipoib_add_one(struct ib_device *device)
@@ -2362,7 +2452,7 @@ static void ipoib_add_one(struct ib_device *device)
int p;
int count = 0;
- dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
+ dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
if (!dev_list)
return;
@@ -2396,39 +2486,18 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
return;
list_for_each_entry_safe(priv, tmp, dev_list, list) {
- struct rdma_netdev *parent_rn = netdev_priv(priv->dev);
-
- ib_unregister_event_handler(&priv->event_handler);
- flush_workqueue(ipoib_workqueue);
-
- /* mark interface in the middle of destruction */
- set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags);
+ LIST_HEAD(head);
+ ipoib_parent_unregister_pre(priv->dev);
rtnl_lock();
- dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
- rtnl_unlock();
-
- /* Stop GC */
- set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
- cancel_delayed_work(&priv->neigh_reap_task);
- flush_workqueue(priv->wq);
-
- /* Wrap rtnl_lock/unlock with mutex to protect sysfs calls */
- mutex_lock(&priv->sysfs_mutex);
- unregister_netdev(priv->dev);
- mutex_unlock(&priv->sysfs_mutex);
-
- parent_rn->free_rdma_netdev(priv->dev);
- list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
- struct rdma_netdev *child_rn;
+ list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs,
+ list)
+ unregister_netdevice_queue(cpriv->dev, &head);
+ unregister_netdevice_queue(priv->dev, &head);
+ unregister_netdevice_many(&head);
- child_rn = netdev_priv(cpriv->dev);
- child_rn->free_rdma_netdev(cpriv->dev);
- kfree(cpriv);
- }
-
- kfree(priv);
+ rtnl_unlock();
}
kfree(dev_list);
@@ -2476,8 +2545,7 @@ static int __init ipoib_init_module(void)
* its private workqueue, and we only queue up flush events
* on our global flush workqueue. This avoids the deadlocks.
*/
- ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush",
- WQ_MEM_RECLAIM);
+ ipoib_workqueue = alloc_ordered_workqueue("ipoib_flush", 0);
if (!ipoib_workqueue) {
ret = -ENOMEM;
goto err_fs;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 6709328d90f8..b9e9562f5034 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
{
struct ipoib_mcast *mcast;
- mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+ mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
if (!mcast)
return NULL;
@@ -822,6 +822,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
if (neigh && list_empty(&neigh->list)) {
kref_get(&mcast->ah->ref);
neigh->ah = mcast->ah;
+ neigh->ah->valid = 1;
list_add_tail(&neigh->list, &mcast->neigh_list);
}
}
@@ -917,7 +918,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
continue;
- memcpy(mgid.raw, ha->addr + 4, sizeof mgid);
+ memcpy(mgid.raw, ha->addr + 4, sizeof(mgid));
mcast = __ipoib_mcast_find(dev, &mgid);
if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -997,7 +998,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
{
struct ipoib_mcast_iter *iter;
- iter = kmalloc(sizeof *iter, GFP_KERNEL);
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return NULL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index 3e44087935ae..d4d553a51fa9 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -122,15 +122,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
} else
child_pkey = nla_get_u16(data[IFLA_IPOIB_PKEY]);
- if (child_pkey == 0 || child_pkey == 0x8000)
- return -EINVAL;
-
- /*
- * Set the full membership bit, so that we join the right
- * broadcast group, etc.
- */
- child_pkey |= 0x8000;
-
err = __ipoib_vlan_add(ppriv, ipoib_priv(dev),
child_pkey, IPOIB_RTNL_CHILD);
@@ -139,19 +130,6 @@ static int ipoib_new_child_link(struct net *src_net, struct net_device *dev,
return err;
}
-static void ipoib_unregister_child_dev(struct net_device *dev, struct list_head *head)
-{
- struct ipoib_dev_priv *priv, *ppriv;
-
- priv = ipoib_priv(dev);
- ppriv = ipoib_priv(priv->parent);
-
- down_write(&ppriv->vlan_rwsem);
- unregister_netdevice_queue(dev, head);
- list_del(&priv->list);
- up_write(&ppriv->vlan_rwsem);
-}
-
static size_t ipoib_get_size(const struct net_device *dev)
{
return nla_total_size(2) + /* IFLA_IPOIB_PKEY */
@@ -167,7 +145,6 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
.setup = ipoib_setup_common,
.newlink = ipoib_new_child_link,
.changelink = ipoib_changelink,
- .dellink = ipoib_unregister_child_dev,
.get_size = ipoib_get_size,
.fill_info = ipoib_fill_info,
};
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 984a88096f39..9f36ca786df8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
if (set_qkey) {
ret = -ENOMEM;
- qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+ qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
if (!qp_attr)
goto out;
@@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
.cap = {
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
- .max_send_sge = min_t(u32, priv->ca->attrs.max_sge,
+ .max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
MAX_SKB_FRAGS + 1),
.max_recv_sge = IPOIB_UD_RX_SG
},
@@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
- if (ret != -ENOSYS)
- return -ENODEV;
+ if (ret != -EOPNOTSUPP)
+ return ret;
req_vec = (priv->port - 1) * 2;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 55a9b71ed05a..341753fbda54 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -50,68 +50,112 @@ static ssize_t show_parent(struct device *d, struct device_attribute *attr,
}
static DEVICE_ATTR(parent, S_IRUGO, show_parent, NULL);
+static bool is_child_unique(struct ipoib_dev_priv *ppriv,
+ struct ipoib_dev_priv *priv)
+{
+ struct ipoib_dev_priv *tpriv;
+
+ ASSERT_RTNL();
+
+ /*
+ * Since the legacy sysfs interface uses the pkey for deletion, it
+ * cannot support more than one interface with the same pkey; that
+ * would create ambiguity. The RTNL interface deletes using the netdev,
+ * so it has no problem supporting duplicated pkeys.
+ */
+ if (priv->child_type != IPOIB_LEGACY_CHILD)
+ return true;
+
+ /*
+ * First ensure this isn't a duplicate. We check the parent device and
+ * then all of the legacy child interfaces to make sure the Pkey
+ * doesn't match.
+ */
+ if (ppriv->pkey == priv->pkey)
+ return false;
+
+ list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
+ if (tpriv->pkey == priv->pkey &&
+ tpriv->child_type == IPOIB_LEGACY_CHILD)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * NOTE: If this function fails then priv->dev will remain valid; however,
+ * priv may have been freed and must not be touched by the caller in the
+ * error case.
+ *
+ * If (ndev->reg_state == NETREG_UNINITIALIZED) then it is up to the caller to
+ * free the net_device (just as rtnl_newlink does) otherwise the net_device
+ * will be freed when the rtnl is unlocked.
+ */
int __ipoib_vlan_add(struct ipoib_dev_priv *ppriv, struct ipoib_dev_priv *priv,
u16 pkey, int type)
{
+ struct net_device *ndev = priv->dev;
int result;
- priv->max_ib_mtu = ppriv->max_ib_mtu;
- /* MTU will be reset when mcast join happens */
- priv->dev->mtu = IPOIB_UD_MTU(priv->max_ib_mtu);
- priv->mcast_mtu = priv->admin_mtu = priv->dev->mtu;
- priv->parent = ppriv->dev;
- set_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags);
+ ASSERT_RTNL();
+
+ /*
+ * Racing with unregister of the parent must be prevented by the
+ * caller.
+ */
+ WARN_ON(ppriv->dev->reg_state != NETREG_REGISTERED);
- ipoib_set_dev_features(priv, ppriv->ca);
+ if (pkey == 0 || pkey == 0x8000) {
+ result = -EINVAL;
+ goto out_early;
+ }
+ priv->parent = ppriv->dev;
priv->pkey = pkey;
+ priv->child_type = type;
- memcpy(priv->dev->dev_addr, ppriv->dev->dev_addr, INFINIBAND_ALEN);
- memcpy(&priv->local_gid, &ppriv->local_gid, sizeof(priv->local_gid));
- set_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
- priv->dev->broadcast[8] = pkey >> 8;
- priv->dev->broadcast[9] = pkey & 0xff;
-
- result = ipoib_dev_init(priv->dev, ppriv->ca, ppriv->port);
- if (result < 0) {
- ipoib_warn(ppriv, "failed to initialize subinterface: "
- "device %s, port %d",
- ppriv->ca->name, ppriv->port);
- goto err;
+ if (!is_child_unique(ppriv, priv)) {
+ result = -ENOTUNIQ;
+ goto out_early;
}
- result = register_netdevice(priv->dev);
+ /* We do not need to touch priv if register_netdevice fails */
+ ndev->priv_destructor = ipoib_intf_free;
+
+ result = register_netdevice(ndev);
if (result) {
ipoib_warn(priv, "failed to initialize; error %i", result);
- goto register_failed;
+
+ /*
+ * register_netdevice sometimes calls priv_destructor and sometimes
+ * does not. The out_early path runs it if it is still set.
+ */
+ goto out_early;
}
/* RTNL childs don't need proprietary sysfs entries */
if (type == IPOIB_LEGACY_CHILD) {
- if (ipoib_cm_add_mode_attr(priv->dev))
+ if (ipoib_cm_add_mode_attr(ndev))
goto sysfs_failed;
- if (ipoib_add_pkey_attr(priv->dev))
+ if (ipoib_add_pkey_attr(ndev))
goto sysfs_failed;
- if (ipoib_add_umcast_attr(priv->dev))
+ if (ipoib_add_umcast_attr(ndev))
goto sysfs_failed;
- if (device_create_file(&priv->dev->dev, &dev_attr_parent))
+ if (device_create_file(&ndev->dev, &dev_attr_parent))
goto sysfs_failed;
}
- priv->child_type = type;
- list_add_tail(&priv->list, &ppriv->child_intfs);
-
return 0;
sysfs_failed:
- result = -ENOMEM;
unregister_netdevice(priv->dev);
+ return -ENOMEM;
-register_failed:
- ipoib_dev_cleanup(priv->dev);
-
-err:
+out_early:
+ if (ndev->priv_destructor)
+ ndev->priv_destructor(ndev);
return result;
}
@@ -119,129 +163,124 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
{
struct ipoib_dev_priv *ppriv, *priv;
char intf_name[IFNAMSIZ];
- struct ipoib_dev_priv *tpriv;
+ struct net_device *ndev;
int result;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = ipoib_priv(pdev);
-
- if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
- return -EPERM;
-
- snprintf(intf_name, sizeof intf_name, "%s.%04x",
- ppriv->dev->name, pkey);
-
- if (!mutex_trylock(&ppriv->sysfs_mutex))
+ if (!rtnl_trylock())
return restart_syscall();
- if (!rtnl_trylock()) {
- mutex_unlock(&ppriv->sysfs_mutex);
- return restart_syscall();
- }
-
- if (!down_write_trylock(&ppriv->vlan_rwsem)) {
+ if (pdev->reg_state != NETREG_REGISTERED) {
rtnl_unlock();
- mutex_unlock(&ppriv->sysfs_mutex);
- return restart_syscall();
+ return -EPERM;
}
+ ppriv = ipoib_priv(pdev);
+
+ snprintf(intf_name, sizeof(intf_name), "%s.%04x",
+ ppriv->dev->name, pkey);
+
priv = ipoib_intf_alloc(ppriv->ca, ppriv->port, intf_name);
if (!priv) {
result = -ENOMEM;
goto out;
}
-
- /*
- * First ensure this isn't a duplicate. We check the parent device and
- * then all of the legacy child interfaces to make sure the Pkey
- * doesn't match.
- */
- if (ppriv->pkey == pkey) {
- result = -ENOTUNIQ;
- goto out;
- }
-
- list_for_each_entry(tpriv, &ppriv->child_intfs, list) {
- if (tpriv->pkey == pkey &&
- tpriv->child_type == IPOIB_LEGACY_CHILD) {
- result = -ENOTUNIQ;
- goto out;
- }
- }
+ ndev = priv->dev;
result = __ipoib_vlan_add(ppriv, priv, pkey, IPOIB_LEGACY_CHILD);
+ if (result && ndev->reg_state == NETREG_UNINITIALIZED)
+ free_netdev(ndev);
+
out:
- up_write(&ppriv->vlan_rwsem);
rtnl_unlock();
- mutex_unlock(&ppriv->sysfs_mutex);
- if (result && priv) {
- struct rdma_netdev *rn;
+ return result;
+}
+
+struct ipoib_vlan_delete_work {
+ struct work_struct work;
+ struct net_device *dev;
+};
+
+/*
+ * sysfs callbacks of a netdevice cannot obtain the rtnl lock as
+ * unregister_netdev ultimately deletes the sysfs files while holding the rtnl
+ * lock. This deadlocks the system.
+ *
+ * A callback can use rtnl_trylock to avoid the deadlock but it cannot call
+ * unregister_netdev as that internally takes and releases the rtnl_lock. So
+ * instead we find the netdev to unregister and then do the actual unregister
+ * from the global work queue where we can obtain the rtnl_lock safely.
+ */
+static void ipoib_vlan_delete_task(struct work_struct *work)
+{
+ struct ipoib_vlan_delete_work *pwork =
+ container_of(work, struct ipoib_vlan_delete_work, work);
+ struct net_device *dev = pwork->dev;
+
+ rtnl_lock();
+
+ /* Unregistering tasks can race with another task or parent removal */
+ if (dev->reg_state == NETREG_REGISTERED) {
+ struct ipoib_dev_priv *priv = ipoib_priv(dev);
+ struct ipoib_dev_priv *ppriv = ipoib_priv(priv->parent);
- rn = netdev_priv(priv->dev);
- rn->free_rdma_netdev(priv->dev);
- kfree(priv);
+ ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
+ unregister_netdevice(dev);
}
- return result;
+ rtnl_unlock();
+
+ kfree(pwork);
}
int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
{
struct ipoib_dev_priv *ppriv, *priv, *tpriv;
- struct net_device *dev = NULL;
+ int rc;
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- ppriv = ipoib_priv(pdev);
-
- if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
- return -EPERM;
-
- if (!mutex_trylock(&ppriv->sysfs_mutex))
+ if (!rtnl_trylock())
return restart_syscall();
- if (!rtnl_trylock()) {
- mutex_unlock(&ppriv->sysfs_mutex);
- return restart_syscall();
- }
-
- if (!down_write_trylock(&ppriv->vlan_rwsem)) {
+ if (pdev->reg_state != NETREG_REGISTERED) {
rtnl_unlock();
- mutex_unlock(&ppriv->sysfs_mutex);
- return restart_syscall();
+ return -EPERM;
}
+ ppriv = ipoib_priv(pdev);
+
+ rc = -ENODEV;
list_for_each_entry_safe(priv, tpriv, &ppriv->child_intfs, list) {
if (priv->pkey == pkey &&
priv->child_type == IPOIB_LEGACY_CHILD) {
- list_del(&priv->list);
- dev = priv->dev;
+ struct ipoib_vlan_delete_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_KERNEL);
+ if (!work) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ down_write(&ppriv->vlan_rwsem);
+ list_del_init(&priv->list);
+ up_write(&ppriv->vlan_rwsem);
+ work->dev = priv->dev;
+ INIT_WORK(&work->work, ipoib_vlan_delete_task);
+ queue_work(ipoib_workqueue, &work->work);
+
+ rc = 0;
break;
}
}
- up_write(&ppriv->vlan_rwsem);
-
- if (dev) {
- ipoib_dbg(ppriv, "delete child vlan %s\n", dev->name);
- unregister_netdevice(dev);
- }
+out:
rtnl_unlock();
- mutex_unlock(&ppriv->sysfs_mutex);
-
- if (dev) {
- struct rdma_netdev *rn;
-
- rn = netdev_priv(dev);
- rn->free_rdma_netdev(priv->dev);
- kfree(priv);
- return 0;
- }
- return -ENODEV;
+ return rc;
}