From 3874397c0bdec3c21ce071711cd105165179b8eb Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Mon, 21 Nov 2011 08:43:54 -0500 Subject: IB/ipoib: Prevent hung task or softlockup processing multicast response This following can occur with ipoib when processing a multicast reponse: BUG: soft lockup - CPU#0 stuck for 67s! [ib_mad1:982] Modules linked in: ... CPU 0: Modules linked in: ... Pid: 982, comm: ib_mad1 Not tainted 2.6.32-131.0.15.el6.x86_64 #1 ProLiant DL160 G5 RIP: 0010:[] [] _spin_unlock_irqrestore+0x17/0x20 RSP: 0018:ffff8802119ed860 EFLAGS: 00000246 0000000000000004 RBX: ffff8802119ed860 RCX: 000000000000a299 RDX: ffff88021086c700 RSI: 0000000000000246 RDI: 0000000000000246 RBP: ffffffff8100bc8e R08: ffff880210ac229c R09: 0000000000000000 R10: ffff88021278aab8 R11: 0000000000000000 R12: ffff8802119ed860 R13: ffffffff8100be6e R14: 0000000000000001 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff880028200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 00000000006d4840 CR3: 0000000209aa5000 CR4: 00000000000406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Call Trace: [] ? ipoib_mcast_send+0x157/0x480 [ib_ipoib] [] ? apic_timer_interrupt+0xe/0x20 [] ? apic_timer_interrupt+0xe/0x20 [] ? ipoib_path_lookup+0x124/0x2d0 [ib_ipoib] [] ? ipoib_start_xmit+0x17c/0x430 [ib_ipoib] [] ? dev_hard_start_xmit+0x2c8/0x3f0 [] ? sch_direct_xmit+0x15a/0x1c0 [] ? dev_queue_xmit+0x388/0x4d0 [] ? ipoib_mcast_join_finish+0x2c7/0x510 [ib_ipoib] [] ? ipoib_mcast_sendonly_join_complete+0x1b8/0x1f0 [ib_ipoib] [] ? mcast_work_handler+0x1a6/0x710 [ib_sa] [] ? ib_send_mad+0xfe/0x3c0 [ib_mad] [] ? ib_get_cached_lmc+0xa3/0xb0 [ib_core] [] ? join_handler+0xeb/0x200 [ib_sa] [] ? ib_sa_mcmember_rec_callback+0x5c/0xa0 [ib_sa] [] ? recv_handler+0x3c/0x70 [ib_sa] [] ? ib_mad_completion_handler+0x844/0x9d0 [ib_mad] [] ? ib_mad_completion_handler+0x0/0x9d0 [ib_mad] [] ? worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] ? kthread+0x96/0xa0 [] ? child_rip+0xa/0x20 Coinciding with stack trace is the following message: ib0: ib_address_create failed The code below in ipoib_mcast_join_finish() will note the above failure in the address handle but otherwise continue: ah = ipoib_create_ah(dev, priv->pd, &av); if (!ah) { ipoib_warn(priv, "ib_address_create failed\n"); } else { The while loop at the bottom of ipoib_mcast_join_finish() will attempt to send queued multicast packets in mcast->pkt_queue and eventually end up in ipoib_mcast_send(): if (!mcast->ah) { if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE) skb_queue_tail(&mcast->pkt_queue, skb); else { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); } My read is that the code will requeue the packet and return to the ipoib_mcast_join_finish() while loop and the stage is set for the "hung" task diagnostic as the while loop never sees a non-NULL ah, and will do nothing to resolve. There are GFP_ATOMIC allocates in the provider routines, so this is possible and should be dealt with. The test that induced the failure is associated with a host SM on the same server during a shutdown. This patch causes ipoib_mcast_join_finish() to exit with an error which will flush the queued mcast packets. Nothing is done to unwind the QP attached state so that subsequent sends from above will retry the join. Reviewed-by: Ram Vepa Reviewed-by: Gary Leshner Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib_multicast.c') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 1b7a97686356..30ca8f069a17 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -240,8 +240,11 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, av.grh.dgid = mcast->mcmember.mgid; ah = ipoib_create_ah(dev, priv->pd, &av); - if (!ah) { - ipoib_warn(priv, "ib_address_create failed\n"); + if (IS_ERR(ah)) { + ipoib_warn(priv, "ib_address_create failed %ld\n", + -PTR_ERR(ah)); + /* use original error */ + return PTR_ERR(ah); } else { spin_lock_irq(&priv->lock); mcast->ah = ah; -- cgit v1.2.3 From 580da35a31f91a594f3090b7a2c39b85cb051a12 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Nov 2011 22:31:23 +0100 Subject: IB: Fix RCU lockdep splats Commit f2c31e32b37 ("net: fix NULL dereferences in check_peer_redir()") forgot to take care of infiniband uses of dst neighbours. Many thanks to Marc Aurele who provided a nice bug report and feedback. Reported-by: Marc Aurele La France Signed-off-by: Eric Dumazet Cc: David Miller Cc: Signed-off-by: Roland Dreier --- drivers/infiniband/core/addr.c | 9 ++++++--- drivers/infiniband/hw/cxgb3/iwch_cm.c | 4 ++++ drivers/infiniband/hw/cxgb4/cm.c | 6 ++++++ drivers/infiniband/hw/nes/nes_cm.c | 6 ++++-- drivers/infiniband/ulp/ipoib/ipoib_main.c | 18 +++++++++++------- drivers/infiniband/ulp/ipoib/ipoib_multicast.c | 6 ++++-- 6 files changed, 35 insertions(+), 14 deletions(-) (limited to 'drivers/infiniband/ulp/ipoib/ipoib_multicast.c') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 691276bafd78..e9cf51b1343b 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -216,7 +216,9 @@ static int addr4_resolve(struct sockaddr_in *src_in, neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->dst.dev); if (!neigh || !(neigh->nud_state & NUD_VALID)) { + rcu_read_lock(); neigh_event_send(dst_get_neighbour(&rt->dst), NULL); + rcu_read_unlock(); ret = -ENODATA; if (neigh) goto release; @@ -274,15 +276,16 @@ static int addr6_resolve(struct sockaddr_in6 *src_in, goto put; } + rcu_read_lock(); neigh = dst_get_neighbour(dst); if (!neigh || !(neigh->nud_state & NUD_VALID)) { if (neigh) neigh_event_send(neigh, NULL); ret = -ENODATA; - goto put; + } else { + ret = rdma_copy_addr(addr, dst->dev, neigh->ha); } - - ret = rdma_copy_addr(addr, dst->dev, neigh->ha); + rcu_read_unlock(); put: dst_release(dst); return ret; diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c index de6d0774e609..c88b12beef25 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_cm.c +++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c @@ -1375,8 +1375,10 @@ static int pass_accept_req(struct t3cdev *tdev, struct sk_buff *skb, void *ctx) goto reject; } dst = &rt->dst; + rcu_read_lock(); neigh = dst_get_neighbour(dst); l2t = t3_l2t_get(tdev, neigh, neigh->dev); + rcu_read_unlock(); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1946,10 +1948,12 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } ep->dst = &rt->dst; + rcu_read_lock(); neigh = dst_get_neighbour(ep->dst); /* get a l2t entry */ ep->l2t = t3_l2t_get(ep->com.tdev, neigh, neigh->dev); + rcu_read_unlock(); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index b36cdac9c558..75b57bee6622 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1594,6 +1594,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } dst = &rt->dst; + rcu_read_lock(); neigh = dst_get_neighbour(dst); if (neigh->dev->flags & IFF_LOOPBACK) { pdev = ip_dev_find(&init_net, peer_ip); @@ -1620,6 +1621,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) rss_qid = dev->rdev.lldi.rxq_ids[ cxgb4_port_idx(neigh->dev) * step]; } + rcu_read_unlock(); if (!l2t) { printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n", __func__); @@ -1820,6 +1822,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) } ep->dst = &rt->dst; + rcu_read_lock(); neigh = dst_get_neighbour(ep->dst); /* get a l2t entry */ @@ -1856,6 +1859,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep) ep->rss_qid = ep->com.dev->rdev.lldi.rxq_ids[ cxgb4_port_idx(neigh->dev) * step]; } + rcu_read_unlock(); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; @@ -2301,6 +2305,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } ep->dst = &rt->dst; + rcu_read_lock(); neigh = dst_get_neighbour(ep->dst); /* get a l2t entry */ @@ -2339,6 +2344,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) ep->retry_with_mpa_v1 = 0; ep->tried_with_mpa_v1 = 0; } + rcu_read_unlock(); if (!ep->l2t) { printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__); err = -ENOMEM; diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c index dfce9ea98a39..0a52d72371ee 100644 --- a/drivers/infiniband/hw/nes/nes_cm.c +++ b/drivers/infiniband/hw/nes/nes_cm.c @@ -1377,9 +1377,11 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi neigh_release(neigh); } - if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) + if ((neigh == NULL) || (!(neigh->nud_state & NUD_VALID))) { + rcu_read_lock(); neigh_event_send(dst_get_neighbour(&rt->dst), NULL); - + rcu_read_unlock(); + } ip_rt_put(rt); return rc; } diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 7567b6000230..ef38848d1b0e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -555,6 +555,7 @@ static int path_rec_start(struct net_device *dev, return 0; } +/* called with rcu_read_lock */ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); @@ -636,6 +637,7 @@ err_drop: spin_unlock_irqrestore(&priv->lock, flags); } +/* called with rcu_read_lock */ static void ipoib_path_lookup(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(skb->dev); @@ -720,13 +722,14 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) struct neighbour *n = NULL; unsigned long flags; + rcu_read_lock(); if (likely(skb_dst(skb))) n = dst_get_neighbour(skb_dst(skb)); if (likely(n)) { if (unlikely(!*to_ipoib_neigh(n))) { ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + goto unlock; } neigh = *to_ipoib_neigh(n); @@ -749,17 +752,17 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) ipoib_neigh_free(dev, neigh); spin_unlock_irqrestore(&priv->lock, flags); ipoib_path_lookup(skb, dev); - return NETDEV_TX_OK; + goto unlock; } if (ipoib_cm_get(neigh)) { if (ipoib_cm_up(neigh)) { ipoib_cm_send(dev, skb, ipoib_cm_get(neigh)); - return NETDEV_TX_OK; + goto unlock; } } else if (neigh->ah) { ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(n->ha)); - return NETDEV_TX_OK; + goto unlock; } if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) { @@ -793,13 +796,14 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) phdr->hwaddr + 4); dev_kfree_skb_any(skb); ++dev->stats.tx_dropped; - return NETDEV_TX_OK; + goto unlock; } unicast_arp_send(skb, dev, phdr); } } - +unlock: + rcu_read_unlock(); return NETDEV_TX_OK; } @@ -837,7 +841,7 @@ static int ipoib_hard_header(struct sk_buff *skb, dst = skb_dst(skb); n = NULL; if (dst) - n = dst_get_neighbour(dst); + n = dst_get_neighbour_raw(dst); if ((!dst || !n) && daddr) { struct ipoib_pseudoheader *phdr = (struct ipoib_pseudoheader *) skb_push(skb, sizeof *phdr); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 1b7a97686356..cad1894594a8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -266,7 +266,7 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast, skb->dev = dev; if (dst) - n = dst_get_neighbour(dst); + n = dst_get_neighbour_raw(dst); if (!dst || !n) { /* put pseudoheader back on for next time */ skb_push(skb, sizeof (struct ipoib_pseudoheader)); @@ -722,6 +722,8 @@ out: if (mcast && mcast->ah) { struct dst_entry *dst = skb_dst(skb); struct neighbour *n = NULL; + + rcu_read_lock(); if (dst) n = dst_get_neighbour(dst); if (n && !*to_ipoib_neigh(n)) { @@ -734,7 +736,7 @@ out: list_add_tail(&neigh->list, &mcast->neigh_list); } } - + rcu_read_unlock(); spin_unlock_irqrestore(&priv->lock, flags); ipoib_send(dev, skb, mcast->ah, IB_MULTICAST_QPN); return; -- cgit v1.2.3