From 047491ea334a454fa0647ec99dadcc6dd38417e0 Mon Sep 17 00:00:00 2001
From: Jon Maloy <jon.maloy@ericsson.com>
Date: Wed, 10 Oct 2018 17:34:01 +0200
Subject: tipc: set link tolerance correctly in broadcast link

In the patch referred to below we added link tolerance as an additional
criteria for declaring broadcast transmission "stale" and resetting the
affected links.

However, the 'tolerance' field of the broadcast link is never set, and
remains at zero. This renders the whole commit without the intended
improving effect, but luckily also with no negative effect.

In this commit we add the missing initialization.

Fixes: a4dc70d46cf1 ("tipc: extend link reset criteria for stale packet retransmission")
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index fb886b525d95..d229a36968da 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -477,6 +477,8 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 	l->in_session = false;
 	l->bearer_id = bearer_id;
 	l->tolerance = tolerance;
+	if (bc_rcvlink)
+		bc_rcvlink->tolerance = tolerance;
 	l->net_plane = net_plane;
 	l->advertised_mtu = mtu;
 	l->mtu = mtu;
@@ -1031,7 +1033,7 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
 	/* Detect repeated retransmit failures on same packet */
 	if (r->last_retransm != buf_seqno(skb)) {
 		r->last_retransm = buf_seqno(skb);
-		r->stale_limit = jiffies + msecs_to_jiffies(l->tolerance);
+		r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
 	} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
 		link_retransmit_failure(l, skb);
 		if (link_is_bc_sndlink(l))
@@ -1576,9 +1578,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 		strncpy(if_name, data, TIPC_MAX_IF_NAME);
 
 		/* Update own tolerance if peer indicates a non-zero value */
-		if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
+		if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
 			l->tolerance = peers_tol;
-
+			l->bc_rcvlink->tolerance = peers_tol;
+		}
 		/* Update own priority if peer's priority is higher */
 		if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
 			l->priority = peers_prio;
@@ -1604,9 +1607,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 		l->rcv_nxt_state = msg_seqno(hdr) + 1;
 
 		/* Update own tolerance if peer indicates a non-zero value */
-		if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
+		if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) {
 			l->tolerance = peers_tol;
-
+			l->bc_rcvlink->tolerance = peers_tol;
+		}
 		/* Update own prio if peer indicates a different value */
 		if ((peers_prio != l->priority) &&
 		    in_range(peers_prio, 1, TIPC_MAX_LINK_PRI)) {
@@ -2223,6 +2227,8 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
 			     struct sk_buff_head *xmitq)
 {
 	l->tolerance = tol;
+	if (l->bc_rcvlink)
+		l->bc_rcvlink->tolerance = tol;
 	if (link_is_up(l))
 		tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq);
 }
-- 
cgit v1.2.3


From e7eb05823806502747eadc31039cecfd7836ddeb Mon Sep 17 00:00:00 2001
From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Date: Wed, 10 Oct 2018 17:50:23 +0200
Subject: tipc: queue socket protocol error messages into socket receive buffer

In tipc_sk_filter_rcv(), when we detect protocol messages with error we
call tipc_sk_conn_proto_rcv() and let it reset the connection and notify
the socket by calling sk->sk_state_change().

However, tipc_sk_filter_rcv() may have been called from the function
tipc_backlog_rcv(), in which case the socket lock is held and the socket
already awake. This means that the sk_state_change() call is ignored and
the error notification lost. Now the receive queue will remain empty and
the socket sleeps forever.

In this commit, we convert the protocol message into a connection abort
message and enqueue it into the socket's receive queue. By this addition
to the above state change we cover all conditions.

Acked-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b6f99b021d09..49810fdff4c5 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1196,6 +1196,7 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
  * @skb: pointer to message buffer.
  */
 static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
+				   struct sk_buff_head *inputq,
 				   struct sk_buff_head *xmitq)
 {
 	struct tipc_msg *hdr = buf_msg(skb);
@@ -1213,7 +1214,16 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
 		tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk),
 				      tsk_peer_port(tsk));
 		sk->sk_state_change(sk);
-		goto exit;
+
+		/* State change is ignored if socket already awake,
+		 * - convert msg to abort msg and add to inqueue
+		 */
+		msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE);
+		msg_set_type(hdr, TIPC_CONN_MSG);
+		msg_set_size(hdr, BASIC_H_SIZE);
+		msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+		__skb_queue_tail(inputq, skb);
+		return;
 	}
 
 	tsk->probe_unacked = false;
@@ -1936,7 +1946,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
 
 	switch (msg_user(hdr)) {
 	case CONN_MANAGER:
-		tipc_sk_conn_proto_rcv(tsk, skb, xmitq);
+		tipc_sk_conn_proto_rcv(tsk, skb, inputq, xmitq);
 		return;
 	case SOCK_WAKEUP:
 		tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0);
-- 
cgit v1.2.3


From a1f8dd34e64af689e95122921fb2ca83dedd4c4e Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Thu, 11 Oct 2018 19:57:56 +0800
Subject: tipc: eliminate possible recursive locking detected by LOCKDEP

When booting kernel with LOCKDEP option, below warning info was found:

WARNING: possible recursive locking detected
4.19.0-rc7+ #14 Not tainted
--------------------------------------------
swapper/0/1 is trying to acquire lock:
00000000dcfc0fc8 (&(&list->lock)->rlock#4){+...}, at: spin_lock_bh
include/linux/spinlock.h:334 [inline]
00000000dcfc0fc8 (&(&list->lock)->rlock#4){+...}, at:
tipc_link_reset+0x125/0xdf0 net/tipc/link.c:850

but task is already holding lock:
00000000cbb9b036 (&(&list->lock)->rlock#4){+...}, at: spin_lock_bh
include/linux/spinlock.h:334 [inline]
00000000cbb9b036 (&(&list->lock)->rlock#4){+...}, at:
tipc_link_reset+0xfa/0xdf0 net/tipc/link.c:849

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&(&list->lock)->rlock#4);
  lock(&(&list->lock)->rlock#4);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

2 locks held by swapper/0/1:
 #0: 00000000f7539d34 (pernet_ops_rwsem){+.+.}, at:
register_pernet_subsys+0x19/0x40 net/core/net_namespace.c:1051
 #1: 00000000cbb9b036 (&(&list->lock)->rlock#4){+...}, at:
spin_lock_bh include/linux/spinlock.h:334 [inline]
 #1: 00000000cbb9b036 (&(&list->lock)->rlock#4){+...}, at:
tipc_link_reset+0xfa/0xdf0 net/tipc/link.c:849

stack backtrace:
CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.19.0-rc7+ #14
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1af/0x295 lib/dump_stack.c:113
 print_deadlock_bug kernel/locking/lockdep.c:1759 [inline]
 check_deadlock kernel/locking/lockdep.c:1803 [inline]
 validate_chain kernel/locking/lockdep.c:2399 [inline]
 __lock_acquire+0xf1e/0x3c60 kernel/locking/lockdep.c:3411
 lock_acquire+0x1db/0x520 kernel/locking/lockdep.c:3900
 __raw_spin_lock_bh include/linux/spinlock_api_smp.h:135 [inline]
 _raw_spin_lock_bh+0x31/0x40 kernel/locking/spinlock.c:168
 spin_lock_bh include/linux/spinlock.h:334 [inline]
 tipc_link_reset+0x125/0xdf0 net/tipc/link.c:850
 tipc_link_bc_create+0xb5/0x1f0 net/tipc/link.c:526
 tipc_bcast_init+0x59b/0xab0 net/tipc/bcast.c:521
 tipc_init_net+0x472/0x610 net/tipc/core.c:82
 ops_init+0xf7/0x520 net/core/net_namespace.c:129
 __register_pernet_operations net/core/net_namespace.c:940 [inline]
 register_pernet_operations+0x453/0xac0 net/core/net_namespace.c:1011
 register_pernet_subsys+0x28/0x40 net/core/net_namespace.c:1052
 tipc_init+0x83/0x104 net/tipc/core.c:140
 do_one_initcall+0x109/0x70a init/main.c:885
 do_initcall_level init/main.c:953 [inline]
 do_initcalls init/main.c:961 [inline]
 do_basic_setup init/main.c:979 [inline]
 kernel_init_freeable+0x4bd/0x57f init/main.c:1144
 kernel_init+0x13/0x180 init/main.c:1063
 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:413

The reason why the noise above was complained by LOCKDEP is because we
nested to hold l->wakeupq.lock and l->inputq->lock in tipc_link_reset
function. In fact it's unnecessary to move skb buffer from l->wakeupq
queue to l->inputq queue while holding the two locks at the same time.
Instead, we can move skb buffers in l->wakeupq queue to a temporary
list first and then move the buffers of the temporary list to l->inputq
queue, which is also safe for us.

Fixes: 3f32d0be6c16 ("tipc: lock wakeup & inputq at tipc_link_reset()")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'net/tipc')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index d229a36968da..f6552e4f4b43 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -845,14 +845,21 @@ static void link_prepare_wakeup(struct tipc_link *l)
 
 void tipc_link_reset(struct tipc_link *l)
 {
+	struct sk_buff_head list;
+
+	__skb_queue_head_init(&list);
+
 	l->in_session = false;
 	l->session++;
 	l->mtu = l->advertised_mtu;
+
 	spin_lock_bh(&l->wakeupq.lock);
+	skb_queue_splice_init(&l->wakeupq, &list);
+	spin_unlock_bh(&l->wakeupq.lock);
+
 	spin_lock_bh(&l->inputq->lock);
-	skb_queue_splice_init(&l->wakeupq, l->inputq);
+	skb_queue_splice_init(&list, l->inputq);
 	spin_unlock_bh(&l->inputq->lock);
-	spin_unlock_bh(&l->wakeupq.lock);
 
 	__skb_queue_purge(&l->transmq);
 	__skb_queue_purge(&l->deferdq);
-- 
cgit v1.2.3