summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/bonding.rst9
-rw-r--r--Documentation/networking/index.rst2
-rw-r--r--Documentation/networking/x25-iface.rst3
-rw-r--r--drivers/net/bonding/bond_netlink.c7
-rw-r--r--drivers/net/bonding/bond_options.c8
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c13
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c2
-rw-r--r--drivers/net/ethernet/mscc/vsc7514_regs.c18
-rw-r--r--drivers/net/ipvlan/ipvlan_core.c6
-rw-r--r--drivers/net/pcs/pcs-xpcs.c2
-rw-r--r--drivers/net/tap.c4
-rw-r--r--include/linux/if_vlan.h17
-rw-r--r--include/net/bonding.h2
-rw-r--r--net/bridge/br_forward.c2
-rw-r--r--net/core/datagram.c15
-rw-r--r--net/core/dev.c2
-rw-r--r--net/core/stream.c12
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/tcp.c14
-rw-r--r--net/ipv4/tcp_bpf.c2
-rw-r--r--net/ipv4/tcp_input.c4
-rw-r--r--net/llc/af_llc.c8
-rw-r--r--net/netlink/af_netlink.c8
-rw-r--r--net/packet/af_packet.c6
-rw-r--r--net/smc/smc_close.c4
-rw-r--r--net/smc/smc_rx.c4
-rw-r--r--net/smc/smc_tx.c4
-rw-r--r--net/socket.c2
-rw-r--r--net/tipc/socket.c4
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--net/unix/af_unix.c22
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh50
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh3
34 files changed, 176 insertions, 90 deletions
diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index adc4bf4f3c50..28925e19622d 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -776,10 +776,11 @@ peer_notif_delay
Specify the delay, in milliseconds, between each peer
notification (gratuitous ARP and unsolicited IPv6 Neighbor
Advertisement) when they are issued after a failover event.
- This delay should be a multiple of the link monitor interval
- (arp_interval or miimon, whichever is active). The default
- value is 0 which means to match the value of the link monitor
- interval.
+ This delay should be a multiple of the MII link monitor interval
+ (miimon).
+
+ The valid range is 0 - 300000. The default value is 0, which means
+ to match the value of the MII link monitor interval.
prio
Slave priority. A higher number means higher priority.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index a164ff074356..5b75c3f7a137 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -116,8 +116,8 @@ Contents:
udplite
vrf
vxlan
- x25-iface
x25
+ x25-iface
xfrm_device
xfrm_proc
xfrm_sync
diff --git a/Documentation/networking/x25-iface.rst b/Documentation/networking/x25-iface.rst
index f34e9ec64937..285cefcfce87 100644
--- a/Documentation/networking/x25-iface.rst
+++ b/Documentation/networking/x25-iface.rst
@@ -1,8 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0
-============================-
X.25 Device Driver Interface
-============================-
+============================
Version 1.1
diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index c2d080fc4fc4..27cbe148f0db 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -84,6 +84,11 @@ nla_put_failure:
return -EMSGSIZE;
}
+/* Limit the max delay range to 300s */
+static struct netlink_range_validation delay_range = {
+ .max = 300000,
+};
+
static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
[IFLA_BOND_MODE] = { .type = NLA_U8 },
[IFLA_BOND_ACTIVE_SLAVE] = { .type = NLA_U32 },
@@ -114,7 +119,7 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
[IFLA_BOND_AD_ACTOR_SYSTEM] = { .type = NLA_BINARY,
.len = ETH_ALEN },
[IFLA_BOND_TLB_DYNAMIC_LB] = { .type = NLA_U8 },
- [IFLA_BOND_PEER_NOTIF_DELAY] = { .type = NLA_U32 },
+ [IFLA_BOND_PEER_NOTIF_DELAY] = NLA_POLICY_FULL_RANGE(NLA_U32, &delay_range),
[IFLA_BOND_MISSED_MAX] = { .type = NLA_U8 },
[IFLA_BOND_NS_IP6_TARGET] = { .type = NLA_NESTED },
};
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index 0498fc6731f8..f3f27f0bd2a6 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -169,6 +169,12 @@ static const struct bond_opt_value bond_num_peer_notif_tbl[] = {
{ NULL, -1, 0}
};
+static const struct bond_opt_value bond_peer_notif_delay_tbl[] = {
+ { "off", 0, 0},
+ { "maxval", 300000, BOND_VALFLAG_MAX},
+ { NULL, -1, 0}
+};
+
static const struct bond_opt_value bond_primary_reselect_tbl[] = {
{ "always", BOND_PRI_RESELECT_ALWAYS, BOND_VALFLAG_DEFAULT},
{ "better", BOND_PRI_RESELECT_BETTER, 0},
@@ -488,7 +494,7 @@ static const struct bond_option bond_opts[BOND_OPT_LAST] = {
.id = BOND_OPT_PEER_NOTIF_DELAY,
.name = "peer_notif_delay",
.desc = "Delay between each peer notification on failover event, in milliseconds",
- .values = bond_intmax_tbl,
+ .values = bond_peer_notif_delay_tbl,
.set = bond_option_peer_notif_delay_set
}
};
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 57ce74315eba..caa00c72aeeb 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -294,19 +294,6 @@ static int gve_napi_poll_dqo(struct napi_struct *napi, int budget)
bool reschedule = false;
int work_done = 0;
- /* Clear PCI MSI-X Pending Bit Array (PBA)
- *
- * This bit is set if an interrupt event occurs while the vector is
- * masked. If this bit is set and we reenable the interrupt, it will
- * fire again. Since we're just about to poll the queue state, we don't
- * need it to fire again.
- *
- * Under high softirq load, it's possible that the interrupt condition
- * is triggered twice before we got the chance to process it.
- */
- gve_write_irq_doorbell_dqo(priv, block,
- GVE_ITR_NO_UPDATE_DQO | GVE_ITR_CLEAR_PBA_BIT_DQO);
-
if (block->tx)
reschedule |= gve_tx_poll_dqo(block, /*do_clean=*/true);
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 4c205afbd230..985cff910f30 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -654,7 +654,7 @@ __mtk_wed_detach(struct mtk_wed_device *dev)
BIT(hw->index), BIT(hw->index));
}
- if (!hw_list[!hw->index]->wed_dev &&
+ if ((!hw_list[!hw->index] || !hw_list[!hw->index]->wed_dev) &&
hw->eth->dma_dev != hw->eth->dev)
mtk_eth_set_dma_device(hw->eth, hw->eth->dev);
diff --git a/drivers/net/ethernet/mscc/vsc7514_regs.c b/drivers/net/ethernet/mscc/vsc7514_regs.c
index ef6fd3f6be30..5595bfe84bbb 100644
--- a/drivers/net/ethernet/mscc/vsc7514_regs.c
+++ b/drivers/net/ethernet/mscc/vsc7514_regs.c
@@ -307,15 +307,15 @@ static const u32 vsc7514_sys_regmap[] = {
REG(SYS_COUNT_DROP_YELLOW_PRIO_4, 0x000218),
REG(SYS_COUNT_DROP_YELLOW_PRIO_5, 0x00021c),
REG(SYS_COUNT_DROP_YELLOW_PRIO_6, 0x000220),
- REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000214),
- REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000218),
- REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00021c),
- REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000220),
- REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000224),
- REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000228),
- REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00022c),
- REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000230),
- REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000234),
+ REG(SYS_COUNT_DROP_YELLOW_PRIO_7, 0x000224),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_0, 0x000228),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_1, 0x00022c),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_2, 0x000230),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_3, 0x000234),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_4, 0x000238),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_5, 0x00023c),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_6, 0x000240),
+ REG(SYS_COUNT_DROP_GREEN_PRIO_7, 0x000244),
REG(SYS_RESET_CFG, 0x000508),
REG(SYS_CMID, 0x00050c),
REG(SYS_VLAN_ETYPE_CFG, 0x000510),
diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c
index 460b3d4f2245..ab5133eb1d51 100644
--- a/drivers/net/ipvlan/ipvlan_core.c
+++ b/drivers/net/ipvlan/ipvlan_core.c
@@ -436,6 +436,9 @@ static int ipvlan_process_v4_outbound(struct sk_buff *skb)
goto err;
}
skb_dst_set(skb, &rt->dst);
+
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+
err = ip_local_out(net, skb->sk, skb);
if (unlikely(net_xmit_eval(err)))
dev->stats.tx_errors++;
@@ -474,6 +477,9 @@ static int ipvlan_process_v6_outbound(struct sk_buff *skb)
goto err;
}
skb_dst_set(skb, dst);
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
err = ip6_local_out(net, skb->sk, skb);
if (unlikely(net_xmit_eval(err)))
dev->stats.tx_errors++;
diff --git a/drivers/net/pcs/pcs-xpcs.c b/drivers/net/pcs/pcs-xpcs.c
index 539cd43eae8d..f19d48c94fe0 100644
--- a/drivers/net/pcs/pcs-xpcs.c
+++ b/drivers/net/pcs/pcs-xpcs.c
@@ -1203,7 +1203,7 @@ static const struct xpcs_compat synopsys_xpcs_compat[DW_XPCS_INTERFACE_MAX] = {
[DW_XPCS_2500BASEX] = {
.supported = xpcs_2500basex_features,
.interface = xpcs_2500basex_interfaces,
- .num_interfaces = ARRAY_SIZE(xpcs_2500basex_features),
+ .num_interfaces = ARRAY_SIZE(xpcs_2500basex_interfaces),
.an_mode = DW_2500BASEX,
},
};
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index ce993cc75bf3..d30d730ed5a7 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -742,7 +742,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
/* Move network header to the right position for VLAN tagged packets */
if (eth_type_vlan(skb->protocol) &&
- __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
+ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
skb_set_network_header(skb, depth);
/* copy skb_ubuf_info for callback when skb has no error */
@@ -1197,7 +1197,7 @@ static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
/* Move network header to the right position for VLAN tagged packets */
if (eth_type_vlan(skb->protocol) &&
- __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
+ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
skb_set_network_header(skb, depth);
rcu_read_lock();
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 0f40f379d75c..6ba71957851e 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -637,6 +637,23 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb)
return __vlan_get_protocol(skb, skb->protocol, NULL);
}
+/* This version of __vlan_get_protocol() also pulls mac header in skb->head */
+static inline __be16 vlan_get_protocol_and_depth(struct sk_buff *skb,
+ __be16 type, int *depth)
+{
+ int maclen;
+
+ type = __vlan_get_protocol(skb, type, &maclen);
+
+ if (type) {
+ if (!pskb_may_pull(skb, maclen))
+ type = 0;
+ else if (depth)
+ *depth = maclen;
+ }
+ return type;
+}
+
/* A getter for the SKB protocol field which will handle VLAN tags consistently
* whether VLAN acceleration is enabled or not.
*/
diff --git a/include/net/bonding.h b/include/net/bonding.h
index a60a24923b55..0efef2a952b7 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -233,7 +233,7 @@ struct bonding {
*/
spinlock_t mode_lock;
spinlock_t stats_lock;
- u8 send_peer_notif;
+ u32 send_peer_notif;
u8 igmp_retrans;
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_entry;
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 57744704ff69..84d6dd5e5b1a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -42,7 +42,7 @@ int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb
eth_type_vlan(skb->protocol)) {
int depth;
- if (!__vlan_get_protocol(skb, skb->protocol, &depth))
+ if (!vlan_get_protocol_and_depth(skb, skb->protocol, &depth))
goto drop;
skb_set_network_header(skb, depth);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 5662dff3d381..176eb5834746 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -807,18 +807,21 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
/* exceptional events? */
- if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
+ if (READ_ONCE(sk->sk_err) ||
+ !skb_queue_empty_lockless(&sk->sk_error_queue))
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
/* readable? */
@@ -827,10 +830,12 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
/* Connection-based need to check for termination and startup */
if (connection_based(sk)) {
- if (sk->sk_state == TCP_CLOSE)
+ int state = READ_ONCE(sk->sk_state);
+
+ if (state == TCP_CLOSE)
mask |= EPOLLHUP;
/* connection hasn't started yet? */
- if (sk->sk_state == TCP_SYN_SENT)
+ if (state == TCP_SYN_SENT)
return mask;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 735096d42c1d..b3c13e041935 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3335,7 +3335,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto;
}
- return __vlan_get_protocol(skb, type, depth);
+ return vlan_get_protocol_and_depth(skb, type, depth);
}
/* openvswitch calls this on rx path, so we need a different check.
diff --git a/net/core/stream.c b/net/core/stream.c
index 434446ab14c5..f5c4e47df165 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -73,8 +73,8 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
add_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending++;
done = sk_wait_event(sk, timeo_p,
- !sk->sk_err &&
- !((1 << sk->sk_state) &
+ !READ_ONCE(sk->sk_err) &&
+ !((1 << READ_ONCE(sk->sk_state)) &
~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
remove_wait_queue(sk_sleep(sk), &wait);
sk->sk_write_pending--;
@@ -87,9 +87,9 @@ EXPORT_SYMBOL(sk_stream_wait_connect);
* sk_stream_closing - Return 1 if we still have things to send in our buffers.
* @sk: socket to verify
*/
-static inline int sk_stream_closing(struct sock *sk)
+static int sk_stream_closing(const struct sock *sk)
{
- return (1 << sk->sk_state) &
+ return (1 << READ_ONCE(sk->sk_state)) &
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
}
@@ -142,8 +142,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk->sk_write_pending++;
- sk_wait_event(sk, &current_timeo, sk->sk_err ||
- (sk->sk_shutdown & SEND_SHUTDOWN) ||
+ sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) ||
+ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
(sk_stream_memory_free(sk) &&
!vm_wait), &wait);
sk->sk_write_pending--;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 940062e08f57..c4aab3aacbd8 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -894,7 +894,7 @@ int inet_shutdown(struct socket *sock, int how)
EPOLLHUP, even on eg. unconnected UDP sockets -- RR */
fallthrough;
default:
- sk->sk_shutdown |= how;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | how);
if (sk->sk_prot->shutdown)
sk->sk_prot->shutdown(sk, how);
break;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 20db115c38c4..4d6392c16b7a 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -498,6 +498,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
__poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
+ u8 shutdown;
int state;
sock_poll_wait(file, sock, wait);
@@ -540,9 +541,10 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* NOTE. Check for TCP_CLOSE is added. The goal is to prevent
* blocking on fresh not-connected or disconnected socket. --ANK
*/
- if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected or passive Fast Open socket? */
@@ -559,7 +561,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (tcp_stream_is_readable(sk, target))
mask |= EPOLLIN | EPOLLRDNORM;
- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (!(shutdown & SEND_SHUTDOWN)) {
if (__sk_stream_is_writeable(sk, 1)) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
@@ -2867,7 +2869,7 @@ void __tcp_close(struct sock *sk, long timeout)
int data_was_unread = 0;
int state;
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
@@ -3119,7 +3121,7 @@ int tcp_disconnect(struct sock *sk, int flags)
inet_bhash2_reset_saddr(sk);
- sk->sk_shutdown = 0;
+ WRITE_ONCE(sk->sk_shutdown, 0);
sock_reset_flag(sk, SOCK_DONE);
tp->srtt_us = 0;
tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT);
@@ -4649,7 +4651,7 @@ void tcp_done(struct sock *sk)
if (req)
reqsk_fastopen_remove(sk, req, false);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if (!sock_flag(sk, SOCK_DEAD))
sk->sk_state_change(sk);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index ebf917511937..2e9547467edb 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -168,7 +168,7 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
ret = sk_wait_event(sk, &timeo,
!list_empty(&psock->ingress_msg) ||
- !skb_queue_empty(&sk->sk_receive_queue), &wait);
+ !skb_queue_empty_lockless(&sk->sk_receive_queue), &wait);
sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
remove_wait_queue(sk_sleep(sk), &wait);
return ret;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a057330d6f59..61b6710f337a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4362,7 +4362,7 @@ void tcp_fin(struct sock *sk)
inet_csk_schedule_ack(sk);
- sk->sk_shutdown |= RCV_SHUTDOWN;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
@@ -6599,7 +6599,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
break;
tcp_set_state(sk, TCP_FIN_WAIT2);
- sk->sk_shutdown |= SEND_SHUTDOWN;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | SEND_SHUTDOWN);
sk_dst_confirm(sk);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index da7fe94bea2e..9ffbc667be6c 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -583,7 +583,8 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
+ if (sk_wait_event(sk, &timeout,
+ READ_ONCE(sk->sk_state) == TCP_CLOSE, &wait))
break;
rc = -ERESTARTSYS;
if (signal_pending(current))
@@ -603,7 +604,8 @@ static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
add_wait_queue(sk_sleep(sk), &wait);
while (1) {
- if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
+ if (sk_wait_event(sk, &timeout,
+ READ_ONCE(sk->sk_state) != TCP_SYN_SENT, &wait))
break;
if (signal_pending(current) || !timeout)
break;
@@ -622,7 +624,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
while (1) {
rc = 0;
if (sk_wait_event(sk, &timeout,
- (sk->sk_shutdown & RCV_SHUTDOWN) ||
+ (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN) ||
(!llc_data_accept_state(llc->state) &&
!llc->remote_busy_flag &&
!llc->p_flag), &wait))
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 7ef8b9a1e30c..c87804112d0c 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1990,7 +1990,7 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
skb_free_datagram(sk, skb);
- if (nlk->cb_running &&
+ if (READ_ONCE(nlk->cb_running) &&
atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf / 2) {
ret = netlink_dump(sk);
if (ret) {
@@ -2302,7 +2302,7 @@ static int netlink_dump(struct sock *sk)
if (cb->done)
cb->done(cb);
- nlk->cb_running = false;
+ WRITE_ONCE(nlk->cb_running, false);
module = cb->module;
skb = cb->skb;
mutex_unlock(nlk->cb_mutex);
@@ -2365,7 +2365,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
goto error_put;
}
- nlk->cb_running = true;
+ WRITE_ONCE(nlk->cb_running, true);
nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
@@ -2703,7 +2703,7 @@ static int netlink_native_seq_show(struct seq_file *seq, void *v)
nlk->groups ? (u32)nlk->groups[0] : 0,
sk_rmem_alloc_get(s),
sk_wmem_alloc_get(s),
- nlk->cb_running,
+ READ_ONCE(nlk->cb_running),
refcount_read(&s->sk_refcnt),
atomic_read(&s->sk_drops),
sock_i_ino(s)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 640d94e34635..94c6a1ffa459 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1934,10 +1934,8 @@ static void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
/* Move network header to the right position for VLAN tagged packets */
if (likely(skb->dev->type == ARPHRD_ETHER) &&
eth_type_vlan(skb->protocol) &&
- __vlan_get_protocol(skb, skb->protocol, &depth) != 0) {
- if (pskb_may_pull(skb, depth))
- skb_set_network_header(skb, depth);
- }
+ vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
+ skb_set_network_header(skb, depth);
skb_probe_transport_header(skb);
}
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 31db7438857c..dbdf03e8aa5b 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -67,8 +67,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
rc = sk_wait_event(sk, &timeout,
!smc_tx_prepared_sends(&smc->conn) ||
- sk->sk_err == ECONNABORTED ||
- sk->sk_err == ECONNRESET ||
+ READ_ONCE(sk->sk_err) == ECONNABORTED ||
+ READ_ONCE(sk->sk_err) == ECONNRESET ||
smc->conn.killed,
&wait);
if (rc)
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 4380d32f5a5f..9a2f3638d161 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -267,9 +267,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
add_wait_queue(sk_sleep(sk), &wait);
rc = sk_wait_event(sk, timeo,
- sk->sk_err ||
+ READ_ONCE(sk->sk_err) ||
cflags->peer_conn_abort ||
- sk->sk_shutdown & RCV_SHUTDOWN ||
+ READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
conn->killed ||
fcrit(conn),
&wait);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index f4b6a71ac488..45128443f1f1 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -113,8 +113,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
break; /* at least 1 byte of free & no urgent data */
set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
sk_wait_event(sk, &timeo,
- sk->sk_err ||
- (sk->sk_shutdown & SEND_SHUTDOWN) ||
+ READ_ONCE(sk->sk_err) ||
+ (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
smc_cdc_rxed_any_close(conn) ||
(atomic_read(&conn->sndbuf_space) &&
!conn->urg_tx_pend),
diff --git a/net/socket.c b/net/socket.c
index a7b4b37d86df..b7e01d0fe082 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2911,7 +2911,7 @@ static int do_recvmmsg(int fd, struct mmsghdr __user *mmsg,
* error to return on the next call or if the
* app asks about it using getsockopt(SO_ERROR).
*/
- sock->sk->sk_err = -err;
+ WRITE_ONCE(sock->sk->sk_err, -err);
}
out_put:
fput_light(sock->file, fput_needed);
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 37edfe10f8c6..dd73d71c02a9 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -314,9 +314,9 @@ static void tsk_rej_rx_queue(struct sock *sk, int error)
tipc_sk_respond(sk, skb, error);
}
-static bool tipc_sk_connected(struct sock *sk)
+static bool tipc_sk_connected(const struct sock *sk)
{
- return sk->sk_state == TIPC_ESTABLISHED;
+ return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED;
}
/* tipc_sk_type_connectionless - check if the socket is datagram socket
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b32c112984dd..f2e7302a4d96 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -111,7 +111,8 @@ int wait_on_pending_writer(struct sock *sk, long *timeo)
break;
}
- if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait))
+ if (sk_wait_event(sk, timeo,
+ !READ_ONCE(sk->sk_write_pending), &wait))
break;
}
remove_wait_queue(sk_sleep(sk), &wait);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index fb31e8a4409e..cc695c9f09ec 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -603,7 +603,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
/* Clear state */
unix_state_lock(sk);
sock_orphan(sk);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
path = u->path;
u->path.dentry = NULL;
u->path.mnt = NULL;
@@ -628,7 +628,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
unix_state_lock(skpair);
/* No more writes */
- skpair->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK);
if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
WRITE_ONCE(skpair->sk_err, ECONNRESET);
unix_state_unlock(skpair);
@@ -1442,7 +1442,7 @@ static long unix_wait_for_peer(struct sock *other, long timeo)
sched = !sock_flag(other, SOCK_DEAD) &&
!(other->sk_shutdown & RCV_SHUTDOWN) &&
- unix_recvq_full(other);
+ unix_recvq_full_lockless(other);
unix_state_unlock(other);
@@ -3008,7 +3008,7 @@ static int unix_shutdown(struct socket *sock, int mode)
++mode;
unix_state_lock(sk);
- sk->sk_shutdown |= mode;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | mode);
other = unix_peer(sk);
if (other)
sock_hold(other);
@@ -3028,7 +3028,7 @@ static int unix_shutdown(struct socket *sock, int mode)
if (mode&SEND_SHUTDOWN)
peer_mode |= RCV_SHUTDOWN;
unix_state_lock(other);
- other->sk_shutdown |= peer_mode;
+ WRITE_ONCE(other->sk_shutdown, other->sk_shutdown | peer_mode);
unix_state_unlock(other);
other->sk_state_change(other);
if (peer_mode == SHUTDOWN_MASK)
@@ -3160,16 +3160,18 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
{
struct sock *sk = sock->sk;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (READ_ONCE(sk->sk_err))
mask |= EPOLLERR;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
/* readable? */
@@ -3203,9 +3205,11 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk, *other;
unsigned int writable;
__poll_t mask;
+ u8 shutdown;
sock_poll_wait(file, sock, wait);
mask = 0;
+ shutdown = READ_ONCE(sk->sk_shutdown);
/* exceptional events? */
if (READ_ONCE(sk->sk_err) ||
@@ -3213,9 +3217,9 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
mask |= EPOLLERR |
(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
- if (sk->sk_shutdown == SHUTDOWN_MASK)
+ if (shutdown == SHUTDOWN_MASK)
mask |= EPOLLHUP;
/* readable? */
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index db29a3146a86..607ba5c38977 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -6,6 +6,7 @@
ALL_TESTS="
prio
arp_validate
+ num_grat_arp
"
REQUIRE_MZ=no
@@ -255,6 +256,55 @@ arp_validate()
arp_validate_ns "active-backup"
}
+garp_test()
+{
+ local param="$1"
+ local active_slave exp_num real_num i
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+
+ bond_check_connection
+ [ $RET -ne 0 ] && log_test "num_grat_arp" "$retmsg"
+
+
+ # Add tc rules to count GARP number
+ for i in $(seq 0 2); do
+ tc -n ${g_ns} filter add dev s$i ingress protocol arp pref 1 handle 101 \
+ flower skip_hw arp_op request arp_sip ${s_ip4} arp_tip ${s_ip4} action pass
+ done
+
+ # Do failover
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ ip -n ${s_ns} link set ${active_slave} down
+
+ exp_num=$(echo "${param}" | cut -f6 -d ' ')
+ sleep $((exp_num + 2))
+
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+
+ # check result
+ real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
+ if [ "${real_num}" -ne "${exp_num}" ]; then
+ echo "$real_num garp packets sent on active slave ${active_slave}"
+ RET=1
+ fi
+
+ for i in $(seq 0 2); do
+ tc -n ${g_ns} filter del dev s$i ingress
+ done
+}
+
+num_grat_arp()
+{
+ local val
+ for val in 10 20 30 50; do
+ garp_test "mode active-backup miimon 100 num_grat_arp $val peer_notify_delay 1000"
+ log_test "num_grat_arp" "active-backup miimon num_grat_arp $val"
+ done
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
index 4045ca97fb22..69ab99a56043 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -61,6 +61,8 @@ server_create()
ip -n ${g_ns} link set s${i} up
ip -n ${g_ns} link set s${i} master br0
ip -n ${s_ns} link set eth${i} master bond0
+
+ tc -n ${g_ns} qdisc add dev s${i} clsact
done
ip -n ${s_ns} link set bond0 up
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 057c3d0ad620..9ddb68dd6a08 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -791,8 +791,9 @@ tc_rule_handle_stats_get()
local id=$1; shift
local handle=$1; shift
local selector=${1:-.packets}; shift
+ local netns=${1:-""}; shift
- tc -j -s filter show $id \
+ tc $netns -j -s filter show $id \
| jq ".[] | select(.options.handle == $handle) | \
.options.actions[0].stats$selector"
}