summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig2
-rw-r--r--net/bpf/test_run.c204
-rw-r--r--net/bridge/br_device.c1
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_input.c1
-rw-r--r--net/bridge/br_private.h27
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c152
-rw-r--r--net/core/filter.c65
-rw-r--r--net/core/flow_dissector.c40
-rw-r--r--net/core/gro.c114
-rw-r--r--net/core/gso.c273
-rw-r--r--net/core/net_namespace.c4
-rw-r--r--net/core/netdev-genl-gen.c2
-rw-r--r--net/core/netdev-genl-gen.h2
-rw-r--r--net/core/netpoll.c5
-rw-r--r--net/core/pktgen.c13
-rw-r--r--net/core/rtnetlink.c179
-rw-r--r--net/core/skbuff.c258
-rw-r--r--net/core/sock.c108
-rw-r--r--net/dccp/dccp.h2
-rw-r--r--net/dccp/proto.c12
-rw-r--r--net/devlink/health.c2
-rw-r--r--net/devlink/leftover.c196
-rw-r--r--net/dsa/port.c32
-rw-r--r--net/ethtool/ioctl.c15
-rw-r--r--net/ethtool/netlink.c12
-rw-r--r--net/handshake/genl.c2
-rw-r--r--net/handshake/genl.h2
-rw-r--r--net/hsr/hsr_device.c5
-rw-r--r--net/hsr/hsr_main.h1
-rw-r--r--net/hsr/hsr_slave.c15
-rw-r--r--net/ieee802154/socket.c15
-rw-r--r--net/ipv4/af_inet.c53
-rw-r--r--net/ipv4/esp4_offload.c1
-rw-r--r--net/ipv4/fou_nl.c2
-rw-r--r--net/ipv4/fou_nl.h2
-rw-r--r--net/ipv4/gre_offload.c1
-rw-r--r--net/ipv4/inet_connection_sock.c21
-rw-r--r--net/ipv4/ip_gre.c8
-rw-r--r--net/ipv4/ip_output.c170
-rw-r--r--net/ipv4/ipconfig.c10
-rw-r--r--net/ipv4/ipmr.c63
-rw-r--r--net/ipv4/ping.c56
-rw-r--r--net/ipv4/raw.c26
-rw-r--r--net/ipv4/syncookies.c4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c19
-rw-r--r--net/ipv4/tcp.c309
-rw-r--r--net/ipv4/tcp_bpf.c69
-rw-r--r--net/ipv4/tcp_ipv4.c25
-rw-r--r--net/ipv4/tcp_minisocks.c2
-rw-r--r--net/ipv4/tcp_offload.c8
-rw-r--r--net/ipv4/tcp_output.c195
-rw-r--r--net/ipv4/tcp_timer.c17
-rw-r--r--net/ipv4/udp.c384
-rw-r--r--net/ipv4/udp_offload.c1
-rw-r--r--net/ipv6/addrconf.c4
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/esp6_offload.c1
-rw-r--r--net/ipv6/exthdrs.c36
-rw-r--r--net/ipv6/ip6_offload.c1
-rw-r--r--net/ipv6/ip6_output.c19
-rw-r--r--net/ipv6/ip6mr.c44
-rw-r--r--net/ipv6/raw.c16
-rw-r--r--net/ipv6/route.c25
-rw-r--r--net/ipv6/rpl.c7
-rw-r--r--net/ipv6/seg6_iptunnel.c3
-rw-r--r--net/ipv6/tcp_ipv6.c11
-rw-r--r--net/ipv6/tcpv6_offload.c3
-rw-r--r--net/ipv6/udp.c15
-rw-r--r--net/ipv6/udp_offload.c1
-rw-r--r--net/kcm/kcmsock.c355
-rw-r--r--net/l2tp/l2tp_core.h2
-rw-r--r--net/l2tp/l2tp_ip.c9
-rw-r--r--net/mac80211/cfg.c77
-rw-r--r--net/mac80211/chan.c8
-rw-r--r--net/mac80211/driver-ops.h10
-rw-r--r--net/mac80211/ht.c5
-rw-r--r--net/mac80211/ibss.c38
-rw-r--r--net/mac80211/ieee80211_i.h33
-rw-r--r--net/mac80211/iface.c37
-rw-r--r--net/mac80211/main.c7
-rw-r--r--net/mac80211/mesh.c40
-rw-r--r--net/mac80211/mesh.h19
-rw-r--r--net/mac80211/mesh_hwmp.c6
-rw-r--r--net/mac80211/mesh_plink.c37
-rw-r--r--net/mac80211/mesh_ps.c7
-rw-r--r--net/mac80211/mlme.c136
-rw-r--r--net/mac80211/ocb.c10
-rw-r--r--net/mac80211/rx.c2
-rw-r--r--net/mac80211/scan.c2
-rw-r--r--net/mac80211/sta_info.c235
-rw-r--r--net/mac80211/status.c6
-rw-r--r--net/mac80211/tdls.c4
-rw-r--r--net/mac80211/tx.c29
-rw-r--r--net/mac80211/util.c156
-rw-r--r--net/mctp/route.c3
-rw-r--r--net/mpls/af_mpls.c1
-rw-r--r--net/mpls/mpls_gso.c1
-rw-r--r--net/mptcp/mib.c6
-rw-r--r--net/mptcp/mib.h18
-rw-r--r--net/mptcp/options.c19
-rw-r--r--net/mptcp/pm.c47
-rw-r--r--net/mptcp/pm_netlink.c142
-rw-r--r--net/mptcp/pm_userspace.c5
-rw-r--r--net/mptcp/protocol.c63
-rw-r--r--net/mptcp/protocol.h20
-rw-r--r--net/mptcp/sockopt.c153
-rw-r--r--net/mptcp/subflow.c2
-rw-r--r--net/ncsi/ncsi-rsp.c93
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c1
-rw-r--r--net/netfilter/nf_flow_table_core.c24
-rw-r--r--net/netfilter/nf_flow_table_ip.c232
-rw-r--r--net/netfilter/nf_tables_api.c11
-rw-r--r--net/netfilter/nfnetlink_queue.c1
-rw-r--r--net/netfilter/nft_exthdr.c106
-rw-r--r--net/netfilter/nft_flow_offload.c12
-rw-r--r--net/netfilter/nft_lookup.c23
-rw-r--r--net/netfilter/nft_set_pipapo.c6
-rw-r--r--net/netlabel/netlabel_domainhash.h2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netlink/genetlink.c2
-rw-r--r--net/nfc/llcp_commands.c3
-rw-r--r--net/nsh/nsh.c1
-rw-r--r--net/openvswitch/actions.c13
-rw-r--r--net/openvswitch/datapath.c1
-rw-r--r--net/openvswitch/flow_netlink.c2
-rw-r--r--net/openvswitch/meter.c4
-rw-r--r--net/phonet/datagram.c11
-rw-r--r--net/phonet/pep.c11
-rw-r--r--net/phonet/socket.c2
-rw-r--r--net/sched/act_pedit.c4
-rw-r--r--net/sched/act_police.c1
-rw-r--r--net/sched/cls_flower.c132
-rw-r--r--net/sched/sch_cake.c1
-rw-r--r--net/sched/sch_htb.c7
-rw-r--r--net/sched/sch_netem.c1
-rw-r--r--net/sched/sch_taprio.c90
-rw-r--r--net/sched/sch_tbf.c1
-rw-r--r--net/sctp/offload.c1
-rw-r--r--net/sctp/protocol.c4
-rw-r--r--net/sctp/socket.c26
-rw-r--r--net/sctp/stream_sched.c9
-rw-r--r--net/socket.c38
-rw-r--r--net/sunrpc/svcsock.c38
-rw-r--r--net/tipc/bearer.c18
-rw-r--r--net/tipc/bearer.h4
-rw-r--r--net/tipc/udp_media.c4
-rw-r--r--net/tls/tls.h2
-rw-r--r--net/tls/tls_device.c124
-rw-r--r--net/tls/tls_main.c63
-rw-r--r--net/tls/tls_sw.c248
-rw-r--r--net/unix/Kconfig6
-rw-r--r--net/unix/af_unix.c217
-rw-r--r--net/wireless/core.c158
-rw-r--r--net/wireless/core.h13
-rw-r--r--net/wireless/nl80211.c6
-rw-r--r--net/wireless/pmsr.c4
-rw-r--r--net/wireless/scan.c14
-rw-r--r--net/wireless/sme.c4
-rw-r--r--net/wireless/sysfs.c8
-rw-r--r--net/wireless/wext-sme.c4
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--net/xfrm/espintcp.c10
-rw-r--r--net/xfrm/xfrm_device.c1
-rw-r--r--net/xfrm/xfrm_interface_core.c1
-rw-r--r--net/xfrm/xfrm_ipcomp.c5
-rw-r--r--net/xfrm/xfrm_output.c1
169 files changed, 3987 insertions, 3006 deletions
diff --git a/net/Kconfig b/net/Kconfig
index 7d39c1773eb4..2fb25b534df5 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -324,7 +324,7 @@ config CGROUP_NET_CLASSID
config NET_RX_BUSY_POLL
bool
- default y if !PREEMPT_RT
+ default y if !PREEMPT_RT || (PREEMPT_RT && !NETCONSOLE)
config BQL
bool
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index e79e3a415ca9..2321bd2f9964 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -561,29 +561,6 @@ __bpf_kfunc int bpf_modify_return_test(int a, int *b)
return a + *b;
}
-__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
-{
- return a + b + c + d;
-}
-
-__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
-{
- return a + b;
-}
-
-__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
-{
- return sk;
-}
-
-long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
-{
- /* Provoke the compiler to assume that the caller has sign-extended a,
- * b and c on platforms where this is required (e.g. s390x).
- */
- return (long)a + (long)b + (long)c + d;
-}
-
int noinline bpf_fentry_shadow_test(int a)
{
return a + 1;
@@ -606,32 +583,6 @@ struct prog_test_ref_kfunc {
refcount_t cnt;
};
-static struct prog_test_ref_kfunc prog_test_struct = {
- .a = 42,
- .b = 108,
- .next = &prog_test_struct,
- .cnt = REFCOUNT_INIT(1),
-};
-
-__bpf_kfunc struct prog_test_ref_kfunc *
-bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
-{
- refcount_inc(&prog_test_struct.cnt);
- return &prog_test_struct;
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
-{
- WARN_ON_ONCE(1);
-}
-
-__bpf_kfunc struct prog_test_member *
-bpf_kfunc_call_memb_acquire(void)
-{
- WARN_ON_ONCE(1);
- return NULL;
-}
-
__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
refcount_dec(&p->cnt);
@@ -641,134 +592,6 @@ __bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
{
}
-__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
-{
- WARN_ON_ONCE(1);
-}
-
-static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
-{
- if (size > 2 * sizeof(int))
- return NULL;
-
- return (int *)p;
-}
-
-__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
- const int rdwr_buf_size)
-{
- return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
-}
-
-__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
- const int rdonly_buf_size)
-{
- return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
-}
-
-/* the next 2 ones can't be really used for testing expect to ensure
- * that the verifier rejects the call.
- * Acquire functions must return struct pointers, so these ones are
- * failing.
- */
-__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
- const int rdonly_buf_size)
-{
- return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
-}
-
-__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
-{
-}
-
-struct prog_test_pass1 {
- int x0;
- struct {
- int x1;
- struct {
- int x2;
- struct {
- int x3;
- };
- };
- };
-};
-
-struct prog_test_pass2 {
- int len;
- short arr1[4];
- struct {
- char arr2[4];
- unsigned long arr3[8];
- } x;
-};
-
-struct prog_test_fail1 {
- void *p;
- int x;
-};
-
-struct prog_test_fail2 {
- int x8;
- struct prog_test_pass1 x;
-};
-
-struct prog_test_fail3 {
- int len;
- char arr1[2];
- char arr2[];
-};
-
-__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
-{
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
-{
- /* p != NULL, but p->cnt could be 0 */
-}
-
-__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
-{
-}
-
-__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
-{
- return arg;
-}
-
__diag_pop();
BTF_SET8_START(bpf_test_modify_return_ids)
@@ -782,32 +605,8 @@ static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
};
BTF_SET8_START(test_sk_check_kfunc_ids)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
BTF_SET8_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
@@ -1415,11 +1214,10 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
}
frag = &sinfo->frags[sinfo->nr_frags++];
- __skb_frag_set_page(frag, page);
data_len = min_t(u32, kattr->test.data_size_in - size,
PAGE_SIZE);
- skb_frag_size_set(frag, data_len);
+ skb_frag_fill_page_desc(frag, page, 0, data_len);
if (copy_from_user(page_address(page), data_in + size,
data_len)) {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 8eca8a5c80c6..9a5ea06236bd 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
u16 vid = 0;
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+ br_tc_skb_miss_set(skb, false);
rcu_read_lock();
nf_ops = rcu_dereference(nf_br_ops);
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 84d6dd5e5b1a..6116eba1bd89 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -203,6 +203,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
struct net_bridge_port *prev = NULL;
struct net_bridge_port *p;
+ br_tc_skb_miss_set(skb, pkt_type != BR_PKT_BROADCAST);
+
list_for_each_entry_rcu(p, &br->port_list, list) {
/* Do not flood unicast traffic to ports that turn it off, nor
* other traffic if flood off, except for traffic we originate
@@ -295,6 +297,7 @@ void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
allow_mode_include = false;
} else {
p = NULL;
+ br_tc_skb_miss_set(skb, true);
}
while (p || rp) {
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index fc17b9fd93e6..c34a0b0901b0 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -334,6 +334,7 @@ static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
return RX_HANDLER_CONSUMED;
memset(skb->cb, 0, sizeof(struct br_input_skb_cb));
+ br_tc_skb_miss_set(skb, false);
p = br_port_get_rcu(skb->dev);
if (p->flags & BR_VLAN_TUNNEL)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 2119729ded2b..a63b32c1638e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -15,6 +15,7 @@
#include <linux/u64_stats_sync.h>
#include <net/route.h>
#include <net/ip6_fib.h>
+#include <net/pkt_cls.h>
#include <linux/if_vlan.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
@@ -754,6 +755,32 @@ void br_boolopt_multi_get(const struct net_bridge *br,
struct br_boolopt_multi *bm);
void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on);
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss)
+{
+ struct tc_skb_ext *ext;
+
+ if (!tc_skb_ext_tc_enabled())
+ return;
+
+ ext = skb_ext_find(skb, TC_SKB_EXT);
+ if (ext) {
+ ext->l2_miss = miss;
+ return;
+ }
+ if (!miss)
+ return;
+ ext = tc_skb_ext_alloc(skb);
+ if (!ext)
+ return;
+ ext->l2_miss = true;
+}
+#else
+static inline void br_tc_skb_miss_set(struct sk_buff *skb, bool miss)
+{
+}
+#endif
+
/* br_device.c */
void br_dev_setup(struct net_device *dev);
void br_dev_delete(struct net_device *dev, struct list_head *list);
diff --git a/net/core/Makefile b/net/core/Makefile
index 8f367813bc68..731db2eaa610 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -13,7 +13,7 @@ obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
fib_notifier.o xdp.o flow_offload.o gro.o \
- netdev-genl.o netdev-genl-gen.o
+ netdev-genl.o netdev-genl-gen.o gso.o
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
diff --git a/net/core/dev.c b/net/core/dev.c
index c29f3e1db3ca..e4ff0adf5523 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -758,29 +758,43 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
}
EXPORT_SYMBOL(dev_get_by_name_rcu);
+/* Deprecated for new users, call netdev_get_by_name() instead */
+struct net_device *dev_get_by_name(struct net *net, const char *name)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_name_rcu(net, name);
+ dev_hold(dev);
+ rcu_read_unlock();
+ return dev;
+}
+EXPORT_SYMBOL(dev_get_by_name);
+
/**
- * dev_get_by_name - find a device by its name
+ * netdev_get_by_name() - find a device by its name
* @net: the applicable net namespace
* @name: name to find
+ * @tracker: tracking object for the acquired reference
+ * @gfp: allocation flags for the tracker
*
* Find an interface by name. This can be called from any
* context and does its own locking. The returned handle has
- * the usage count incremented and the caller must use dev_put() to
+ * the usage count incremented and the caller must use netdev_put() to
* release it when it is no longer needed. %NULL is returned if no
* matching device is found.
*/
-
-struct net_device *dev_get_by_name(struct net *net, const char *name)
+struct net_device *netdev_get_by_name(struct net *net, const char *name,
+ netdevice_tracker *tracker, gfp_t gfp)
{
struct net_device *dev;
- rcu_read_lock();
- dev = dev_get_by_name_rcu(net, name);
- dev_hold(dev);
- rcu_read_unlock();
+ dev = dev_get_by_name(net, name);
+ if (dev)
+ netdev_tracker_alloc(dev, tracker, gfp);
return dev;
}
-EXPORT_SYMBOL(dev_get_by_name);
+EXPORT_SYMBOL(netdev_get_by_name);
/**
* __dev_get_by_index - find a device by its ifindex
@@ -831,29 +845,42 @@ struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
}
EXPORT_SYMBOL(dev_get_by_index_rcu);
+/* Deprecated for new users, call netdev_get_by_index() instead */
+struct net_device *dev_get_by_index(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(net, ifindex);
+ dev_hold(dev);
+ rcu_read_unlock();
+ return dev;
+}
+EXPORT_SYMBOL(dev_get_by_index);
/**
- * dev_get_by_index - find a device by its ifindex
+ * netdev_get_by_index() - find a device by its ifindex
* @net: the applicable net namespace
* @ifindex: index of device
+ * @tracker: tracking object for the acquired reference
+ * @gfp: allocation flags for the tracker
*
* Search for an interface by index. Returns NULL if the device
* is not found or a pointer to the device. The device returned has
* had a reference added and the pointer is safe until the user calls
- * dev_put to indicate they have finished with it.
+ * netdev_put() to indicate they have finished with it.
*/
-
-struct net_device *dev_get_by_index(struct net *net, int ifindex)
+struct net_device *netdev_get_by_index(struct net *net, int ifindex,
+ netdevice_tracker *tracker, gfp_t gfp)
{
struct net_device *dev;
- rcu_read_lock();
- dev = dev_get_by_index_rcu(net, ifindex);
- dev_hold(dev);
- rcu_read_unlock();
+ dev = dev_get_by_index(net, ifindex);
+ if (dev)
+ netdev_tracker_alloc(dev, tracker, gfp);
return dev;
}
-EXPORT_SYMBOL(dev_get_by_index);
+EXPORT_SYMBOL(netdev_get_by_index);
/**
* dev_get_by_napi_id - find a device by napi_id
@@ -3209,7 +3236,7 @@ static u16 skb_tx_hash(const struct net_device *dev,
return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
}
-static void skb_warn_bad_offload(const struct sk_buff *skb)
+void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features;
struct net_device *dev = skb->dev;
@@ -3338,74 +3365,6 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
return vlan_get_protocol_and_depth(skb, type, depth);
}
-/* openvswitch calls this on rx path, so we need a different check.
- */
-static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
-{
- if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL &&
- skb->ip_summed != CHECKSUM_UNNECESSARY;
-
- return skb->ip_summed == CHECKSUM_NONE;
-}
-
-/**
- * __skb_gso_segment - Perform segmentation on skb.
- * @skb: buffer to segment
- * @features: features for the output path (see dev->features)
- * @tx_path: whether it is called in TX path
- *
- * This function segments the given skb and returns a list of segments.
- *
- * It may return NULL if the skb requires no segmentation. This is
- * only possible when GSO is used for verifying header integrity.
- *
- * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb.
- */
-struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
- netdev_features_t features, bool tx_path)
-{
- struct sk_buff *segs;
-
- if (unlikely(skb_needs_check(skb, tx_path))) {
- int err;
-
- /* We're going to init ->check field in TCP or UDP header */
- err = skb_cow_head(skb, 0);
- if (err < 0)
- return ERR_PTR(err);
- }
-
- /* Only report GSO partial support if it will enable us to
- * support segmentation on this frame without needing additional
- * work.
- */
- if (features & NETIF_F_GSO_PARTIAL) {
- netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
- struct net_device *dev = skb->dev;
-
- partial_features |= dev->features & dev->gso_partial_features;
- if (!skb_gso_ok(skb, features | partial_features))
- features &= ~NETIF_F_GSO_PARTIAL;
- }
-
- BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
- sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
-
- SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
- SKB_GSO_CB(skb)->encap_level = 0;
-
- skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
-
- segs = skb_mac_gso_segment(skb, features);
-
- if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
- skb_warn_bad_offload(skb);
-
- return segs;
-}
-EXPORT_SYMBOL(__skb_gso_segment);
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
@@ -6199,7 +6158,8 @@ restart:
if (!napi)
goto out;
- preempt_disable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_disable();
for (;;) {
int work = 0;
@@ -6241,7 +6201,8 @@ count:
if (unlikely(need_resched())) {
if (napi_poll)
busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
- preempt_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
rcu_read_unlock();
cond_resched();
if (loop_end(loop_end_arg, start_time))
@@ -6252,7 +6213,8 @@ count:
}
if (napi_poll)
busy_poll_stop(napi, have_poll_lock, prefer_busy_poll, budget);
- preempt_enable();
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ preempt_enable();
out:
rcu_read_unlock();
}
@@ -8819,6 +8781,8 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
+ if (!memcmp(dev->dev_addr, sa->sa_data, dev->addr_len))
+ return 0;
err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
if (err)
return err;
@@ -10570,8 +10534,10 @@ void netdev_sw_irq_coalesce_default_on(struct net_device *dev)
{
WARN_ON(dev->reg_state == NETREG_REGISTERED);
- dev->gro_flush_timeout = 20000;
- dev->napi_defer_hard_irqs = 1;
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ dev->gro_flush_timeout = 20000;
+ dev->napi_defer_hard_irqs = 1;
+ }
}
EXPORT_SYMBOL_GPL(netdev_sw_irq_coalesce_default_on);
@@ -10632,7 +10598,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
- ref_tracker_dir_init(&dev->refcnt_tracker, 128);
+ ref_tracker_dir_init(&dev->refcnt_tracker, 128, name);
#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
diff --git a/net/core/filter.c b/net/core/filter.c
index d9ce04ca22ce..968139f4a1ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6916,6 +6916,8 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
FIELD)); \
} while (0)
+ BTF_TYPE_EMIT(struct bpf_tcp_sock);
+
switch (si->off) {
case offsetof(struct bpf_tcp_sock, rtt_min):
BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
@@ -11721,3 +11723,66 @@ static int __init bpf_kfunc_init(void)
return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &bpf_kfunc_set_xdp);
}
late_initcall(bpf_kfunc_init);
+
+/* Disables missing prototype warnings */
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+/* bpf_sock_destroy: Destroy the given socket with ECONNABORTED error code.
+ *
+ * The function expects a non-NULL pointer to a socket, and invokes the
+ * protocol specific socket destroy handlers.
+ *
+ * The helper can only be called from BPF contexts that have acquired the socket
+ * locks.
+ *
+ * Parameters:
+ * @sock: Pointer to socket to be destroyed
+ *
+ * Return:
+ * On error, may return EPROTONOSUPPORT, EINVAL.
+ * EPROTONOSUPPORT if protocol specific destroy handler is not supported.
+ * 0 otherwise
+ */
+__bpf_kfunc int bpf_sock_destroy(struct sock_common *sock)
+{
+ struct sock *sk = (struct sock *)sock;
+
+ /* The locking semantics that allow for synchronous execution of the
+ * destroy handlers are only supported for TCP and UDP.
+ * Supporting protocols will need to acquire sock lock in the BPF context
+ * prior to invoking this kfunc.
+ */
+ if (!sk->sk_prot->diag_destroy || (sk->sk_protocol != IPPROTO_TCP &&
+ sk->sk_protocol != IPPROTO_UDP))
+ return -EOPNOTSUPP;
+
+ return sk->sk_prot->diag_destroy(sk, ECONNABORTED);
+}
+
+__diag_pop()
+
+BTF_SET8_START(bpf_sk_iter_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_sock_destroy, KF_TRUSTED_ARGS)
+BTF_SET8_END(bpf_sk_iter_kfunc_ids)
+
+static int tracing_iter_filter(const struct bpf_prog *prog, u32 kfunc_id)
+{
+ if (btf_id_set8_contains(&bpf_sk_iter_kfunc_ids, kfunc_id) &&
+ prog->expected_attach_type != BPF_TRACE_ITER)
+ return -EACCES;
+ return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_sk_iter_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_sk_iter_kfunc_ids,
+ .filter = tracing_iter_filter,
+};
+
+static int init_subsystem(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_sk_iter_kfunc_set);
+}
+late_initcall(init_subsystem);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 25fb0bbc310f..85a2d0d9bd39 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -27,6 +27,7 @@
#include <linux/tcp.h>
#include <linux/ptp_classify.h>
#include <net/flow_dissector.h>
+#include <net/pkt_cls.h>
#include <scsi/fc/fc_fcoe.h>
#include <uapi/linux/batadv_packet.h>
#include <linux/bpf.h>
@@ -241,6 +242,15 @@ void skb_flow_dissect_meta(const struct sk_buff *skb,
FLOW_DISSECTOR_KEY_META,
target_container);
meta->ingress_ifindex = skb->skb_iif;
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ if (tc_skb_ext_tc_enabled()) {
+ struct tc_skb_ext *ext;
+
+ ext = skb_ext_find(skb, TC_SKB_EXT);
+ if (ext)
+ meta->l2_miss = ext->l2_miss;
+ }
+#endif
}
EXPORT_SYMBOL(skb_flow_dissect_meta);
@@ -548,6 +558,30 @@ __skb_flow_dissect_arp(const struct sk_buff *skb,
}
static enum flow_dissect_ret
+__skb_flow_dissect_cfm(const struct sk_buff *skb,
+ struct flow_dissector *flow_dissector,
+ void *target_container, const void *data,
+ int nhoff, int hlen)
+{
+ struct flow_dissector_key_cfm *key, *hdr, _hdr;
+
+ if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_CFM))
+ return FLOW_DISSECT_RET_OUT_GOOD;
+
+ hdr = __skb_header_pointer(skb, nhoff, sizeof(*key), data, hlen, &_hdr);
+ if (!hdr)
+ return FLOW_DISSECT_RET_OUT_BAD;
+
+ key = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CFM,
+ target_container);
+
+ key->mdl_ver = hdr->mdl_ver;
+ key->opcode = hdr->opcode;
+
+ return FLOW_DISSECT_RET_OUT_GOOD;
+}
+
+static enum flow_dissect_ret
__skb_flow_dissect_gre(const struct sk_buff *skb,
struct flow_dissector_key_control *key_control,
struct flow_dissector *flow_dissector,
@@ -1390,6 +1424,12 @@ proto_again:
break;
}
+ case htons(ETH_P_CFM):
+ fdret = __skb_flow_dissect_cfm(skb, flow_dissector,
+ target_container, data,
+ nhoff, hlen);
+ break;
+
default:
fdret = FLOW_DISSECT_RET_OUT_BAD;
break;
diff --git a/net/core/gro.c b/net/core/gro.c
index 2d84165cb4f1..0759277dc14e 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -10,7 +10,7 @@
#define GRO_MAX_HEAD (MAX_HEADER + 128)
static DEFINE_SPINLOCK(offload_lock);
-static struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base);
+struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base);
/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
int gro_normal_batch __read_mostly = 8;
@@ -92,63 +92,6 @@ void dev_remove_offload(struct packet_offload *po)
}
EXPORT_SYMBOL(dev_remove_offload);
-/**
- * skb_eth_gso_segment - segmentation handler for ethernet protocols.
- * @skb: buffer to segment
- * @features: features for the output path (see dev->features)
- * @type: Ethernet Protocol ID
- */
-struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
- netdev_features_t features, __be16 type)
-{
- struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_offload *ptype;
-
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
- if (ptype->type == type && ptype->callbacks.gso_segment) {
- segs = ptype->callbacks.gso_segment(skb, features);
- break;
- }
- }
- rcu_read_unlock();
-
- return segs;
-}
-EXPORT_SYMBOL(skb_eth_gso_segment);
-
-/**
- * skb_mac_gso_segment - mac layer segmentation handler.
- * @skb: buffer to segment
- * @features: features for the output path (see dev->features)
- */
-struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_offload *ptype;
- int vlan_depth = skb->mac_len;
- __be16 type = skb_network_protocol(skb, &vlan_depth);
-
- if (unlikely(!type))
- return ERR_PTR(-EINVAL);
-
- __skb_pull(skb, vlan_depth);
-
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
- if (ptype->type == type && ptype->callbacks.gso_segment) {
- segs = ptype->callbacks.gso_segment(skb, features);
- break;
- }
- }
- rcu_read_unlock();
-
- __skb_push(skb, skb->data - skb_mac_header(skb));
-
- return segs;
-}
-EXPORT_SYMBOL(skb_mac_gso_segment);
int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
{
@@ -239,9 +182,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags;
- __skb_frag_set_page(frag, page);
- skb_frag_off_set(frag, first_offset);
- skb_frag_size_set(frag, first_size);
+ skb_frag_fill_page_desc(frag, page, first_offset, first_size);
memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags);
/* We dont need to clear skbinfo->nr_frags here */
@@ -363,6 +304,24 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old)
}
EXPORT_SYMBOL(napi_gro_flush);
+static unsigned long gro_list_prepare_tc_ext(const struct sk_buff *skb,
+ const struct sk_buff *p,
+ unsigned long diffs)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+ struct tc_skb_ext *skb_ext;
+ struct tc_skb_ext *p_ext;
+
+ skb_ext = skb_ext_find(skb, TC_SKB_EXT);
+ p_ext = skb_ext_find(p, TC_SKB_EXT);
+
+ diffs |= (!!p_ext) ^ (!!skb_ext);
+ if (!diffs && unlikely(skb_ext))
+ diffs |= p_ext->chain ^ skb_ext->chain;
+#endif
+ return diffs;
+}
+
static void gro_list_prepare(const struct list_head *head,
const struct sk_buff *skb)
{
@@ -397,23 +356,11 @@ static void gro_list_prepare(const struct list_head *head,
* avoid trying too hard to skip each of them individually
*/
if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
-#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- struct tc_skb_ext *skb_ext;
- struct tc_skb_ext *p_ext;
-#endif
-
diffs |= p->sk != skb->sk;
diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
-#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- skb_ext = skb_ext_find(skb, TC_SKB_EXT);
- p_ext = skb_ext_find(p, TC_SKB_EXT);
-
- diffs |= (!!p_ext) ^ (!!skb_ext);
- if (!diffs && unlikely(skb_ext))
- diffs |= p_ext->chain ^ skb_ext->chain;
-#endif
+ diffs |= gro_list_prepare_tc_ext(skb, p, diffs);
}
NAPI_GRO_CB(p)->same_flow = !diffs;
@@ -460,6 +407,14 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
}
}
+static void gro_try_pull_from_frag0(struct sk_buff *skb)
+{
+ int grow = skb_gro_offset(skb) - skb_headlen(skb);
+
+ if (grow > 0)
+ gro_pull_from_frag0(skb, grow);
+}
+
static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
{
struct sk_buff *oldest;
@@ -489,7 +444,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
struct sk_buff *pp = NULL;
enum gro_result ret;
int same_flow;
- int grow;
if (netif_elide_gro(skb->dev))
goto normal;
@@ -564,17 +518,14 @@ found_ptype:
else
gro_list->count++;
+ /* Must be called before setting NAPI_GRO_CB(skb)->{age|last} */
+ gro_try_pull_from_frag0(skb);
NAPI_GRO_CB(skb)->age = jiffies;
NAPI_GRO_CB(skb)->last = skb;
if (!skb_is_gso(skb))
skb_shinfo(skb)->gso_size = skb_gro_len(skb);
list_add(&skb->list, &gro_list->list);
ret = GRO_HELD;
-
-pull:
- grow = skb_gro_offset(skb) - skb_headlen(skb);
- if (grow > 0)
- gro_pull_from_frag0(skb, grow);
ok:
if (gro_list->count) {
if (!test_bit(bucket, &napi->gro_bitmask))
@@ -587,7 +538,8 @@ ok:
normal:
ret = GRO_NORMAL;
- goto pull;
+ gro_try_pull_from_frag0(skb);
+ goto ok;
}
struct packet_offload *gro_find_receive_by_type(__be16 type)
diff --git a/net/core/gso.c b/net/core/gso.c
new file mode 100644
index 000000000000..9e1803bfc9c6
--- /dev/null
+++ b/net/core/gso.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/skbuff.h>
+#include <linux/sctp.h>
+#include <net/gso.h>
+#include <net/gro.h>
+
+/**
+ * skb_eth_gso_segment - segmentation handler for ethernet protocols.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ * @type: Ethernet Protocol ID
+ */
+struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, __be16 type)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_offload *ptype;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
+ segs = ptype->callbacks.gso_segment(skb, features);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+EXPORT_SYMBOL(skb_eth_gso_segment);
+
+/**
+ * skb_mac_gso_segment - mac layer segmentation handler.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ */
+struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+ netdev_features_t features)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_offload *ptype;
+ int vlan_depth = skb->mac_len;
+ __be16 type = skb_network_protocol(skb, &vlan_depth);
+
+ if (unlikely(!type))
+ return ERR_PTR(-EINVAL);
+
+ __skb_pull(skb, vlan_depth);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &offload_base, list) {
+ if (ptype->type == type && ptype->callbacks.gso_segment) {
+ segs = ptype->callbacks.gso_segment(skb, features);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ __skb_push(skb, skb->data - skb_mac_header(skb));
+
+ return segs;
+}
+EXPORT_SYMBOL(skb_mac_gso_segment);
+/* openvswitch calls this on rx path, so we need a different check.
+ */
+static bool skb_needs_check(const struct sk_buff *skb, bool tx_path)
+{
+ if (tx_path)
+ return skb->ip_summed != CHECKSUM_PARTIAL &&
+ skb->ip_summed != CHECKSUM_UNNECESSARY;
+
+ return skb->ip_summed == CHECKSUM_NONE;
+}
+
+/**
+ * __skb_gso_segment - Perform segmentation on skb.
+ * @skb: buffer to segment
+ * @features: features for the output path (see dev->features)
+ * @tx_path: whether it is called in TX path
+ *
+ * This function segments the given skb and returns a list of segments.
+ *
+ * It may return NULL if the skb requires no segmentation. This is
+ * only possible when GSO is used for verifying header integrity.
+ *
+ * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb.
+ */
+struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+ netdev_features_t features, bool tx_path)
+{
+ struct sk_buff *segs;
+
+ if (unlikely(skb_needs_check(skb, tx_path))) {
+ int err;
+
+ /* We're going to init ->check field in TCP or UDP header */
+ err = skb_cow_head(skb, 0);
+ if (err < 0)
+ return ERR_PTR(err);
+ }
+
+ /* Only report GSO partial support if it will enable us to
+ * support segmentation on this frame without needing additional
+ * work.
+ */
+ if (features & NETIF_F_GSO_PARTIAL) {
+ netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
+ struct net_device *dev = skb->dev;
+
+ partial_features |= dev->features & dev->gso_partial_features;
+ if (!skb_gso_ok(skb, features | partial_features))
+ features &= ~NETIF_F_GSO_PARTIAL;
+ }
+
+ BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
+ sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
+
+ SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+ SKB_GSO_CB(skb)->encap_level = 0;
+
+ skb_reset_mac_header(skb);
+ skb_reset_mac_len(skb);
+
+ segs = skb_mac_gso_segment(skb, features);
+
+ if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
+ skb_warn_bad_offload(skb);
+
+ return segs;
+}
+EXPORT_SYMBOL(__skb_gso_segment);
+
+/**
+ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_transport_seglen is used to determine the real size of the
+ * individual segments, including Layer4 headers (TCP/UDP).
+ *
+ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+ */
+static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ unsigned int thlen = 0;
+
+ if (skb->encapsulation) {
+ thlen = skb_inner_transport_header(skb) -
+ skb_transport_header(skb);
+
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ thlen += inner_tcp_hdrlen(skb);
+ } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+ thlen = tcp_hdrlen(skb);
+ } else if (unlikely(skb_is_gso_sctp(skb))) {
+ thlen = sizeof(struct sctphdr);
+ } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
+ thlen = sizeof(struct udphdr);
+ }
+ /* UFO sets gso_size to the size of the fragmentation
+ * payload, i.e. the size of the L4 (UDP) header is already
+ * accounted for.
+ */
+ return thlen + shinfo->gso_size;
+}
+
+/**
+ * skb_gso_network_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_network_seglen is used to determine the real size of the
+ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
+ *
+ * The MAC/L2 header is not accounted for.
+ */
+static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) -
+ skb_network_header(skb);
+
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
+
+/**
+ * skb_gso_mac_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_mac_seglen is used to determine the real size of the
+ * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
+ * headers (TCP/UDP).
+ */
+static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+{
+ unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+
+ return hdr_len + skb_gso_transport_seglen(skb);
+}
+
+/**
+ * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
+ *
+ * There are a couple of instances where we have a GSO skb, and we
+ * want to determine what size it would be after it is segmented.
+ *
+ * We might want to check:
+ * - L3+L4+payload size (e.g. IP forwarding)
+ * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
+ *
+ * This is a helper to do that correctly considering GSO_BY_FRAGS.
+ *
+ * @skb: GSO skb
+ *
+ * @seg_len: The segmented length (from skb_gso_*_seglen). In the
+ * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
+ *
+ * @max_len: The maximum permissible length.
+ *
+ * Returns true if the segmented length <= max length.
+ */
+static inline bool skb_gso_size_check(const struct sk_buff *skb,
+ unsigned int seg_len,
+ unsigned int max_len) {
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ const struct sk_buff *iter;
+
+ if (shinfo->gso_size != GSO_BY_FRAGS)
+ return seg_len <= max_len;
+
+ /* Undo this so we can re-use header sizes */
+ seg_len -= GSO_BY_FRAGS;
+
+ skb_walk_frags(skb, iter) {
+ if (seg_len + skb_headlen(iter) > max_len)
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
+ *
+ * @skb: GSO skb
+ * @mtu: MTU to validate against
+ *
+ * skb_gso_validate_network_len validates if a given skb will fit a
+ * wanted MTU once split. It considers L3 headers, L4 headers, and the
+ * payload.
+ */
+bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
+{
+ return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
+}
+EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
+
+/**
+ * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
+ *
+ * @skb: GSO skb
+ * @len: length to validate against
+ *
+ * skb_gso_validate_mac_len validates if a given skb will fit a wanted
+ * length once split, including L2, L3 and L4 headers and the payload.
+ */
+bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
+{
+ return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
+}
+EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
+
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3e3598cd49f2..f4183c4c1ec8 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -308,7 +308,7 @@ EXPORT_SYMBOL_GPL(get_net_ns_by_id);
/* init code that must occur even if setup_net() is not called. */
static __net_init void preinit_net(struct net *net)
{
- ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
+ ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
}
/*
@@ -322,7 +322,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
LIST_HEAD(net_exit_list);
refcount_set(&net->ns.count, 1);
- ref_tracker_dir_init(&net->refcnt_tracker, 128);
+ ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
refcount_set(&net->passive, 1);
get_random_bytes(&net->hash_mix, sizeof(u32));
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index de17ca2f7dbf..ea9231378aa6 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -8,7 +8,7 @@
#include "netdev-genl-gen.h"
-#include <linux/netdev.h>
+#include <uapi/linux/netdev.h>
/* NETDEV_CMD_DEV_GET - do */
static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 74d74fc23167..7b370c073e7d 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -9,7 +9,7 @@
#include <net/netlink.h>
#include <net/genetlink.h>
-#include <linux/netdev.h>
+#include <uapi/linux/netdev.h>
int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e6a739b1afa9..543007f159f9 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -690,7 +690,7 @@ int netpoll_setup(struct netpoll *np)
err = -ENODEV;
goto unlock;
}
- dev_hold(ndev);
+ netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL);
if (netdev_master_upper_dev_get(ndev)) {
np_err(np, "%s is a slave device, aborting\n", np->dev_name);
@@ -783,12 +783,11 @@ put_noaddr:
err = __netpoll_setup(np, ndev);
if (err)
goto put;
- netdev_tracker_alloc(ndev, &np->dev_tracker, GFP_KERNEL);
rtnl_unlock();
return 0;
put:
- dev_put(ndev);
+ netdev_put(ndev, &np->dev_tracker);
unlock:
rtnl_unlock();
return err;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 760238196db1..f56b8d697014 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2785,14 +2785,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb,
break;
}
get_page(pkt_dev->page);
- skb_frag_set_page(skb, i, pkt_dev->page);
- skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0);
+
/*last fragment, fill rest of data*/
if (i == (frags - 1))
- skb_frag_size_set(&skb_shinfo(skb)->frags[i],
- (datalen < PAGE_SIZE ? datalen : PAGE_SIZE));
+ skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i],
+ pkt_dev->page, 0,
+ (datalen < PAGE_SIZE ?
+ datalen : PAGE_SIZE));
else
- skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len);
+ skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[i],
+ pkt_dev->page, 0, frag_len);
+
datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]);
skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 41de3a2f29e1..2c61fb912772 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -961,24 +961,27 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size(sizeof(struct ifla_vf_rate)) +
nla_total_size(sizeof(struct ifla_vf_link_state)) +
nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
- nla_total_size(0) + /* nest IFLA_VF_STATS */
- /* IFLA_VF_STATS_RX_PACKETS */
- nla_total_size_64bit(sizeof(__u64)) +
- /* IFLA_VF_STATS_TX_PACKETS */
- nla_total_size_64bit(sizeof(__u64)) +
- /* IFLA_VF_STATS_RX_BYTES */
- nla_total_size_64bit(sizeof(__u64)) +
- /* IFLA_VF_STATS_TX_BYTES */
- nla_total_size_64bit(sizeof(__u64)) +
- /* IFLA_VF_STATS_BROADCAST */
- nla_total_size_64bit(sizeof(__u64)) +
- /* IFLA_VF_STATS_MULTICAST */
- nla_total_size_64bit(sizeof(__u64)) +
- /* IFLA_VF_STATS_RX_DROPPED */
- nla_total_size_64bit(sizeof(__u64)) +
- /* IFLA_VF_STATS_TX_DROPPED */
- nla_total_size_64bit(sizeof(__u64)) +
nla_total_size(sizeof(struct ifla_vf_trust)));
+ if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) {
+ size += num_vfs *
+ (nla_total_size(0) + /* nest IFLA_VF_STATS */
+ /* IFLA_VF_STATS_RX_PACKETS */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_PACKETS */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_RX_BYTES */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_BYTES */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_BROADCAST */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_MULTICAST */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_RX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* IFLA_VF_STATS_TX_DROPPED */
+ nla_total_size_64bit(sizeof(__u64)));
+ }
return size;
} else
return 0;
@@ -1270,7 +1273,8 @@ static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct net_device *dev,
int vfs_num,
- struct nlattr *vfinfo)
+ struct nlattr *vfinfo,
+ u32 ext_filter_mask)
{
struct ifla_vf_rss_query_en vf_rss_query_en;
struct nlattr *vf, *vfstats, *vfvlanlist;
@@ -1376,33 +1380,35 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
goto nla_put_vf_failure;
}
nla_nest_end(skb, vfvlanlist);
- memset(&vf_stats, 0, sizeof(vf_stats));
- if (dev->netdev_ops->ndo_get_vf_stats)
- dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
- &vf_stats);
- vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS);
- if (!vfstats)
- goto nla_put_vf_failure;
- if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
- vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
- nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
- vf_stats.tx_packets, IFLA_VF_STATS_PAD) ||
- nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES,
- vf_stats.rx_bytes, IFLA_VF_STATS_PAD) ||
- nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES,
- vf_stats.tx_bytes, IFLA_VF_STATS_PAD) ||
- nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
- vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
- nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
- vf_stats.multicast, IFLA_VF_STATS_PAD) ||
- nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
- vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
- nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
- vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
- nla_nest_cancel(skb, vfstats);
- goto nla_put_vf_failure;
+ if (~ext_filter_mask & RTEXT_FILTER_SKIP_STATS) {
+ memset(&vf_stats, 0, sizeof(vf_stats));
+ if (dev->netdev_ops->ndo_get_vf_stats)
+ dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num,
+ &vf_stats);
+ vfstats = nla_nest_start_noflag(skb, IFLA_VF_STATS);
+ if (!vfstats)
+ goto nla_put_vf_failure;
+ if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS,
+ vf_stats.rx_packets, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS,
+ vf_stats.tx_packets, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_BYTES,
+ vf_stats.rx_bytes, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_BYTES,
+ vf_stats.tx_bytes, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST,
+ vf_stats.broadcast, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST,
+ vf_stats.multicast, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_DROPPED,
+ vf_stats.rx_dropped, IFLA_VF_STATS_PAD) ||
+ nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_DROPPED,
+ vf_stats.tx_dropped, IFLA_VF_STATS_PAD)) {
+ nla_nest_cancel(skb, vfstats);
+ goto nla_put_vf_failure;
+ }
+ nla_nest_end(skb, vfstats);
}
- nla_nest_end(skb, vfstats);
nla_nest_end(skb, vf);
return 0;
@@ -1435,7 +1441,7 @@ static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb,
return -EMSGSIZE;
for (i = 0; i < num_vfs; i++) {
- if (rtnl_fill_vfinfo(skb, dev, i, vfinfo))
+ if (rtnl_fill_vfinfo(skb, dev, i, vfinfo, ext_filter_mask))
return -EMSGSIZE;
}
@@ -2377,45 +2383,43 @@ static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
- if (dev) {
- if (tb[IFLA_ADDRESS] &&
- nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
- return -EINVAL;
+ if (tb[IFLA_ADDRESS] &&
+ nla_len(tb[IFLA_ADDRESS]) < dev->addr_len)
+ return -EINVAL;
- if (tb[IFLA_BROADCAST] &&
- nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
- return -EINVAL;
+ if (tb[IFLA_BROADCAST] &&
+ nla_len(tb[IFLA_BROADCAST]) < dev->addr_len)
+ return -EINVAL;
- if (tb[IFLA_GSO_MAX_SIZE] &&
- nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) {
- NL_SET_ERR_MSG(extack, "too big gso_max_size");
- return -EINVAL;
- }
+ if (tb[IFLA_GSO_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GSO_MAX_SIZE]) > dev->tso_max_size) {
+ NL_SET_ERR_MSG(extack, "too big gso_max_size");
+ return -EINVAL;
+ }
- if (tb[IFLA_GSO_MAX_SEGS] &&
- (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS ||
- nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) {
- NL_SET_ERR_MSG(extack, "too big gso_max_segs");
- return -EINVAL;
- }
+ if (tb[IFLA_GSO_MAX_SEGS] &&
+ (nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > GSO_MAX_SEGS ||
+ nla_get_u32(tb[IFLA_GSO_MAX_SEGS]) > dev->tso_max_segs)) {
+ NL_SET_ERR_MSG(extack, "too big gso_max_segs");
+ return -EINVAL;
+ }
- if (tb[IFLA_GRO_MAX_SIZE] &&
- nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) {
- NL_SET_ERR_MSG(extack, "too big gro_max_size");
- return -EINVAL;
- }
+ if (tb[IFLA_GRO_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GRO_MAX_SIZE]) > GRO_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "too big gro_max_size");
+ return -EINVAL;
+ }
- if (tb[IFLA_GSO_IPV4_MAX_SIZE] &&
- nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) {
- NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size");
- return -EINVAL;
- }
+ if (tb[IFLA_GSO_IPV4_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]) > dev->tso_max_size) {
+ NL_SET_ERR_MSG(extack, "too big gso_ipv4_max_size");
+ return -EINVAL;
+ }
- if (tb[IFLA_GRO_IPV4_MAX_SIZE] &&
- nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) {
- NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size");
- return -EINVAL;
- }
+ if (tb[IFLA_GRO_IPV4_MAX_SIZE] &&
+ nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]) > GRO_MAX_SIZE) {
+ NL_SET_ERR_MSG(extack, "too big gro_ipv4_max_size");
+ return -EINVAL;
}
if (tb[IFLA_AF_SPEC]) {
@@ -2736,10 +2740,6 @@ static int do_setlink(const struct sk_buff *skb,
char ifname[IFNAMSIZ];
int err;
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- return err;
-
if (tb[IFLA_IFNAME])
nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
else
@@ -3156,6 +3156,10 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
}
+ err = validate_linkmsg(dev, tb, extack);
+ if (err < 0)
+ goto errout;
+
err = do_setlink(skb, dev, ifm, extack, tb, 0);
errout:
return err;
@@ -3399,6 +3403,9 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
for_each_netdev_safe(net, dev, aux) {
if (dev->group == group) {
+ err = validate_linkmsg(dev, tb, extack);
+ if (err < 0)
+ return err;
err = do_setlink(skb, dev, ifm, extack, tb, 0);
if (err < 0)
return err;
@@ -3556,10 +3563,6 @@ replay:
m_ops = master_dev->rtnl_link_ops;
}
- err = validate_linkmsg(dev, tb, extack);
- if (err < 0)
- return err;
-
if (tb[IFLA_LINKINFO]) {
err = nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX,
tb[IFLA_LINKINFO],
@@ -3623,6 +3626,10 @@ replay:
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ err = validate_linkmsg(dev, tb, extack);
+ if (err < 0)
+ return err;
+
if (linkinfo[IFLA_INFO_DATA]) {
if (!ops || ops != dev->rtnl_link_ops ||
!ops->changelink)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cea28d30abb5..fee2b1c105fe 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -67,6 +67,7 @@
#include <net/dst.h>
#include <net/sock.h>
#include <net/checksum.h>
+#include <net/gso.h>
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/mpls.h>
@@ -92,15 +93,7 @@ static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
-/* skb_small_head_cache and related code is only supported
- * for CONFIG_SLAB and CONFIG_SLUB.
- * As soon as SLOB is removed from the kernel, we can clean up this.
- */
-#if !defined(CONFIG_SLOB)
-# define HAVE_SKB_SMALL_HEAD_CACHE 1
-#endif
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
static struct kmem_cache *skb_small_head_cache __ro_after_init;
#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
@@ -117,7 +110,6 @@ static struct kmem_cache *skb_small_head_cache __ro_after_init;
#define SKB_SMALL_HEAD_HEADROOM \
SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
-#endif /* HAVE_SKB_SMALL_HEAD_CACHE */
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
@@ -562,7 +554,6 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
void *obj;
obj_size = SKB_HEAD_ALIGN(*size);
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
!(flags & KMALLOC_NOT_NORMAL_BITS)) {
obj = kmem_cache_alloc_node(skb_small_head_cache,
@@ -576,7 +567,6 @@ static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
obj = kmem_cache_alloc_node(skb_small_head_cache, flags, node);
goto out;
}
-#endif
*size = obj_size = kmalloc_size_roundup(obj_size);
/*
* Try a regular allocation, when that fails and we're not entitled
@@ -898,11 +888,9 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
static void skb_kfree_head(void *head, unsigned int end_offset)
{
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
if (end_offset == SKB_SMALL_HEAD_HEADROOM)
kmem_cache_free(skb_small_head_cache, head);
else
-#endif
kfree(head);
}
@@ -2160,7 +2148,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
if (likely(skb_end_offset(skb) == saved_end_offset))
return 0;
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
/* We can not change skb->end if the original or new value
* is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head().
*/
@@ -2174,7 +2161,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri)
WARN_ON_ONCE(1);
return 0;
}
-#endif
shinfo = skb_shinfo(skb);
@@ -4203,13 +4189,13 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
EXPORT_SYMBOL(skb_find_text);
int skb_append_pagefrags(struct sk_buff *skb, struct page *page,
- int offset, size_t size)
+ int offset, size_t size, size_t max_frags)
{
int i = skb_shinfo(skb)->nr_frags;
if (skb_can_coalesce(skb, i, page, offset)) {
skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
- } else if (i < MAX_SKB_FRAGS) {
+ } else if (i < max_frags) {
skb_zcopy_downgrade_managed(skb);
get_page(page);
skb_fill_page_desc_noacc(skb, i, page, offset, size);
@@ -4249,10 +4235,9 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb)
struct page *page;
page = virt_to_head_page(frag_skb->head);
- __skb_frag_set_page(&head_frag, page);
- skb_frag_off_set(&head_frag, frag_skb->data -
- (unsigned char *)page_address(page));
- skb_frag_size_set(&head_frag, skb_headlen(frag_skb));
+ skb_frag_fill_page_desc(&head_frag, page, frag_skb->data -
+ (unsigned char *)page_address(page),
+ skb_headlen(frag_skb));
return head_frag;
}
@@ -4768,7 +4753,6 @@ void __init skb_init(void)
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
-#ifdef HAVE_SKB_SMALL_HEAD_CACHE
/* usercopy should only access first SKB_SMALL_HEAD_HEADROOM bytes.
* struct skb_shared_info is located at the end of skb->head,
* and should not be copied to/from user.
@@ -4780,7 +4764,6 @@ void __init skb_init(void)
0,
SKB_SMALL_HEAD_HEADROOM,
NULL);
-#endif
skb_extensions_init();
}
@@ -5784,147 +5767,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
}
EXPORT_SYMBOL_GPL(skb_scrub_packet);
-/**
- * skb_gso_transport_seglen - Return length of individual segments of a gso packet
- *
- * @skb: GSO skb
- *
- * skb_gso_transport_seglen is used to determine the real size of the
- * individual segments, including Layer4 headers (TCP/UDP).
- *
- * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
- */
-static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
-{
- const struct skb_shared_info *shinfo = skb_shinfo(skb);
- unsigned int thlen = 0;
-
- if (skb->encapsulation) {
- thlen = skb_inner_transport_header(skb) -
- skb_transport_header(skb);
-
- if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
- thlen += inner_tcp_hdrlen(skb);
- } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
- thlen = tcp_hdrlen(skb);
- } else if (unlikely(skb_is_gso_sctp(skb))) {
- thlen = sizeof(struct sctphdr);
- } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
- thlen = sizeof(struct udphdr);
- }
- /* UFO sets gso_size to the size of the fragmentation
- * payload, i.e. the size of the L4 (UDP) header is already
- * accounted for.
- */
- return thlen + shinfo->gso_size;
-}
-
-/**
- * skb_gso_network_seglen - Return length of individual segments of a gso packet
- *
- * @skb: GSO skb
- *
- * skb_gso_network_seglen is used to determine the real size of the
- * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
- *
- * The MAC/L2 header is not accounted for.
- */
-static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
-{
- unsigned int hdr_len = skb_transport_header(skb) -
- skb_network_header(skb);
-
- return hdr_len + skb_gso_transport_seglen(skb);
-}
-
-/**
- * skb_gso_mac_seglen - Return length of individual segments of a gso packet
- *
- * @skb: GSO skb
- *
- * skb_gso_mac_seglen is used to determine the real size of the
- * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
- * headers (TCP/UDP).
- */
-static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
-{
- unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
-
- return hdr_len + skb_gso_transport_seglen(skb);
-}
-
-/**
- * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
- *
- * There are a couple of instances where we have a GSO skb, and we
- * want to determine what size it would be after it is segmented.
- *
- * We might want to check:
- * - L3+L4+payload size (e.g. IP forwarding)
- * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
- *
- * This is a helper to do that correctly considering GSO_BY_FRAGS.
- *
- * @skb: GSO skb
- *
- * @seg_len: The segmented length (from skb_gso_*_seglen). In the
- * GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
- *
- * @max_len: The maximum permissible length.
- *
- * Returns true if the segmented length <= max length.
- */
-static inline bool skb_gso_size_check(const struct sk_buff *skb,
- unsigned int seg_len,
- unsigned int max_len) {
- const struct skb_shared_info *shinfo = skb_shinfo(skb);
- const struct sk_buff *iter;
-
- if (shinfo->gso_size != GSO_BY_FRAGS)
- return seg_len <= max_len;
-
- /* Undo this so we can re-use header sizes */
- seg_len -= GSO_BY_FRAGS;
-
- skb_walk_frags(skb, iter) {
- if (seg_len + skb_headlen(iter) > max_len)
- return false;
- }
-
- return true;
-}
-
-/**
- * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
- *
- * @skb: GSO skb
- * @mtu: MTU to validate against
- *
- * skb_gso_validate_network_len validates if a given skb will fit a
- * wanted MTU once split. It considers L3 headers, L4 headers, and the
- * payload.
- */
-bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
-{
- return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
-}
-EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
-
-/**
- * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
- *
- * @skb: GSO skb
- * @len: length to validate against
- *
- * skb_gso_validate_mac_len validates if a given skb will fit a wanted
- * length once split, including L2, L3 and L4 headers and the payload.
- */
-bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
-{
- return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
-}
-EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
-
static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
{
int mac_len, meta_len;
@@ -6912,3 +6754,91 @@ nodefer: __kfree_skb(skb);
if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
smp_call_function_single_async(cpu, &sd->defer_csd);
}
+
+static void skb_splice_csum_page(struct sk_buff *skb, struct page *page,
+ size_t offset, size_t len)
+{
+ const char *kaddr;
+ __wsum csum;
+
+ kaddr = kmap_local_page(page);
+ csum = csum_partial(kaddr + offset, len, 0);
+ kunmap_local(kaddr);
+ skb->csum = csum_block_add(skb->csum, csum, skb->len);
+}
+
+/**
+ * skb_splice_from_iter - Splice (or copy) pages to skbuff
+ * @skb: The buffer to add pages to
+ * @iter: Iterator representing the pages to be added
+ * @maxsize: Maximum amount of pages to be added
+ * @gfp: Allocation flags
+ *
+ * This is a common helper function for supporting MSG_SPLICE_PAGES. It
+ * extracts pages from an iterator and adds them to the socket buffer if
+ * possible, copying them to fragments if not possible (such as if they're slab
+ * pages).
+ *
+ * Returns the amount of data spliced/copied or -EMSGSIZE if there's
+ * insufficient space in the buffer to transfer anything.
+ */
+ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter,
+ ssize_t maxsize, gfp_t gfp)
+{
+ size_t frag_limit = READ_ONCE(sysctl_max_skb_frags);
+ struct page *pages[8], **ppages = pages;
+ ssize_t spliced = 0, ret = 0;
+ unsigned int i;
+
+ while (iter->count > 0) {
+ ssize_t space, nr, len;
+ size_t off;
+
+ ret = -EMSGSIZE;
+ space = frag_limit - skb_shinfo(skb)->nr_frags;
+ if (space < 0)
+ break;
+
+ /* We might be able to coalesce without increasing nr_frags */
+ nr = clamp_t(size_t, space, 1, ARRAY_SIZE(pages));
+
+ len = iov_iter_extract_pages(iter, &ppages, maxsize, nr, 0, &off);
+ if (len <= 0) {
+ ret = len ?: -EIO;
+ break;
+ }
+
+ i = 0;
+ do {
+ struct page *page = pages[i++];
+ size_t part = min_t(size_t, PAGE_SIZE - off, len);
+
+ ret = -EIO;
+ if (WARN_ON_ONCE(!sendpage_ok(page)))
+ goto out;
+
+ ret = skb_append_pagefrags(skb, page, off, part,
+ frag_limit);
+ if (ret < 0) {
+ iov_iter_revert(iter, len);
+ goto out;
+ }
+
+ if (skb->ip_summed == CHECKSUM_NONE)
+ skb_splice_csum_page(skb, page, off, part);
+
+ off = 0;
+ spliced += part;
+ maxsize -= part;
+ len -= part;
+ } while (len > 0);
+
+ if (maxsize <= 0)
+ break;
+ }
+
+out:
+ skb_len_add(skb, spliced);
+ return spliced ?: ret;
+}
+EXPORT_SYMBOL(skb_splice_from_iter);
diff --git a/net/core/sock.c b/net/core/sock.c
index 6e5662ca00fe..5f1747c12004 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -114,6 +114,9 @@
#include <linux/memcontrol.h>
#include <linux/prefetch.h>
#include <linux/compat.h>
+#include <linux/mroute.h>
+#include <linux/mroute6.h>
+#include <linux/icmpv6.h>
#include <linux/uaccess.h>
@@ -138,6 +141,7 @@
#include <net/tcp.h>
#include <net/busy_poll.h>
+#include <net/phonet/phonet.h>
#include <linux/ethtool.h>
@@ -1246,6 +1250,13 @@ set_sndbuf:
clear_bit(SOCK_PASSCRED, &sock->flags);
break;
+ case SO_PASSPIDFD:
+ if (valbool)
+ set_bit(SOCK_PASSPIDFD, &sock->flags);
+ else
+ clear_bit(SOCK_PASSPIDFD, &sock->flags);
+ break;
+
case SO_TIMESTAMP_OLD:
case SO_TIMESTAMP_NEW:
case SO_TIMESTAMPNS_OLD:
@@ -1726,6 +1737,10 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
break;
+ case SO_PASSPIDFD:
+ v.val = !!test_bit(SOCK_PASSPIDFD, &sock->flags);
+ break;
+
case SO_PEERCRED:
{
struct ucred peercred;
@@ -1741,6 +1756,39 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
goto lenout;
}
+ case SO_PEERPIDFD:
+ {
+ struct pid *peer_pid;
+ struct file *pidfd_file = NULL;
+ int pidfd;
+
+ if (len > sizeof(pidfd))
+ len = sizeof(pidfd);
+
+ spin_lock(&sk->sk_peer_lock);
+ peer_pid = get_pid(sk->sk_peer_pid);
+ spin_unlock(&sk->sk_peer_lock);
+
+ if (!peer_pid)
+ return -ESRCH;
+
+ pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
+ put_pid(peer_pid);
+ if (pidfd < 0)
+ return pidfd;
+
+ if (copy_to_sockptr(optval, &pidfd, len) ||
+ copy_to_sockptr(optlen, &len, sizeof(int))) {
+ put_unused_fd(pidfd);
+ fput(pidfd_file);
+
+ return -EFAULT;
+ }
+
+ fd_install(pidfd, pidfd_file);
+ return 0;
+ }
+
case SO_PEERGROUPS:
{
const struct cred *cred;
@@ -4100,3 +4148,63 @@ int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
return sk->sk_prot->bind_add(sk, addr, addr_len);
}
EXPORT_SYMBOL(sock_bind_add);
+
+/* Copy 'size' bytes from userspace and return `size` back to userspace */
+int sock_ioctl_inout(struct sock *sk, unsigned int cmd,
+ void __user *arg, void *karg, size_t size)
+{
+ int ret;
+
+ if (copy_from_user(karg, arg, size))
+ return -EFAULT;
+
+ ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, karg);
+ if (ret)
+ return ret;
+
+ if (copy_to_user(arg, karg, size))
+ return -EFAULT;
+
+ return 0;
+}
+EXPORT_SYMBOL(sock_ioctl_inout);
+
+/* This is the most common ioctl prep function, where the result (4 bytes) is
+ * copied back to userspace if the ioctl() returns successfully. No input is
+ * copied from userspace as input argument.
+ */
+static int sock_ioctl_out(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+ int ret, karg = 0;
+
+ ret = READ_ONCE(sk->sk_prot)->ioctl(sk, cmd, &karg);
+ if (ret)
+ return ret;
+
+ return put_user(karg, (int __user *)arg);
+}
+
+/* A wrapper around sock ioctls, which copies the data from userspace
+ * (depending on the protocol/ioctl), and copies back the result to userspace.
+ * The main motivation for this function is to pass kernel memory to the
+ * protocol ioctl callbacks, instead of userspace memory.
+ */
+int sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+ int rc = 1;
+
+ if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET)
+ rc = ipmr_sk_ioctl(sk, cmd, arg);
+ else if (sk->sk_type == SOCK_RAW && sk->sk_family == AF_INET6)
+ rc = ip6mr_sk_ioctl(sk, cmd, arg);
+ else if (sk_is_phonet(sk))
+ rc = phonet_sk_ioctl(sk, cmd, arg);
+
+ /* If ioctl was processed, returns its value */
+ if (rc <= 0)
+ return rc;
+
+ /* Otherwise call the default handler */
+ return sock_ioctl_out(sk, cmd, arg);
+}
+EXPORT_SYMBOL(sk_ioctl);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 9ddc3a9e89e4..1f748ed1279d 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -292,7 +292,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int dccp_setsockopt(struct sock *sk, int level, int optname,
sockptr_t optval, unsigned int optlen);
-int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+int dccp_ioctl(struct sock *sk, int cmd, int *karg);
int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
int *addr_len);
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index b0ebf853cb07..f331e5977a84 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -362,7 +362,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
EXPORT_SYMBOL_GPL(dccp_poll);
-int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+int dccp_ioctl(struct sock *sk, int cmd, int *karg)
{
int rc = -ENOTCONN;
@@ -373,17 +373,17 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
switch (cmd) {
case SIOCOUTQ: {
- int amount = sk_wmem_alloc_get(sk);
+ *karg = sk_wmem_alloc_get(sk);
/* Using sk_wmem_alloc here because sk_wmem_queued is not used by DCCP and
* always 0, comparably to UDP.
*/
- rc = put_user(amount, (int __user *)arg);
+ rc = 0;
}
break;
case SIOCINQ: {
struct sk_buff *skb;
- unsigned long amount = 0;
+ *karg = 0;
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL) {
@@ -391,9 +391,9 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
* We will only return the amount of this packet since
* that is all that will be read.
*/
- amount = skb->len;
+ *karg = skb->len;
}
- rc = put_user(amount, (int __user *)arg);
+ rc = 0;
}
break;
default:
diff --git a/net/devlink/health.c b/net/devlink/health.c
index 0839706d5741..194340a8bb86 100644
--- a/net/devlink/health.c
+++ b/net/devlink/health.c
@@ -480,7 +480,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter,
int err;
WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+ ASSERT_DEVLINK_REGISTERED(devlink);
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index cd0254968076..1f00f874471f 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -447,18 +447,18 @@ static void devlink_port_fn_cap_fill(struct nla_bitfield32 *caps,
caps->value |= cap;
}
-static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
- struct devlink_port *devlink_port,
+static int devlink_port_fn_roce_fill(struct devlink_port *devlink_port,
struct nla_bitfield32 *caps,
struct netlink_ext_ack *extack)
{
bool is_enable;
int err;
- if (!ops->port_fn_roce_get)
+ if (!devlink_port->ops->port_fn_roce_get)
return 0;
- err = ops->port_fn_roce_get(devlink_port, &is_enable, extack);
+ err = devlink_port->ops->port_fn_roce_get(devlink_port, &is_enable,
+ extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -469,19 +469,19 @@ static int devlink_port_fn_roce_fill(const struct devlink_ops *ops,
return 0;
}
-static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
- struct devlink_port *devlink_port,
+static int devlink_port_fn_migratable_fill(struct devlink_port *devlink_port,
struct nla_bitfield32 *caps,
struct netlink_ext_ack *extack)
{
bool is_enable;
int err;
- if (!ops->port_fn_migratable_get ||
+ if (!devlink_port->ops->port_fn_migratable_get ||
devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PCI_VF)
return 0;
- err = ops->port_fn_migratable_get(devlink_port, &is_enable, extack);
+ err = devlink_port->ops->port_fn_migratable_get(devlink_port,
+ &is_enable, extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -492,8 +492,7 @@ static int devlink_port_fn_migratable_fill(const struct devlink_ops *ops,
return 0;
}
-static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
- struct devlink_port *devlink_port,
+static int devlink_port_fn_caps_fill(struct devlink_port *devlink_port,
struct sk_buff *msg,
struct netlink_ext_ack *extack,
bool *msg_updated)
@@ -501,11 +500,11 @@ static int devlink_port_fn_caps_fill(const struct devlink_ops *ops,
struct nla_bitfield32 caps = {};
int err;
- err = devlink_port_fn_roce_fill(ops, devlink_port, &caps, extack);
+ err = devlink_port_fn_roce_fill(devlink_port, &caps, extack);
if (err)
return err;
- err = devlink_port_fn_migratable_fill(ops, devlink_port, &caps, extack);
+ err = devlink_port_fn_migratable_fill(devlink_port, &caps, extack);
if (err)
return err;
@@ -691,8 +690,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg,
return 0;
}
-static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
- struct devlink_port *port,
+static int devlink_port_fn_hw_addr_fill(struct devlink_port *port,
struct sk_buff *msg,
struct netlink_ext_ack *extack,
bool *msg_updated)
@@ -701,10 +699,10 @@ static int devlink_port_fn_hw_addr_fill(const struct devlink_ops *ops,
int hw_addr_len;
int err;
- if (!ops->port_function_hw_addr_get)
+ if (!port->ops->port_fn_hw_addr_get)
return 0;
- err = ops->port_function_hw_addr_get(port, hw_addr, &hw_addr_len,
+ err = port->ops->port_fn_hw_addr_get(port, hw_addr, &hw_addr_len,
extack);
if (err) {
if (err == -EOPNOTSUPP)
@@ -789,8 +787,7 @@ devlink_port_fn_opstate_valid(enum devlink_port_fn_opstate opstate)
opstate == DEVLINK_PORT_FN_OPSTATE_ATTACHED;
}
-static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
- struct devlink_port *port,
+static int devlink_port_fn_state_fill(struct devlink_port *port,
struct sk_buff *msg,
struct netlink_ext_ack *extack,
bool *msg_updated)
@@ -799,10 +796,10 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
enum devlink_port_fn_state state;
int err;
- if (!ops->port_fn_state_get)
+ if (!port->ops->port_fn_state_get)
return 0;
- err = ops->port_fn_state_get(port, &state, &opstate, extack);
+ err = port->ops->port_fn_state_get(port, &state, &opstate, extack);
if (err) {
if (err == -EOPNOTSUPP)
return 0;
@@ -829,18 +826,16 @@ static int
devlink_port_fn_mig_set(struct devlink_port *devlink_port, bool enable,
struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops = devlink_port->devlink->ops;
-
- return ops->port_fn_migratable_set(devlink_port, enable, extack);
+ return devlink_port->ops->port_fn_migratable_set(devlink_port, enable,
+ extack);
}
static int
devlink_port_fn_roce_set(struct devlink_port *devlink_port, bool enable,
struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops = devlink_port->devlink->ops;
-
- return ops->port_fn_roce_set(devlink_port, enable, extack);
+ return devlink_port->ops->port_fn_roce_set(devlink_port, enable,
+ extack);
}
static int devlink_port_fn_caps_set(struct devlink_port *devlink_port,
@@ -874,7 +869,6 @@ static int
devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *port,
struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops;
struct nlattr *function_attr;
bool msg_updated = false;
int err;
@@ -883,16 +877,13 @@ devlink_nl_port_function_attrs_put(struct sk_buff *msg, struct devlink_port *por
if (!function_attr)
return -EMSGSIZE;
- ops = port->devlink->ops;
- err = devlink_port_fn_hw_addr_fill(ops, port, msg, extack,
- &msg_updated);
+ err = devlink_port_fn_hw_addr_fill(port, msg, extack, &msg_updated);
if (err)
goto out;
- err = devlink_port_fn_caps_fill(ops, port, msg, extack,
- &msg_updated);
+ err = devlink_port_fn_caps_fill(port, msg, extack, &msg_updated);
if (err)
goto out;
- err = devlink_port_fn_state_fill(ops, port, msg, extack, &msg_updated);
+ err = devlink_port_fn_state_fill(port, msg, extack, &msg_updated);
out:
if (err || !msg_updated)
nla_nest_cancel(msg, function_attr);
@@ -1137,14 +1128,13 @@ static int devlink_port_type_set(struct devlink_port *devlink_port,
{
int err;
- if (!devlink_port->devlink->ops->port_type_set)
+ if (!devlink_port->ops->port_type_set)
return -EOPNOTSUPP;
if (port_type == devlink_port->type)
return 0;
- err = devlink_port->devlink->ops->port_type_set(devlink_port,
- port_type);
+ err = devlink_port->ops->port_type_set(devlink_port, port_type);
if (err)
return err;
@@ -1157,7 +1147,6 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
const struct nlattr *attr,
struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops = port->devlink->ops;
const u8 *hw_addr;
int hw_addr_len;
@@ -1178,7 +1167,7 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
}
}
- return ops->port_function_hw_addr_set(port, hw_addr, hw_addr_len,
+ return port->ops->port_fn_hw_addr_set(port, hw_addr, hw_addr_len,
extack);
}
@@ -1187,22 +1176,20 @@ static int devlink_port_fn_state_set(struct devlink_port *port,
struct netlink_ext_ack *extack)
{
enum devlink_port_fn_state state;
- const struct devlink_ops *ops;
state = nla_get_u8(attr);
- ops = port->devlink->ops;
- return ops->port_fn_state_set(port, state, extack);
+ return port->ops->port_fn_state_set(port, state, extack);
}
static int devlink_port_function_validate(struct devlink_port *devlink_port,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
- const struct devlink_ops *ops = devlink_port->devlink->ops;
+ const struct devlink_port_ops *ops = devlink_port->ops;
struct nlattr *attr;
if (tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR] &&
- !ops->port_function_hw_addr_set) {
+ !ops->port_fn_hw_addr_set) {
NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
"Port doesn't support function attributes");
return -EOPNOTSUPP;
@@ -1320,7 +1307,7 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_SPLIT_COUNT))
return -EINVAL;
- if (!devlink->ops->port_split)
+ if (!devlink_port->ops->port_split)
return -EOPNOTSUPP;
count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
@@ -1339,8 +1326,8 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
return -EINVAL;
}
- return devlink->ops->port_split(devlink, devlink_port, count,
- info->extack);
+ return devlink_port->ops->port_split(devlink, devlink_port, count,
+ info->extack);
}
static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
@@ -1349,40 +1336,9 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
struct devlink_port *devlink_port = info->user_ptr[1];
struct devlink *devlink = info->user_ptr[0];
- if (!devlink->ops->port_unsplit)
+ if (!devlink_port->ops->port_unsplit)
return -EOPNOTSUPP;
- return devlink->ops->port_unsplit(devlink, devlink_port, info->extack);
-}
-
-static int devlink_port_new_notify(struct devlink *devlink,
- unsigned int port_index,
- struct genl_info *info)
-{
- struct devlink_port *devlink_port;
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- lockdep_assert_held(&devlink->lock);
- devlink_port = devlink_port_get_by_index(devlink, port_index);
- if (!devlink_port) {
- err = -ENODEV;
- goto out;
- }
-
- err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
- info->snd_portid, info->snd_seq, 0, NULL);
- if (err)
- goto out;
-
- return genlmsg_reply(msg, info);
-
-out:
- nlmsg_free(msg);
- return err;
+ return devlink_port->ops->port_unsplit(devlink, devlink_port, info->extack);
}
static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
@@ -1391,10 +1347,11 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
struct netlink_ext_ack *extack = info->extack;
struct devlink_port_new_attrs new_attrs = {};
struct devlink *devlink = info->user_ptr[0];
- unsigned int new_port_index;
+ struct devlink_port *devlink_port;
+ struct sk_buff *msg;
int err;
- if (!devlink->ops->port_new || !devlink->ops->port_del)
+ if (!devlink->ops->port_new)
return -EOPNOTSUPP;
if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
@@ -1423,36 +1380,43 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
new_attrs.sfnum_valid = true;
}
- err = devlink->ops->port_new(devlink, &new_attrs, extack,
- &new_port_index);
+ err = devlink->ops->port_new(devlink, &new_attrs,
+ extack, &devlink_port);
if (err)
return err;
- err = devlink_port_new_notify(devlink, new_port_index, info);
- if (err && err != -ENODEV) {
- /* Fail to send the response; destroy newly created port. */
- devlink->ops->port_del(devlink, new_port_index, extack);
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto err_out_port_del;
}
+ err = devlink_nl_port_fill(msg, devlink_port, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0, NULL);
+ if (WARN_ON_ONCE(err))
+ goto err_out_msg_free;
+ err = genlmsg_reply(msg, info);
+ if (err)
+ goto err_out_port_del;
+ return 0;
+
+err_out_msg_free:
+ nlmsg_free(msg);
+err_out_port_del:
+ devlink_port->ops->port_del(devlink, devlink_port, NULL);
return err;
}
static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
struct genl_info *info)
{
+ struct devlink_port *devlink_port = info->user_ptr[1];
struct netlink_ext_ack *extack = info->extack;
struct devlink *devlink = info->user_ptr[0];
- unsigned int port_index;
- if (!devlink->ops->port_del)
+ if (!devlink_port->ops->port_del)
return -EOPNOTSUPP;
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) {
- NL_SET_ERR_MSG(extack, "Port index is not specified");
- return -EINVAL;
- }
- port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
-
- return devlink->ops->port_del(devlink, port_index, extack);
+ return devlink_port->ops->port_del(devlink, devlink_port, extack);
}
static int
@@ -6384,6 +6348,7 @@ const struct genl_small_ops devlink_nl_ops[56] = {
.cmd = DEVLINK_CMD_PORT_DEL,
.doit = devlink_nl_cmd_port_del_doit,
.flags = GENL_ADMIN_PERM,
+ .internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
},
{
.cmd = DEVLINK_CMD_LINECARD_GET,
@@ -6772,7 +6737,10 @@ void devlink_notify_unregister(struct devlink *devlink)
static void devlink_port_type_warn(struct work_struct *work)
{
- WARN(true, "Type was not set for devlink port.");
+ struct devlink_port *port = container_of(to_delayed_work(work),
+ struct devlink_port,
+ type_warn_dw);
+ dev_warn(port->devlink->dev, "Type was not set for devlink port.");
}
static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
@@ -6809,7 +6777,7 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port)
* @devlink: devlink
* @devlink_port: devlink port
*
- * Initialize essencial stuff that is needed for functions
+ * Initialize essential stuff that is needed for functions
* that may be called before devlink port registration.
* Call to this function is optional and not needed
* in case the driver does not use such functions.
@@ -6830,7 +6798,7 @@ EXPORT_SYMBOL_GPL(devlink_port_init);
*
* @devlink_port: devlink port
*
- * Deinitialize essencial stuff that is in use for functions
+ * Deinitialize essential stuff that is in use for functions
* that may be called after devlink port unregistration.
* Call to this function is optional and not needed
* in case the driver does not use such functions.
@@ -6841,12 +6809,15 @@ void devlink_port_fini(struct devlink_port *devlink_port)
}
EXPORT_SYMBOL_GPL(devlink_port_fini);
+static const struct devlink_port_ops devlink_port_dummy_ops = {};
+
/**
- * devl_port_register() - Register devlink port
+ * devl_port_register_with_ops() - Register devlink port
*
* @devlink: devlink
* @devlink_port: devlink port
* @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
*
* Register devlink port with provided port index. User can use
* any indexing, even hw-related one. devlink_port structure
@@ -6854,9 +6825,10 @@ EXPORT_SYMBOL_GPL(devlink_port_fini);
* Note that the caller should take care of zeroing the devlink_port
* structure.
*/
-int devl_port_register(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index)
+int devl_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops)
{
int err;
@@ -6867,6 +6839,7 @@ int devl_port_register(struct devlink *devlink,
devlink_port_init(devlink, devlink_port);
devlink_port->registered = true;
devlink_port->index = port_index;
+ devlink_port->ops = ops ? ops : &devlink_port_dummy_ops;
spin_lock_init(&devlink_port->type_lock);
INIT_LIST_HEAD(&devlink_port->reporter_list);
err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
@@ -6878,14 +6851,15 @@ int devl_port_register(struct devlink *devlink,
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW);
return 0;
}
-EXPORT_SYMBOL_GPL(devl_port_register);
+EXPORT_SYMBOL_GPL(devl_port_register_with_ops);
/**
- * devlink_port_register - Register devlink port
+ * devlink_port_register_with_ops - Register devlink port
*
* @devlink: devlink
* @devlink_port: devlink port
* @port_index: driver-specific numerical identifier of the port
+ * @ops: port ops
*
* Register devlink port with provided port index. User can use
* any indexing, even hw-related one. devlink_port structure
@@ -6895,18 +6869,20 @@ EXPORT_SYMBOL_GPL(devl_port_register);
*
* Context: Takes and release devlink->lock <mutex>.
*/
-int devlink_port_register(struct devlink *devlink,
- struct devlink_port *devlink_port,
- unsigned int port_index)
+int devlink_port_register_with_ops(struct devlink *devlink,
+ struct devlink_port *devlink_port,
+ unsigned int port_index,
+ const struct devlink_port_ops *ops)
{
int err;
devl_lock(devlink);
- err = devl_port_register(devlink, devlink_port, port_index);
+ err = devl_port_register_with_ops(devlink, devlink_port,
+ port_index, ops);
devl_unlock(devlink);
return err;
}
-EXPORT_SYMBOL_GPL(devlink_port_register);
+EXPORT_SYMBOL_GPL(devlink_port_register_with_ops);
/**
* devl_port_unregister() - Unregister devlink port
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 71ba30538411..0ce8fd311c78 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1603,6 +1603,21 @@ dsa_port_phylink_mac_select_pcs(struct phylink_config *config,
return pcs;
}
+static int dsa_port_phylink_mac_prepare(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_switch *ds = dp->ds;
+ int err = 0;
+
+ if (ds->ops->phylink_mac_prepare)
+ err = ds->ops->phylink_mac_prepare(ds, dp->index, mode,
+ interface);
+
+ return err;
+}
+
static void dsa_port_phylink_mac_config(struct phylink_config *config,
unsigned int mode,
const struct phylink_link_state *state)
@@ -1616,6 +1631,21 @@ static void dsa_port_phylink_mac_config(struct phylink_config *config,
ds->ops->phylink_mac_config(ds, dp->index, mode, state);
}
+static int dsa_port_phylink_mac_finish(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface)
+{
+ struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
+ struct dsa_switch *ds = dp->ds;
+ int err = 0;
+
+ if (ds->ops->phylink_mac_finish)
+ err = ds->ops->phylink_mac_finish(ds, dp->index, mode,
+ interface);
+
+ return err;
+}
+
static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
@@ -1671,7 +1701,9 @@ static const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
.mac_select_pcs = dsa_port_phylink_mac_select_pcs,
.mac_pcs_get_state = dsa_port_phylink_mac_pcs_get_state,
+ .mac_prepare = dsa_port_phylink_mac_prepare,
.mac_config = dsa_port_phylink_mac_config,
+ .mac_finish = dsa_port_phylink_mac_finish,
.mac_an_restart = dsa_port_phylink_mac_an_restart,
.mac_link_down = dsa_port_phylink_mac_link_down,
.mac_link_up = dsa_port_phylink_mac_link_up,
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 6bb778e10461..4a51e0ec295c 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1436,15 +1436,26 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr)
static int ethtool_set_wol(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_wolinfo wol;
+ struct ethtool_wolinfo wol, cur_wol;
int ret;
- if (!dev->ethtool_ops->set_wol)
+ if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol)
return -EOPNOTSUPP;
+ memset(&cur_wol, 0, sizeof(struct ethtool_wolinfo));
+ cur_wol.cmd = ETHTOOL_GWOL;
+ dev->ethtool_ops->get_wol(dev, &cur_wol);
+
if (copy_from_user(&wol, useraddr, sizeof(wol)))
return -EFAULT;
+ if (wol.wolopts & ~cur_wol.supported)
+ return -EINVAL;
+
+ if (wol.wolopts == cur_wol.wolopts &&
+ !memcmp(wol.sopass, cur_wol.sopass, sizeof(wol.sopass)))
+ return 0;
+
ret = dev->ethtool_ops->set_wol(dev, &wol);
if (ret)
return ret;
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 08120095cc68..39a459b0111b 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -96,6 +96,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
int ret;
if (!header) {
+ if (!require_dev)
+ return 0;
NL_SET_ERR_MSG(extack, "request header missing");
return -EINVAL;
}
@@ -113,7 +115,8 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) {
u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]);
- dev = dev_get_by_index(net, ifindex);
+ dev = netdev_get_by_index(net, ifindex, &req_info->dev_tracker,
+ GFP_KERNEL);
if (!dev) {
NL_SET_ERR_MSG_ATTR(extack,
tb[ETHTOOL_A_HEADER_DEV_INDEX],
@@ -123,13 +126,14 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
/* if both ifindex and ifname are passed, they must match */
if (devname_attr &&
strncmp(dev->name, nla_data(devname_attr), IFNAMSIZ)) {
- dev_put(dev);
+ netdev_put(dev, &req_info->dev_tracker);
NL_SET_ERR_MSG_ATTR(extack, header,
"ifindex and name do not match");
return -ENODEV;
}
} else if (devname_attr) {
- dev = dev_get_by_name(net, nla_data(devname_attr));
+ dev = netdev_get_by_name(net, nla_data(devname_attr),
+ &req_info->dev_tracker, GFP_KERNEL);
if (!dev) {
NL_SET_ERR_MSG_ATTR(extack, devname_attr,
"no device matches name");
@@ -142,8 +146,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info,
}
req_info->dev = dev;
- if (dev)
- netdev_tracker_alloc(dev, &req_info->dev_tracker, GFP_KERNEL);
req_info->flags = flags;
return 0;
}
diff --git a/net/handshake/genl.c b/net/handshake/genl.c
index 9f29efb1493e..233be5cbfec9 100644
--- a/net/handshake/genl.c
+++ b/net/handshake/genl.c
@@ -8,7 +8,7 @@
#include "genl.h"
-#include <linux/handshake.h>
+#include <uapi/linux/handshake.h>
/* HANDSHAKE_CMD_ACCEPT - do */
static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = {
diff --git a/net/handshake/genl.h b/net/handshake/genl.h
index 2c1f1aa6a02a..ae72a596f6cc 100644
--- a/net/handshake/genl.h
+++ b/net/handshake/genl.h
@@ -9,7 +9,7 @@
#include <net/netlink.h>
#include <net/genetlink.h>
-#include <linux/handshake.h>
+#include <uapi/linux/handshake.h>
int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info);
int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 5a236aae2366..306f942c3b28 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -531,6 +531,11 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res)
goto err_add_master;
+ /* HSR forwarding offload supported in lower device? */
+ if ((slave[0]->features & NETIF_F_HW_HSR_FWD) &&
+ (slave[1]->features & NETIF_F_HW_HSR_FWD))
+ hsr->fwd_offloaded = true;
+
res = register_netdevice(hsr_dev);
if (res)
goto err_unregister;
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 5584c80a5c79..6851e33df7d1 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -208,6 +208,7 @@ struct hsr_priv {
u8 net_id; /* for PRP, it occupies most significant 3 bits
* of lan_id
*/
+ bool fwd_offloaded; /* Forwarding offloaded to HW */
unsigned char sup_multicast_addr[ETH_ALEN] __aligned(sizeof(u16));
/* Align to u16 boundary to avoid unaligned access
* in ether_addr_equal
diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c
index b70e6bbf6021..e5742f2a2d52 100644
--- a/net/hsr/hsr_slave.c
+++ b/net/hsr/hsr_slave.c
@@ -131,9 +131,14 @@ static int hsr_portdev_setup(struct hsr_priv *hsr, struct net_device *dev,
struct hsr_port *master;
int res;
- res = dev_set_promiscuity(dev, 1);
- if (res)
- return res;
+ /* Don't use promiscuous mode for offload since L2 frame forward
+ * happens at the offloaded hardware.
+ */
+ if (!port->hsr->fwd_offloaded) {
+ res = dev_set_promiscuity(dev, 1);
+ if (res)
+ return res;
+ }
master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
hsr_dev = master->dev;
@@ -152,7 +157,9 @@ static int hsr_portdev_setup(struct hsr_priv *hsr, struct net_device *dev,
fail_rx_handler:
netdev_upper_dev_unlink(dev, hsr_dev);
fail_upper_dev_link:
- dev_set_promiscuity(dev, -1);
+ if (!port->hsr->fwd_offloaded)
+ dev_set_promiscuity(dev, -1);
+
return res;
}
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index 1fa2fe041ec0..9c124705120d 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -162,7 +162,7 @@ static int ieee802154_sock_ioctl(struct socket *sock, unsigned int cmd,
default:
if (!sk->sk_prot->ioctl)
return -ENOIOCTLCMD;
- return sk->sk_prot->ioctl(sk, cmd, arg);
+ return sk_ioctl(sk, cmd, (void __user *)arg);
}
}
@@ -531,22 +531,21 @@ out:
return err;
}
-static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int dgram_ioctl(struct sock *sk, int cmd, int *karg)
{
switch (cmd) {
case SIOCOUTQ:
{
- int amount = sk_wmem_alloc_get(sk);
+ *karg = sk_wmem_alloc_get(sk);
- return put_user(amount, (int __user *)arg);
+ return 0;
}
case SIOCINQ:
{
struct sk_buff *skb;
- unsigned long amount;
- amount = 0;
+ *karg = 0;
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb) {
@@ -554,10 +553,10 @@ static int dgram_ioctl(struct sock *sk, int cmd, unsigned long arg)
* of this packet since that is all
* that will be read.
*/
- amount = skb->len - ieee802154_hdr_length(skb);
+ *karg = skb->len - ieee802154_hdr_length(skb);
}
spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
+ return 0;
}
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4a76ebf793b8..38e649fb4474 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -100,6 +100,7 @@
#include <net/ip_fib.h>
#include <net/inet_connection_sock.h>
#include <net/gro.h>
+#include <net/gso.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/udplite.h>
@@ -732,6 +733,20 @@ int inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
}
EXPORT_SYMBOL(inet_stream_connect);
+void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk)
+{
+ sock_rps_record_flow(newsk);
+ WARN_ON(!((1 << newsk->sk_state) &
+ (TCPF_ESTABLISHED | TCPF_SYN_RECV |
+ TCPF_CLOSE_WAIT | TCPF_CLOSE)));
+
+ if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
+ set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
+ sock_graft(newsk, newsock);
+
+ newsock->state = SS_CONNECTED;
+}
+
/*
* Accept a pending connection. The TCP layer now gives BSD semantics.
*/
@@ -745,24 +760,12 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags,
/* IPV6_ADDRFORM can change sk->sk_prot under us. */
sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern);
if (!sk2)
- goto do_err;
+ return err;
lock_sock(sk2);
-
- sock_rps_record_flow(sk2);
- WARN_ON(!((1 << sk2->sk_state) &
- (TCPF_ESTABLISHED | TCPF_SYN_RECV |
- TCPF_CLOSE_WAIT | TCPF_CLOSE)));
-
- if (test_bit(SOCK_SUPPORT_ZC, &sock->flags))
- set_bit(SOCK_SUPPORT_ZC, &newsock->flags);
- sock_graft(sk2, newsock);
-
- newsock->state = SS_CONNECTED;
- err = 0;
+ __inet_accept(sock, newsock, sk2);
release_sock(sk2);
-do_err:
- return err;
+ return 0;
}
EXPORT_SYMBOL(inet_accept);
@@ -829,6 +832,21 @@ int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
}
EXPORT_SYMBOL(inet_sendmsg);
+void inet_splice_eof(struct socket *sock)
+{
+ const struct proto *prot;
+ struct sock *sk = sock->sk;
+
+ if (unlikely(inet_send_prepare(sk)))
+ return;
+
+ /* IPV6_ADDRFORM can change sk->sk_prot under us. */
+ prot = READ_ONCE(sk->sk_prot);
+ if (prot->splice_eof)
+ prot->splice_eof(sock);
+}
+EXPORT_SYMBOL_GPL(inet_splice_eof);
+
ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
size_t size, int flags)
{
@@ -980,7 +998,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
break;
default:
if (sk->sk_prot->ioctl)
- err = sk->sk_prot->ioctl(sk, cmd, arg);
+ err = sk_ioctl(sk, cmd, (void __user *)arg);
else
err = -ENOIOCTLCMD;
break;
@@ -1048,6 +1066,7 @@ const struct proto_ops inet_stream_ops = {
#ifdef CONFIG_MMU
.mmap = tcp_mmap,
#endif
+ .splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
.splice_read = tcp_splice_read,
.read_sock = tcp_read_sock,
@@ -1082,6 +1101,7 @@ const struct proto_ops inet_dgram_ops = {
.read_skb = udp_read_skb,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
+ .splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
.set_peek_off = sk_set_peek_off,
#ifdef CONFIG_COMPAT
@@ -1113,6 +1133,7 @@ static const struct proto_ops inet_sockraw_ops = {
.sendmsg = inet_sendmsg,
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
+ .splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
#ifdef CONFIG_COMPAT
.compat_ioctl = inet_compat_ioctl,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index ee848be59e65..10e96ed6c9e3 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -17,6 +17,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <net/gro.h>
+#include <net/gso.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/esp.h>
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
index 6c37c4f98cca..98b90107b5ab 100644
--- a/net/ipv4/fou_nl.c
+++ b/net/ipv4/fou_nl.c
@@ -8,7 +8,7 @@
#include "fou_nl.h"
-#include <linux/fou.h>
+#include <uapi/linux/fou.h>
/* Global operation policy for fou */
const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h
index dbd0780a5d34..63a6c4ed803d 100644
--- a/net/ipv4/fou_nl.h
+++ b/net/ipv4/fou_nl.h
@@ -9,7 +9,7 @@
#include <net/netlink.h>
#include <net/genetlink.h>
-#include <linux/fou.h>
+#include <uapi/linux/fou.h>
/* Global operation policy for fou */
extern const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1];
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 2b9cb5398335..311e70bfce40 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -11,6 +11,7 @@
#include <net/protocol.h>
#include <net/gre.h>
#include <net/gro.h>
+#include <net/gso.h>
static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
netdev_features_t features)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 1386787eaf1a..0cc19cfbb673 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -706,20 +706,23 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
out:
release_sock(sk);
if (newsk && mem_cgroup_sockets_enabled) {
- int amt;
+ int amt = 0;
/* atomically get the memory usage, set and charge the
* newsk->sk_memcg.
*/
lock_sock(newsk);
- /* The socket has not been accepted yet, no need to look at
- * newsk->sk_wmem_queued.
- */
- amt = sk_mem_pages(newsk->sk_forward_alloc +
- atomic_read(&newsk->sk_rmem_alloc));
mem_cgroup_sk_alloc(newsk);
- if (newsk->sk_memcg && amt)
+ if (newsk->sk_memcg) {
+ /* The socket has not been accepted yet, no need
+ * to look at newsk->sk_wmem_queued.
+ */
+ amt = sk_mem_pages(newsk->sk_forward_alloc +
+ atomic_read(&newsk->sk_rmem_alloc));
+ }
+
+ if (amt)
mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
GFP_KERNEL | __GFP_NOFAIL);
@@ -792,7 +795,7 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
opt = rcu_dereference(ireq->ireq_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
- RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
@@ -830,7 +833,7 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
fl4 = &newinet->cork.fl.u.ip4;
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
- RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+ ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, ireq->ir_rmt_port,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e55a20264960..81a1cce1a7d1 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -189,10 +189,10 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
}
#if IS_ENABLED(CONFIG_IPV6)
- if (tpi->proto == htons(ETH_P_IPV6) &&
- !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
- type, data_len))
- return 0;
+ if (tpi->proto == htons(ETH_P_IPV6) &&
+ !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return 0;
#endif
if (t->parms.iph.daddr == 0 ||
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 61892268e8a6..6e70839257f7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -73,6 +73,7 @@
#include <net/arp.h>
#include <net/icmp.h>
#include <net/checksum.h>
+#include <net/gso.h>
#include <net/inetpeer.h>
#include <net/inet_ecn.h>
#include <net/lwtunnel.h>
@@ -946,17 +947,6 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk
}
EXPORT_SYMBOL(ip_generic_getfrag);
-static inline __wsum
-csum_page(struct page *page, int offset, int copy)
-{
- char *kaddr;
- __wsum csum;
- kaddr = kmap(page);
- csum = csum_partial(kaddr + offset, copy, 0);
- kunmap(page);
- return csum;
-}
-
static int __ip_append_data(struct sock *sk,
struct flowi4 *fl4,
struct sk_buff_head *queue,
@@ -1048,6 +1038,15 @@ static int __ip_append_data(struct sock *sk,
skb_zcopy_set(skb, uarg, &extra_uref);
}
}
+ } else if ((flags & MSG_SPLICE_PAGES) && length) {
+ if (inet->hdrincl)
+ return -EPERM;
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ getfrag == ip_generic_getfrag)
+ /* We need an empty buffer to attach stuff to */
+ paged = true;
+ else
+ flags &= ~MSG_SPLICE_PAGES;
}
cork->length += length;
@@ -1207,6 +1206,15 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
+ } else if (flags & MSG_SPLICE_PAGES) {
+ struct msghdr *msg = from;
+
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+ sk->sk_allocation);
+ if (err < 0)
+ goto error;
+ copy = err;
+ wmem_alloc_delta += copy;
} else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;
@@ -1310,10 +1318,10 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
}
/*
- * ip_append_data() and ip_append_page() can make one large IP datagram
- * from many pieces of data. Each pieces will be holded on the socket
- * until ip_push_pending_frames() is called. Each piece can be a page
- * or non-page data.
+ * ip_append_data() can make one large IP datagram from many pieces of
+ * data. Each piece will be held on the socket until
+ * ip_push_pending_frames() is called. Each piece can be a page or
+ * non-page data.
*
* Not only UDP, other transport protocols - e.g. raw sockets - can use
* this interface potentially.
@@ -1346,134 +1354,6 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4,
from, length, transhdrlen, flags);
}
-ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
- int offset, size_t size, int flags)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct sk_buff *skb;
- struct rtable *rt;
- struct ip_options *opt = NULL;
- struct inet_cork *cork;
- int hh_len;
- int mtu;
- int len;
- int err;
- unsigned int maxfraglen, fragheaderlen, fraggap, maxnonfragsize;
-
- if (inet->hdrincl)
- return -EPERM;
-
- if (flags&MSG_PROBE)
- return 0;
-
- if (skb_queue_empty(&sk->sk_write_queue))
- return -EINVAL;
-
- cork = &inet->cork.base;
- rt = (struct rtable *)cork->dst;
- if (cork->flags & IPCORK_OPT)
- opt = cork->opt;
-
- if (!(rt->dst.dev->features & NETIF_F_SG))
- return -EOPNOTSUPP;
-
- hh_len = LL_RESERVED_SPACE(rt->dst.dev);
- mtu = cork->gso_size ? IP_MAX_MTU : cork->fragsize;
-
- fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
- maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen;
- maxnonfragsize = ip_sk_ignore_df(sk) ? 0xFFFF : mtu;
-
- if (cork->length + size > maxnonfragsize - fragheaderlen) {
- ip_local_error(sk, EMSGSIZE, fl4->daddr, inet->inet_dport,
- mtu - (opt ? opt->optlen : 0));
- return -EMSGSIZE;
- }
-
- skb = skb_peek_tail(&sk->sk_write_queue);
- if (!skb)
- return -EINVAL;
-
- cork->length += size;
-
- while (size > 0) {
- /* Check if the remaining data fits into current packet. */
- len = mtu - skb->len;
- if (len < size)
- len = maxfraglen - skb->len;
-
- if (len <= 0) {
- struct sk_buff *skb_prev;
- int alloclen;
-
- skb_prev = skb;
- fraggap = skb_prev->len - maxfraglen;
-
- alloclen = fragheaderlen + hh_len + fraggap + 15;
- skb = sock_wmalloc(sk, alloclen, 1, sk->sk_allocation);
- if (unlikely(!skb)) {
- err = -ENOBUFS;
- goto error;
- }
-
- /*
- * Fill in the control structures
- */
- skb->ip_summed = CHECKSUM_NONE;
- skb->csum = 0;
- skb_reserve(skb, hh_len);
-
- /*
- * Find where to start putting bytes.
- */
- skb_put(skb, fragheaderlen + fraggap);
- skb_reset_network_header(skb);
- skb->transport_header = (skb->network_header +
- fragheaderlen);
- if (fraggap) {
- skb->csum = skb_copy_and_csum_bits(skb_prev,
- maxfraglen,
- skb_transport_header(skb),
- fraggap);
- skb_prev->csum = csum_sub(skb_prev->csum,
- skb->csum);
- pskb_trim_unique(skb_prev, maxfraglen);
- }
-
- /*
- * Put the packet on the pending queue.
- */
- __skb_queue_tail(&sk->sk_write_queue, skb);
- continue;
- }
-
- if (len > size)
- len = size;
-
- if (skb_append_pagefrags(skb, page, offset, len)) {
- err = -EMSGSIZE;
- goto error;
- }
-
- if (skb->ip_summed == CHECKSUM_NONE) {
- __wsum csum;
- csum = csum_page(page, offset, len);
- skb->csum = csum_block_add(skb->csum, csum, skb->len);
- }
-
- skb_len_add(skb, len);
- refcount_add(len, &sk->sk_wmem_alloc);
- offset += len;
- size -= len;
- }
- return 0;
-
-error:
- cork->length -= size;
- IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS);
- return err;
-}
-
static void ip_cork_release(struct inet_cork *cork)
{
cork->flags &= ~IPCORK_OPT;
@@ -1692,7 +1572,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
const struct ip_options *sopt,
__be32 daddr, __be32 saddr,
const struct ip_reply_arg *arg,
- unsigned int len, u64 transmit_time)
+ unsigned int len, u64 transmit_time, u32 txhash)
{
struct ip_options_data replyopts;
struct ipcm_cookie ipc;
@@ -1755,6 +1635,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
nskb->mono_delivery_time = !!transmit_time;
+ if (txhash)
+ skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
ip_push_pending_frames(sk, &fl4);
}
out:
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e90bc0aa85c7..c56b6fe6f0d7 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -665,6 +665,9 @@ static struct packet_type bootp_packet_type __initdata = {
.func = ic_bootp_recv,
};
+/* DHCPACK can overwrite DNS if fallback was set upon first BOOTP reply */
+static int ic_nameservers_fallback __initdata;
+
/*
* Initialize DHCP/BOOTP extension fields in the request.
*/
@@ -938,7 +941,8 @@ static void __init ic_do_bootp_ext(u8 *ext)
if (servers > CONF_NAMESERVERS_MAX)
servers = CONF_NAMESERVERS_MAX;
for (i = 0; i < servers; i++) {
- if (ic_nameservers[i] == NONE)
+ if (ic_nameservers[i] == NONE ||
+ ic_nameservers_fallback)
memcpy(&ic_nameservers[i], ext+1+4*i, 4);
}
break;
@@ -1158,8 +1162,10 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
ic_addrservaddr = b->iph.saddr;
if (ic_gateway == NONE && b->relay_ip)
ic_gateway = b->relay_ip;
- if (ic_nameservers[0] == NONE)
+ if (ic_nameservers[0] == NONE) {
ic_nameservers[0] = ic_servaddr;
+ ic_nameservers_fallback = 1;
+ }
ic_got_reply = IC_BOOTP;
drop_unlock:
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index eec1f6df80d8..3f0c6d602fb7 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1547,6 +1547,28 @@ out:
return ret;
}
+/* Execute if this ioctl is a special mroute ioctl */
+int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
+{
+ switch (cmd) {
+ /* These userspace buffers will be consumed by ipmr_ioctl() */
+ case SIOCGETVIFCNT: {
+ struct sioc_vif_req buffer;
+
+ return sock_ioctl_inout(sk, cmd, arg, &buffer,
+ sizeof(buffer));
+ }
+ case SIOCGETSGCNT: {
+ struct sioc_sg_req buffer;
+
+ return sock_ioctl_inout(sk, cmd, arg, &buffer,
+ sizeof(buffer));
+ }
+ }
+ /* return code > 0 means that the ioctl was not executed */
+ return 1;
+}
+
/* Getsock opt support for the multicast routing system. */
int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
sockptr_t optlen)
@@ -1593,13 +1615,13 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
}
/* The IP multicast ioctl support routines. */
-int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
+int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
{
- struct sioc_sg_req sr;
- struct sioc_vif_req vr;
struct vif_device *vif;
struct mfc_cache *c;
struct net *net = sock_net(sk);
+ struct sioc_vif_req *vr;
+ struct sioc_sg_req *sr;
struct mr_table *mrt;
mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
@@ -1608,40 +1630,33 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
switch (cmd) {
case SIOCGETVIFCNT:
- if (copy_from_user(&vr, arg, sizeof(vr)))
- return -EFAULT;
- if (vr.vifi >= mrt->maxvif)
+ vr = (struct sioc_vif_req *)arg;
+ if (vr->vifi >= mrt->maxvif)
return -EINVAL;
- vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif);
+ vr->vifi = array_index_nospec(vr->vifi, mrt->maxvif);
rcu_read_lock();
- vif = &mrt->vif_table[vr.vifi];
- if (VIF_EXISTS(mrt, vr.vifi)) {
- vr.icount = READ_ONCE(vif->pkt_in);
- vr.ocount = READ_ONCE(vif->pkt_out);
- vr.ibytes = READ_ONCE(vif->bytes_in);
- vr.obytes = READ_ONCE(vif->bytes_out);
+ vif = &mrt->vif_table[vr->vifi];
+ if (VIF_EXISTS(mrt, vr->vifi)) {
+ vr->icount = READ_ONCE(vif->pkt_in);
+ vr->ocount = READ_ONCE(vif->pkt_out);
+ vr->ibytes = READ_ONCE(vif->bytes_in);
+ vr->obytes = READ_ONCE(vif->bytes_out);
rcu_read_unlock();
- if (copy_to_user(arg, &vr, sizeof(vr)))
- return -EFAULT;
return 0;
}
rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT:
- if (copy_from_user(&sr, arg, sizeof(sr)))
- return -EFAULT;
+ sr = (struct sioc_sg_req *)arg;
rcu_read_lock();
- c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
+ c = ipmr_cache_find(mrt, sr->src.s_addr, sr->grp.s_addr);
if (c) {
- sr.pktcnt = c->_c.mfc_un.res.pkt;
- sr.bytecnt = c->_c.mfc_un.res.bytes;
- sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr->pktcnt = c->_c.mfc_un.res.pkt;
+ sr->bytecnt = c->_c.mfc_un.res.bytes;
+ sr->wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
-
- if (copy_to_user(arg, &sr, sizeof(sr)))
- return -EFAULT;
return 0;
}
rcu_read_unlock();
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 5178a3f3cb53..25dd78cee179 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -49,13 +49,8 @@
#include <net/transp_v6.h>
#endif
-#define ping_portaddr_for_each_entry(__sk, node, list) \
- hlist_nulls_for_each_entry(__sk, node, list, sk_nulls_node)
-#define ping_portaddr_for_each_entry_rcu(__sk, node, list) \
- hlist_nulls_for_each_entry_rcu(__sk, node, list, sk_nulls_node)
-
struct ping_table {
- struct hlist_nulls_head hash[PING_HTABLE_SIZE];
+ struct hlist_head hash[PING_HTABLE_SIZE];
spinlock_t lock;
};
@@ -74,17 +69,16 @@ static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask)
}
EXPORT_SYMBOL_GPL(ping_hash);
-static inline struct hlist_nulls_head *ping_hashslot(struct ping_table *table,
- struct net *net, unsigned int num)
+static inline struct hlist_head *ping_hashslot(struct ping_table *table,
+ struct net *net, unsigned int num)
{
return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)];
}
int ping_get_port(struct sock *sk, unsigned short ident)
{
- struct hlist_nulls_node *node;
- struct hlist_nulls_head *hlist;
struct inet_sock *isk, *isk2;
+ struct hlist_head *hlist;
struct sock *sk2 = NULL;
isk = inet_sk(sk);
@@ -98,7 +92,7 @@ int ping_get_port(struct sock *sk, unsigned short ident)
result++; /* avoid zero */
hlist = ping_hashslot(&ping_table, sock_net(sk),
result);
- ping_portaddr_for_each_entry(sk2, node, hlist) {
+ sk_for_each(sk2, hlist) {
isk2 = inet_sk(sk2);
if (isk2->inet_num == result)
@@ -115,7 +109,7 @@ next_port:
goto fail;
} else {
hlist = ping_hashslot(&ping_table, sock_net(sk), ident);
- ping_portaddr_for_each_entry(sk2, node, hlist) {
+ sk_for_each(sk2, hlist) {
isk2 = inet_sk(sk2);
/* BUG? Why is this reuse and not reuseaddr? ping.c
@@ -133,9 +127,8 @@ next_port:
isk->inet_num = ident;
if (sk_unhashed(sk)) {
pr_debug("was not hashed\n");
- sock_hold(sk);
+ sk_add_node_rcu(sk, hlist);
sock_set_flag(sk, SOCK_RCU_FREE);
- hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
spin_unlock(&ping_table.lock);
@@ -161,9 +154,7 @@ void ping_unhash(struct sock *sk)
pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num);
spin_lock(&ping_table.lock);
- if (sk_hashed(sk)) {
- hlist_nulls_del_init_rcu(&sk->sk_nulls_node);
- sock_put(sk);
+ if (sk_del_node_init_rcu(sk)) {
isk->inet_num = 0;
isk->inet_sport = 0;
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
@@ -175,10 +166,9 @@ EXPORT_SYMBOL_GPL(ping_unhash);
/* Called under rcu_read_lock() */
static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
{
- struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident);
+ struct hlist_head *hslot = ping_hashslot(&ping_table, net, ident);
struct sock *sk = NULL;
struct inet_sock *isk;
- struct hlist_nulls_node *hnode;
int dif, sdif;
if (skb->protocol == htons(ETH_P_IP)) {
@@ -197,7 +187,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident)
return NULL;
}
- ping_portaddr_for_each_entry_rcu(sk, hnode, hslot) {
+ sk_for_each_rcu(sk, hslot) {
isk = inet_sk(sk);
pr_debug("iterate\n");
@@ -715,7 +705,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ip_options_data opt_copy;
int free = 0;
__be32 saddr, daddr, faddr;
- u8 tos;
+ u8 tos, scope;
int err;
pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -779,11 +769,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
faddr = ipc.opt->opt.faddr;
}
tos = get_rttos(&ipc, inet);
- if (sock_flag(sk, SOCK_LOCALROUTE) ||
- (msg->msg_flags & MSG_DONTROUTE) ||
- (ipc.opt && ipc.opt->opt.is_strictroute)) {
- tos |= RTO_ONLINK;
- }
+ scope = ip_sendmsg_scope(inet, &ipc, msg);
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
@@ -793,10 +779,9 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
} else if (!ipc.oif)
ipc.oif = inet->uc_index;
- flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
- RT_SCOPE_UNIVERSE, sk->sk_protocol,
- inet_sk_flowi_flags(sk), faddr, saddr, 0, 0,
- sk->sk_uid);
+ flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
+ sk->sk_protocol, inet_sk_flowi_flags(sk), faddr,
+ saddr, 0, 0, sk->sk_uid);
fl4.fl4_icmp_type = user_icmph.type;
fl4.fl4_icmp_code = user_icmph.code;
@@ -1045,15 +1030,14 @@ static struct sock *ping_get_first(struct seq_file *seq, int start)
for (state->bucket = start; state->bucket < PING_HTABLE_SIZE;
++state->bucket) {
- struct hlist_nulls_node *node;
- struct hlist_nulls_head *hslot;
+ struct hlist_head *hslot;
hslot = &ping_table.hash[state->bucket];
- if (hlist_nulls_empty(hslot))
+ if (hlist_empty(hslot))
continue;
- sk_nulls_for_each(sk, node, hslot) {
+ sk_for_each(sk, hslot) {
if (net_eq(sock_net(sk), net) &&
sk->sk_family == state->family)
goto found;
@@ -1070,7 +1054,7 @@ static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk)
struct net *net = seq_file_net(seq);
do {
- sk = sk_nulls_next(sk);
+ sk = sk_next(sk);
} while (sk && (!net_eq(sock_net(sk), net)));
if (!sk)
@@ -1206,6 +1190,6 @@ void __init ping_init(void)
int i;
for (i = 0; i < PING_HTABLE_SIZE; i++)
- INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i);
+ INIT_HLIST_HEAD(&ping_table.hash[i]);
spin_lock_init(&ping_table.lock);
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index eadf1c9ef7e4..7782ff5e6539 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -476,10 +476,10 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
struct flowi4 fl4;
+ u8 tos, scope;
int free = 0;
__be32 daddr;
__be32 saddr;
- u8 tos;
int err;
struct ip_options_data opt_copy;
struct raw_frag_vec rfv;
@@ -575,9 +575,8 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
daddr = ipc.opt->opt.faddr;
}
}
- tos = get_rtconn_flags(&ipc, sk);
- if (msg->msg_flags & MSG_DONTROUTE)
- tos |= RTO_ONLINK;
+ tos = get_rttos(&ipc, inet);
+ scope = ip_sendmsg_scope(inet, &ipc, msg);
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
@@ -600,8 +599,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
}
- flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos,
- RT_SCOPE_UNIVERSE,
+ flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope,
hdrincl ? ipc.protocol : sk->sk_protocol,
inet_sk_flowi_flags(sk) |
(hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
@@ -858,29 +856,29 @@ static int raw_getsockopt(struct sock *sk, int level, int optname,
return do_raw_getsockopt(sk, level, optname, optval, optlen);
}
-static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int raw_ioctl(struct sock *sk, int cmd, int *karg)
{
switch (cmd) {
case SIOCOUTQ: {
- int amount = sk_wmem_alloc_get(sk);
-
- return put_user(amount, (int __user *)arg);
+ *karg = sk_wmem_alloc_get(sk);
+ return 0;
}
case SIOCINQ: {
struct sk_buff *skb;
- int amount = 0;
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- amount = skb->len;
+ *karg = skb->len;
+ else
+ *karg = 0;
spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
+ return 0;
}
default:
#ifdef CONFIG_IP_MROUTE
- return ipmr_ioctl(sk, cmd, (void __user *)arg);
+ return ipmr_ioctl(sk, cmd, karg);
#else
return -ENOIOCTLCMD;
#endif
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 26fb97d1d4d9..dc478a0574cb 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -418,8 +418,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
* no easy way to do this.
*/
flowi4_init_output(&fl4, ireq->ir_iif, ireq->ir_mark,
- RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, IPPROTO_TCP,
- inet_sk_flowi_flags(sk),
+ ip_sock_rt_tos(sk), ip_sock_rt_scope(sk),
+ IPPROTO_TCP, inet_sk_flowi_flags(sk),
opt->srr ? opt->faddr : ireq->ir_rmt_addr,
ireq->ir_loc_addr, th->source, th->dest, sk->sk_uid);
security_req_classify_flow(req, flowi4_to_flowi_common(&fl4));
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 88dfe51e68f3..2afb0870648b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -34,6 +34,7 @@ static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
static int tcp_syn_retries_min = 1;
static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
+static int tcp_syn_linear_timeouts_max = MAX_TCP_SYNCNT;
static unsigned long ip_ping_group_range_min[] = { 0, 0 };
static unsigned long ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
static u32 u32_max_div_HZ = UINT_MAX / HZ;
@@ -1470,6 +1471,24 @@ static struct ctl_table ipv4_net_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &tcp_plb_max_cong_thresh,
},
+ {
+ .procname = "tcp_syn_linear_timeouts",
+ .data = &init_net.ipv4.sysctl_tcp_syn_linear_timeouts,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = &tcp_syn_linear_timeouts_max,
+ },
+ {
+ .procname = "tcp_shrink_window",
+ .data = &init_net.ipv4.sysctl_tcp_shrink_window,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
{ }
};
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8d20d9221238..71b42eef9dbf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -599,7 +599,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
EXPORT_SYMBOL(tcp_poll);
-int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+int tcp_ioctl(struct sock *sk, int cmd, int *karg)
{
struct tcp_sock *tp = tcp_sk(sk);
int answ;
@@ -641,7 +641,8 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
- return put_user(answ, (int __user *)arg);
+ *karg = answ;
+ return 0;
}
EXPORT_SYMBOL(tcp_ioctl);
@@ -858,12 +859,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
}
EXPORT_SYMBOL(tcp_splice_read);
-struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp,
+struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule)
{
struct sk_buff *skb;
- skb = alloc_skb_fclone(size + MAX_TCP_HEADER, gfp);
+ skb = alloc_skb_fclone(MAX_TCP_HEADER, gfp);
if (likely(skb)) {
bool mem_scheduled;
@@ -957,7 +958,7 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb)
}
-static int tcp_wmem_schedule(struct sock *sk, int copy)
+int tcp_wmem_schedule(struct sock *sk, int copy)
{
int left;
@@ -974,175 +975,24 @@ static int tcp_wmem_schedule(struct sock *sk, int copy)
return min(copy, sk->sk_forward_alloc);
}
-static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags,
- struct page *page, int offset, size_t *size)
-{
- struct sk_buff *skb = tcp_write_queue_tail(sk);
- struct tcp_sock *tp = tcp_sk(sk);
- bool can_coalesce;
- int copy, i;
-
- if (!skb || (copy = size_goal - skb->len) <= 0 ||
- !tcp_skb_can_collapse_to(skb)) {
-new_segment:
- if (!sk_stream_memory_free(sk))
- return NULL;
-
- skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
- tcp_rtx_and_write_queues_empty(sk));
- if (!skb)
- return NULL;
-
-#ifdef CONFIG_TLS_DEVICE
- skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED);
-#endif
- tcp_skb_entail(sk, skb);
- copy = size_goal;
- }
-
- if (copy > *size)
- copy = *size;
-
- i = skb_shinfo(skb)->nr_frags;
- can_coalesce = skb_can_coalesce(skb, i, page, offset);
- if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) {
- tcp_mark_push(tp, skb);
- goto new_segment;
- }
- if (tcp_downgrade_zcopy_pure(sk, skb))
- return NULL;
-
- copy = tcp_wmem_schedule(sk, copy);
- if (!copy)
- return NULL;
-
- if (can_coalesce) {
- skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
- } else {
- get_page(page);
- skb_fill_page_desc_noacc(skb, i, page, offset, copy);
- }
-
- if (!(flags & MSG_NO_SHARED_FRAGS))
- skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
-
- skb->len += copy;
- skb->data_len += copy;
- skb->truesize += copy;
- sk_wmem_queued_add(sk, copy);
- sk_mem_charge(sk, copy);
- WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
- TCP_SKB_CB(skb)->end_seq += copy;
- tcp_skb_pcount_set(skb, 0);
-
- *size = copy;
- return skb;
-}
-
-ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
- size_t size, int flags)
-{
- struct tcp_sock *tp = tcp_sk(sk);
- int mss_now, size_goal;
- int err;
- ssize_t copied;
- long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
-
- if (IS_ENABLED(CONFIG_DEBUG_VM) &&
- WARN_ONCE(!sendpage_ok(page),
- "page must not be a Slab one and have page_count > 0"))
- return -EINVAL;
-
- /* Wait for a connection to finish. One exception is TCP Fast Open
- * (passive side) where data is allowed to be sent before a connection
- * is fully established.
- */
- if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
- !tcp_passive_fastopen(sk)) {
- err = sk_stream_wait_connect(sk, &timeo);
- if (err != 0)
- goto out_err;
- }
-
- sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
- mss_now = tcp_send_mss(sk, &size_goal, flags);
- copied = 0;
-
- err = -EPIPE;
- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
- goto out_err;
-
- while (size > 0) {
- struct sk_buff *skb;
- size_t copy = size;
-
- skb = tcp_build_frag(sk, size_goal, flags, page, offset, &copy);
- if (!skb)
- goto wait_for_space;
-
- if (!copied)
- TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
-
- copied += copy;
- offset += copy;
- size -= copy;
- if (!size)
- goto out;
-
- if (skb->len < size_goal || (flags & MSG_OOB))
- continue;
-
- if (forced_push(tp)) {
- tcp_mark_push(tp, skb);
- __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH);
- } else if (skb == tcp_send_head(sk))
- tcp_push_one(sk, mss_now);
- continue;
-
-wait_for_space:
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- tcp_push(sk, flags & ~MSG_MORE, mss_now,
- TCP_NAGLE_PUSH, size_goal);
-
- err = sk_stream_wait_memory(sk, &timeo);
- if (err != 0)
- goto do_error;
-
- mss_now = tcp_send_mss(sk, &size_goal, flags);
- }
-
-out:
- if (copied) {
- tcp_tx_timestamp(sk, sk->sk_tsflags);
- if (!(flags & MSG_SENDPAGE_NOTLAST))
- tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
- }
- return copied;
-
-do_error:
- tcp_remove_empty_skb(sk);
- if (copied)
- goto out;
-out_err:
- /* make sure we wake any epoll edge trigger waiter */
- if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
- sk->sk_write_space(sk);
- tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
- }
- return sk_stream_error(sk, flags, err);
-}
-EXPORT_SYMBOL_GPL(do_tcp_sendpages);
-
int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+
if (!(sk->sk_route_caps & NETIF_F_SG))
return sock_no_sendpage_locked(sk, page, offset, size, flags);
tcp_rate_check_app_limited(sk); /* is sending application-limited? */
- return do_tcp_sendpages(sk, page, offset, size, flags);
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
+
+ return tcp_sendmsg_locked(sk, &msg, size);
}
EXPORT_SYMBOL_GPL(tcp_sendpage_locked);
@@ -1223,28 +1073,31 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
int process_backlog = 0;
- bool zc = false;
+ int zc = 0;
long timeo;
flags = msg->msg_flags;
if ((flags & MSG_ZEROCOPY) && size) {
- skb = tcp_write_queue_tail(sk);
-
if (msg->msg_ubuf) {
uarg = msg->msg_ubuf;
- net_zcopy_get(uarg);
- zc = sk->sk_route_caps & NETIF_F_SG;
+ if (sk->sk_route_caps & NETIF_F_SG)
+ zc = MSG_ZEROCOPY;
} else if (sock_flag(sk, SOCK_ZEROCOPY)) {
+ skb = tcp_write_queue_tail(sk);
uarg = msg_zerocopy_realloc(sk, size, skb_zcopy(skb));
if (!uarg) {
err = -ENOBUFS;
goto out_err;
}
- zc = sk->sk_route_caps & NETIF_F_SG;
- if (!zc)
+ if (sk->sk_route_caps & NETIF_F_SG)
+ zc = MSG_ZEROCOPY;
+ else
uarg_to_msgzc(uarg)->zerocopy = 0;
}
+ } else if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES) && size) {
+ if (sk->sk_route_caps & NETIF_F_SG)
+ zc = MSG_SPLICE_PAGES;
}
if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) &&
@@ -1307,7 +1160,7 @@ restart:
goto do_error;
while (msg_data_left(msg)) {
- int copy = 0;
+ ssize_t copy = 0;
skb = tcp_write_queue_tail(sk);
if (skb)
@@ -1326,7 +1179,7 @@ new_segment:
goto restart;
}
first_skb = tcp_rtx_and_write_queues_empty(sk);
- skb = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation,
+ skb = tcp_stream_alloc_skb(sk, sk->sk_allocation,
first_skb);
if (!skb)
goto wait_for_space;
@@ -1348,7 +1201,7 @@ new_segment:
if (copy > msg_data_left(msg))
copy = msg_data_left(msg);
- if (!zc) {
+ if (zc == 0) {
bool merge = true;
int i = skb_shinfo(skb)->nr_frags;
struct page_frag *pfrag = sk_page_frag(sk);
@@ -1393,7 +1246,7 @@ new_segment:
page_ref_inc(pfrag->page);
}
pfrag->offset += copy;
- } else {
+ } else if (zc == MSG_ZEROCOPY) {
/* First append to a fragless skb builds initial
* pure zerocopy skb
*/
@@ -1414,6 +1267,30 @@ new_segment:
if (err < 0)
goto do_error;
copy = err;
+ } else if (zc == MSG_SPLICE_PAGES) {
+ /* Splice in data if we can; copy if we can't. */
+ if (tcp_downgrade_zcopy_pure(sk, skb))
+ goto wait_for_space;
+ copy = tcp_wmem_schedule(sk, copy);
+ if (!copy)
+ goto wait_for_space;
+
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+ sk->sk_allocation);
+ if (err < 0) {
+ if (err == -EMSGSIZE) {
+ tcp_mark_push(tp, skb);
+ goto new_segment;
+ }
+ goto do_error;
+ }
+ copy = err;
+
+ if (!(flags & MSG_NO_SHARED_FRAGS))
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
+
+ sk_wmem_queued_add(sk, copy);
+ sk_mem_charge(sk, copy);
}
if (!copied)
@@ -1459,7 +1336,9 @@ out:
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
}
out_nopush:
- net_zcopy_put(uarg);
+ /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
+ if (uarg && !msg->msg_ubuf)
+ net_zcopy_put(uarg);
return copied + copied_syn;
do_error:
@@ -1468,7 +1347,9 @@ do_error:
if (copied + copied_syn)
goto out;
out_err:
- net_zcopy_put_abort(uarg, true);
+ /* msg->msg_ubuf is pinned by the caller so we don't take extra refs */
+ if (uarg && !msg->msg_ubuf)
+ net_zcopy_put_abort(uarg, true);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
@@ -1491,6 +1372,22 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
}
EXPORT_SYMBOL(tcp_sendmsg);
+void tcp_splice_eof(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct tcp_sock *tp = tcp_sk(sk);
+ int mss_now, size_goal;
+
+ if (!tcp_write_queue_tail(sk))
+ return;
+
+ lock_sock(sk);
+ mss_now = tcp_send_mss(sk, &size_goal, 0);
+ tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
+ release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(tcp_splice_eof);
+
/*
* Handle reading urgent data. BSD has very simple semantics for
* this, no blocking and very strange errors 8)
@@ -1877,7 +1774,7 @@ void tcp_update_recv_tstamps(struct sk_buff *skb,
}
#ifdef CONFIG_MMU
-static const struct vm_operations_struct tcp_vm_ops = {
+const struct vm_operations_struct tcp_vm_ops = {
};
int tcp_mmap(struct file *file, struct socket *sock,
@@ -2176,6 +2073,34 @@ static void tcp_zc_finalize_rx_tstamp(struct sock *sk,
}
}
+static struct vm_area_struct *find_tcp_vma(struct mm_struct *mm,
+ unsigned long address,
+ bool *mmap_locked)
+{
+ struct vm_area_struct *vma = NULL;
+
+#ifdef CONFIG_PER_VMA_LOCK
+ vma = lock_vma_under_rcu(mm, address);
+#endif
+ if (vma) {
+ if (!vma_is_tcp(vma)) {
+ vma_end_read(vma);
+ return NULL;
+ }
+ *mmap_locked = false;
+ return vma;
+ }
+
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, address);
+ if (!vma || !vma_is_tcp(vma)) {
+ mmap_read_unlock(mm);
+ return NULL;
+ }
+ *mmap_locked = true;
+ return vma;
+}
+
#define TCP_ZEROCOPY_PAGE_BATCH_SIZE 32
static int tcp_zerocopy_receive(struct sock *sk,
struct tcp_zerocopy_receive *zc,
@@ -2193,6 +2118,7 @@ static int tcp_zerocopy_receive(struct sock *sk,
u32 seq = tp->copied_seq;
u32 total_bytes_to_map;
int inq = tcp_inq(sk);
+ bool mmap_locked;
int ret;
zc->copybuf_len = 0;
@@ -2217,13 +2143,10 @@ static int tcp_zerocopy_receive(struct sock *sk,
return 0;
}
- mmap_read_lock(current->mm);
-
- vma = vma_lookup(current->mm, address);
- if (!vma || vma->vm_ops != &tcp_vm_ops) {
- mmap_read_unlock(current->mm);
+ vma = find_tcp_vma(current->mm, address, &mmap_locked);
+ if (!vma)
return -EINVAL;
- }
+
vma_len = min_t(unsigned long, zc->length, vma->vm_end - address);
avail_len = min_t(u32, vma_len, inq);
total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
@@ -2297,7 +2220,10 @@ static int tcp_zerocopy_receive(struct sock *sk,
zc, total_bytes_to_map);
}
out:
- mmap_read_unlock(current->mm);
+ if (mmap_locked)
+ mmap_read_unlock(current->mm);
+ else
+ vma_end_read(vma);
/* Try to copy straggler data. */
if (!ret)
copylen = tcp_zc_handle_leftover(zc, sk, skb, &seq, copybuf_len, tss);
@@ -4680,8 +4606,10 @@ int tcp_abort(struct sock *sk, int err)
return 0;
}
- /* Don't race with userspace socket closes such as tcp_close. */
- lock_sock(sk);
+ /* BPF context ensures sock locking. */
+ if (!has_current_bpf_ctx())
+ /* Don't race with userspace socket closes such as tcp_close. */
+ lock_sock(sk);
if (sk->sk_state == TCP_LISTEN) {
tcp_set_state(sk, TCP_CLOSE);
@@ -4705,7 +4633,8 @@ int tcp_abort(struct sock *sk, int err)
bh_unlock_sock(sk);
local_bh_enable();
tcp_write_queue_purge(sk);
- release_sock(sk);
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
return 0;
}
EXPORT_SYMBOL_GPL(tcp_abort);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 5f93918c063c..5a84053ac62b 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -90,11 +90,13 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
{
bool apply = apply_bytes;
struct scatterlist *sge;
+ struct msghdr msghdr = { .msg_flags = flags | MSG_SPLICE_PAGES, };
struct page *page;
int size, ret = 0;
u32 off;
while (1) {
+ struct bio_vec bvec;
bool has_tx_ulp;
sge = sk_msg_elem(msg, msg->sg.start);
@@ -106,16 +108,18 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
tcp_rate_check_app_limited(sk);
retry:
has_tx_ulp = tls_sw_has_ctx_tx(sk);
- if (has_tx_ulp) {
- flags |= MSG_SENDPAGE_NOPOLICY;
- ret = kernel_sendpage_locked(sk,
- page, off, size, flags);
- } else {
- ret = do_tcp_sendpages(sk, page, off, size, flags);
- }
+ if (has_tx_ulp)
+ msghdr.msg_flags |= MSG_SENDPAGE_NOPOLICY;
+
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msghdr.msg_flags |= MSG_MORE;
+ bvec_set_page(&bvec, page, size, off);
+ iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ ret = tcp_sendmsg_locked(sk, &msghdr, size);
if (ret <= 0)
return ret;
+
if (apply)
apply_bytes -= ret;
msg->sg.size -= ret;
@@ -481,7 +485,7 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
long timeo;
int flags;
- /* Don't let internal do_tcp_sendpages() flags through */
+ /* Don't let internal sendpage flags through */
flags = (msg->msg_flags & ~MSG_SENDPAGE_DECRYPTED);
flags |= MSG_NO_SHARED_FRAGS;
@@ -564,49 +568,18 @@ out_err:
static int tcp_bpf_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
- struct sk_msg tmp, *msg = NULL;
- int err = 0, copied = 0;
- struct sk_psock *psock;
- bool enospc = false;
+ struct bio_vec bvec;
+ struct msghdr msg = {
+ .msg_flags = flags | MSG_SPLICE_PAGES,
+ };
- psock = sk_psock_get(sk);
- if (unlikely(!psock))
- return tcp_sendpage(sk, page, offset, size, flags);
-
- lock_sock(sk);
- if (psock->cork) {
- msg = psock->cork;
- } else {
- msg = &tmp;
- sk_msg_init(msg);
- }
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
- /* Catch case where ring is full and sendpage is stalled. */
- if (unlikely(sk_msg_full(msg)))
- goto out_err;
-
- sk_msg_page_add(msg, page, size, offset);
- sk_mem_charge(sk, size);
- copied = size;
- if (sk_msg_full(msg))
- enospc = true;
- if (psock->cork_bytes) {
- if (size > psock->cork_bytes)
- psock->cork_bytes = 0;
- else
- psock->cork_bytes -= size;
- if (psock->cork_bytes && !enospc)
- goto out_err;
- /* All cork bytes are accounted, rerun the prog. */
- psock->eval = __SK_NONE;
- psock->cork_bytes = 0;
- }
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
- err = tcp_bpf_send_verdict(sk, psock, msg, &copied, flags);
-out_err:
- release_sock(sk);
- sk_psock_put(sk, psock);
- return copied ? copied : err;
+ return tcp_bpf_sendmsg(sk, &msg, size);
}
enum {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 06d2573685ca..9213804b034f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
u64 transmit_time = 0;
struct sock *ctl_sk;
struct net *net;
+ u32 txhash = 0;
/* Never send a reset in response to a reset. */
if (th->rst)
@@ -829,6 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
inet_twsk(sk)->tw_priority : sk->sk_priority;
transmit_time = tcp_transmit_time(sk);
xfrm_sk_clone_policy(ctl_sk, sk);
+ txhash = (sk->sk_state == TCP_TIME_WAIT) ?
+ inet_twsk(sk)->tw_txhash : sk->sk_txhash;
} else {
ctl_sk->sk_mark = 0;
ctl_sk->sk_priority = 0;
@@ -837,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
- transmit_time);
+ transmit_time, txhash);
xfrm_sk_free_policy(ctl_sk);
sock_net_set(ctl_sk, &init_net);
@@ -859,7 +862,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
struct sk_buff *skb, u32 seq, u32 ack,
u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key,
- int reply_flags, u8 tos)
+ int reply_flags, u8 tos, u32 txhash)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -935,7 +938,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
&arg, arg.iov[0].iov_len,
- transmit_time);
+ transmit_time, txhash);
sock_net_set(ctl_sk, &init_net);
__TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
@@ -955,7 +958,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
tw->tw_bound_dev_if,
tcp_twsk_md5_key(tcptw),
tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
- tw->tw_tos
+ tw->tw_tos,
+ tw->tw_txhash
);
inet_twsk_put(tw);
@@ -988,7 +992,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
0,
tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
- ip_hdr(skb)->tos);
+ ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
}
/*
@@ -2963,7 +2967,6 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
struct bpf_iter_meta meta;
struct bpf_prog *prog;
struct sock *sk = v;
- bool slow;
uid_t uid;
int ret;
@@ -2971,7 +2974,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
return 0;
if (sk_fullsock(sk))
- slow = lock_sock_fast(sk);
+ lock_sock(sk);
if (unlikely(sk_unhashed(sk))) {
ret = SEQ_SKIP;
@@ -2995,7 +2998,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
unlock:
if (sk_fullsock(sk))
- unlock_sock_fast(sk, slow);
+ release_sock(sk);
return ret;
}
@@ -3113,6 +3116,7 @@ struct proto tcp_prot = {
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
+ .splice_eof = tcp_splice_eof,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v4_do_rcv,
.release_cb = tcp_release_cb,
@@ -3276,6 +3280,9 @@ static int __net_init tcp_sk_init(struct net *net)
else
net->ipv4.tcp_congestion_control = &tcp_reno;
+ net->ipv4.sysctl_tcp_syn_linear_timeouts = 4;
+ net->ipv4.sysctl_tcp_shrink_window = 0;
+
return 0;
}
@@ -3356,7 +3363,7 @@ static struct bpf_iter_reg tcp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__tcp, sk_common),
- PTR_TO_BTF_ID_OR_NULL },
+ PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.get_func_proto = bpf_iter_tcp_get_func_proto,
.seq_info = &tcp_seq_info,
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index dac0d62120e6..04fc328727e6 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -303,6 +303,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tcptw->tw_ts_offset = tp->tsoffset;
tcptw->tw_last_oow_ack_time = 0;
tcptw->tw_tx_delay = tp->tcp_tx_delay;
+ tw->tw_txhash = sk->sk_txhash;
#if IS_ENABLED(CONFIG_IPV6)
if (tw->tw_family == PF_INET6) {
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -311,7 +312,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_tclass = np->tclass;
tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
- tw->tw_txhash = sk->sk_txhash;
tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 4851211aa60d..8311c38267b5 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -9,6 +9,7 @@
#include <linux/indirect_call_wrapper.h>
#include <linux/skbuff.h>
#include <net/gro.h>
+#include <net/gso.h>
#include <net/tcp.h>
#include <net/protocol.h>
@@ -295,7 +296,7 @@ out:
return pp;
}
-int tcp_gro_complete(struct sk_buff *skb)
+void tcp_gro_complete(struct sk_buff *skb)
{
struct tcphdr *th = tcp_hdr(skb);
@@ -310,8 +311,6 @@ int tcp_gro_complete(struct sk_buff *skb)
if (skb->encapsulation)
skb->inner_transport_header = skb->transport_header;
-
- return 0;
}
EXPORT_SYMBOL(tcp_gro_complete);
@@ -341,7 +340,8 @@ INDIRECT_CALLABLE_SCOPE int tcp4_gro_complete(struct sk_buff *skb, int thoff)
if (NAPI_GRO_CB(skb)->is_atomic)
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID;
- return tcp_gro_complete(skb);
+ tcp_gro_complete(skb);
+ return 0;
}
static const struct net_offload tcpv4_offload = {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index cfe128b81a01..2cb39b6dad02 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -260,8 +260,8 @@ static u16 tcp_select_window(struct sock *sk)
u32 old_win = tp->rcv_wnd;
u32 cur_win = tcp_receive_window(tp);
u32 new_win = __tcp_select_window(sk);
+ struct net *net = sock_net(sk);
- /* Never shrink the offered window */
if (new_win < cur_win) {
/* Danger Will Robinson!
* Don't update rcv_wup/rcv_wnd here or else
@@ -270,11 +270,14 @@ static u16 tcp_select_window(struct sock *sk)
*
* Relax Will Robinson.
*/
- if (new_win == 0)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPWANTZEROWINDOWADV);
- new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_shrink_window) || !tp->rx_opt.rcv_wscale) {
+ /* Never shrink the offered window */
+ if (new_win == 0)
+ NET_INC_STATS(net, LINUX_MIB_TCPWANTZEROWINDOWADV);
+ new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
+ }
}
+
tp->rcv_wnd = new_win;
tp->rcv_wup = tp->rcv_nxt;
@@ -282,7 +285,7 @@ static u16 tcp_select_window(struct sock *sk)
* scaled window.
*/
if (!tp->rx_opt.rcv_wscale &&
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
+ READ_ONCE(net->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
@@ -294,10 +297,9 @@ static u16 tcp_select_window(struct sock *sk)
if (new_win == 0) {
tp->pred_flags = 0;
if (old_win)
- NET_INC_STATS(sock_net(sk),
- LINUX_MIB_TCPTOZEROWINDOWADV);
+ NET_INC_STATS(net, LINUX_MIB_TCPTOZEROWINDOWADV);
} else if (old_win == 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
+ NET_INC_STATS(net, LINUX_MIB_TCPFROMZEROWINDOWADV);
}
return new_win;
@@ -1530,7 +1532,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
- int nsize, old_factor;
+ int old_factor;
long limit;
int nlen;
u8 flags;
@@ -1538,9 +1540,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
if (WARN_ON(len > skb->len))
return -EINVAL;
- nsize = skb_headlen(skb) - len;
- if (nsize < 0)
- nsize = 0;
+ DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
/* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
* We need some allowance to not penalize applications setting small
@@ -1560,7 +1560,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
return -ENOMEM;
/* Get a new skb... force flag on. */
- buff = tcp_stream_alloc_skb(sk, nsize, gfp, true);
+ buff = tcp_stream_alloc_skb(sk, gfp, true);
if (!buff)
return -ENOMEM; /* We'll just try again later. */
skb_copy_decrypted(buff, skb);
@@ -1568,7 +1568,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue,
sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize);
- nlen = skb->len - len - nsize;
+ nlen = skb->len - len;
buff->truesize += nlen;
skb->truesize -= nlen;
@@ -1626,13 +1626,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len)
struct skb_shared_info *shinfo;
int i, k, eat;
- eat = min_t(int, len, skb_headlen(skb));
- if (eat) {
- __skb_pull(skb, eat);
- len -= eat;
- if (!len)
- return 0;
- }
+ DEBUG_NET_WARN_ON_ONCE(skb_headlen(skb));
eat = len;
k = 0;
shinfo = skb_shinfo(skb);
@@ -1671,12 +1665,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
TCP_SKB_CB(skb)->seq += len;
- if (delta_truesize) {
- skb->truesize -= delta_truesize;
- sk_wmem_queued_add(sk, -delta_truesize);
- if (!skb_zcopy_pure(skb))
- sk_mem_uncharge(sk, delta_truesize);
- }
+ skb->truesize -= delta_truesize;
+ sk_wmem_queued_add(sk, -delta_truesize);
+ if (!skb_zcopy_pure(skb))
+ sk_mem_uncharge(sk, delta_truesize);
/* Any change of skb->len requires recalculation of tso factor. */
if (tcp_skb_pcount(skb) > 1)
@@ -2126,11 +2118,9 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len,
u8 flags;
/* All of a TSO frame must be composed of paged data. */
- if (skb->len != skb->data_len)
- return tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE,
- skb, len, mss_now, gfp);
+ DEBUG_NET_WARN_ON_ONCE(skb->len != skb->data_len);
- buff = tcp_stream_alloc_skb(sk, 0, gfp, true);
+ buff = tcp_stream_alloc_skb(sk, gfp, true);
if (unlikely(!buff))
return -ENOMEM;
skb_copy_decrypted(buff, skb);
@@ -2319,6 +2309,57 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len)
return true;
}
+static int tcp_clone_payload(struct sock *sk, struct sk_buff *to,
+ int probe_size)
+{
+ skb_frag_t *lastfrag = NULL, *fragto = skb_shinfo(to)->frags;
+ int i, todo, len = 0, nr_frags = 0;
+ const struct sk_buff *skb;
+
+ if (!sk_wmem_schedule(sk, to->truesize + probe_size))
+ return -ENOMEM;
+
+ skb_queue_walk(&sk->sk_write_queue, skb) {
+ const skb_frag_t *fragfrom = skb_shinfo(skb)->frags;
+
+ if (skb_headlen(skb))
+ return -EINVAL;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, fragfrom++) {
+ if (len >= probe_size)
+ goto commit;
+ todo = min_t(int, skb_frag_size(fragfrom),
+ probe_size - len);
+ len += todo;
+ if (lastfrag &&
+ skb_frag_page(fragfrom) == skb_frag_page(lastfrag) &&
+ skb_frag_off(fragfrom) == skb_frag_off(lastfrag) +
+ skb_frag_size(lastfrag)) {
+ skb_frag_size_add(lastfrag, todo);
+ continue;
+ }
+ if (unlikely(nr_frags == MAX_SKB_FRAGS))
+ return -E2BIG;
+ skb_frag_page_copy(fragto, fragfrom);
+ skb_frag_off_copy(fragto, fragfrom);
+ skb_frag_size_set(fragto, todo);
+ nr_frags++;
+ lastfrag = fragto++;
+ }
+ }
+commit:
+ WARN_ON_ONCE(len != probe_size);
+ for (i = 0; i < nr_frags; i++)
+ skb_frag_ref(to, i);
+
+ skb_shinfo(to)->nr_frags = nr_frags;
+ to->truesize += probe_size;
+ to->len += probe_size;
+ to->data_len += probe_size;
+ __skb_header_release(to);
+ return 0;
+}
+
/* Create a new MTU probe if we are ready.
* MTU probe is regularly attempting to increase the path MTU by
* deliberately sending larger packets. This discovers routing
@@ -2395,9 +2436,15 @@ static int tcp_mtu_probe(struct sock *sk)
return -1;
/* We're allowed to probe. Build it now. */
- nskb = tcp_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
+ nskb = tcp_stream_alloc_skb(sk, GFP_ATOMIC, false);
if (!nskb)
return -1;
+
+ /* build the payload, and be prepared to abort if this fails. */
+ if (tcp_clone_payload(sk, nskb, probe_size)) {
+ consume_skb(nskb);
+ return -1;
+ }
sk_wmem_queued_add(sk, nskb->truesize);
sk_mem_charge(sk, nskb->truesize);
@@ -2415,7 +2462,6 @@ static int tcp_mtu_probe(struct sock *sk)
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
copy = min_t(int, skb->len, probe_size - len);
- skb_copy_bits(skb, 0, skb_put(nskb, copy), copy);
if (skb->len <= copy) {
/* We've eaten all the data from this skb.
@@ -2431,12 +2477,8 @@ static int tcp_mtu_probe(struct sock *sk)
} else {
TCP_SKB_CB(nskb)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags &
~(TCPHDR_FIN|TCPHDR_PSH);
- if (!skb_shinfo(skb)->nr_frags) {
- skb_pull(skb, copy);
- } else {
- __pskb_trim_head(skb, copy);
- tcp_set_skb_tso_segs(skb, mss_now);
- }
+ __pskb_trim_head(skb, copy);
+ tcp_set_skb_tso_segs(skb, mss_now);
TCP_SKB_CB(skb)->seq += copy;
}
@@ -2947,6 +2989,7 @@ u32 __tcp_select_window(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
/* MSS for the peer's data. Previous versions used mss_clamp
* here. I don't know if the value based on our guesses
* of peer's MSS is better for the performance. It's more correct
@@ -2968,6 +3011,15 @@ u32 __tcp_select_window(struct sock *sk)
if (mss <= 0)
return 0;
}
+
+ /* Only allow window shrink if the sysctl is enabled and we have
+ * a non-zero scaling factor in effect.
+ */
+ if (READ_ONCE(net->ipv4.sysctl_tcp_shrink_window) && tp->rx_opt.rcv_wscale)
+ goto shrink_window_allowed;
+
+ /* do not allow window to shrink */
+
if (free_space < (full_space >> 1)) {
icsk->icsk_ack.quick = 0;
@@ -3022,6 +3074,36 @@ u32 __tcp_select_window(struct sock *sk)
}
return window;
+
+shrink_window_allowed:
+ /* new window should always be an exact multiple of scaling factor */
+ free_space = round_down(free_space, 1 << tp->rx_opt.rcv_wscale);
+
+ if (free_space < (full_space >> 1)) {
+ icsk->icsk_ack.quick = 0;
+
+ if (tcp_under_memory_pressure(sk))
+ tcp_adjust_rcv_ssthresh(sk);
+
+ /* if free space is too low, return a zero window */
+ if (free_space < (allowed_space >> 4) || free_space < mss ||
+ free_space < (1 << tp->rx_opt.rcv_wscale))
+ return 0;
+ }
+
+ if (free_space > tp->rcv_ssthresh) {
+ free_space = tp->rcv_ssthresh;
+ /* new window should always be an exact multiple of scaling factor
+ *
+ * For this case, we ALIGN "up" (increase free_space) because
+ * we know free_space is not zero here, it has been reduced from
+ * the memory-based limit, and rcv_ssthresh is not a hard limit
+ * (unlike sk_rcvbuf).
+ */
+ free_space = ALIGN(free_space, (1 << tp->rx_opt.rcv_wscale));
+ }
+
+ return free_space;
}
void tcp_skb_collapse_tstamp(struct sk_buff *skb,
@@ -3746,8 +3828,9 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
- int space, err = 0;
+ struct page_frag *pfrag = sk_page_frag(sk);
struct sk_buff *syn_data;
+ int space, err = 0;
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
@@ -3766,25 +3849,31 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
space = min_t(size_t, space, fo->size);
- /* limit to order-0 allocations */
- space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
-
- syn_data = tcp_stream_alloc_skb(sk, space, sk->sk_allocation, false);
+ if (space &&
+ !skb_page_frag_refill(min_t(size_t, space, PAGE_SIZE),
+ pfrag, sk->sk_allocation))
+ goto fallback;
+ syn_data = tcp_stream_alloc_skb(sk, sk->sk_allocation, false);
if (!syn_data)
goto fallback;
memcpy(syn_data->cb, syn->cb, sizeof(syn->cb));
if (space) {
- int copied = copy_from_iter(skb_put(syn_data, space), space,
- &fo->data->msg_iter);
- if (unlikely(!copied)) {
+ space = min_t(size_t, space, pfrag->size - pfrag->offset);
+ space = tcp_wmem_schedule(sk, space);
+ }
+ if (space) {
+ space = copy_page_from_iter(pfrag->page, pfrag->offset,
+ space, &fo->data->msg_iter);
+ if (unlikely(!space)) {
tcp_skb_tsorted_anchor_cleanup(syn_data);
kfree_skb(syn_data);
goto fallback;
}
- if (copied != space) {
- skb_trim(syn_data, copied);
- space = copied;
- }
+ skb_fill_page_desc(syn_data, 0, pfrag->page,
+ pfrag->offset, space);
+ page_ref_inc(pfrag->page);
+ pfrag->offset += space;
+ skb_len_add(syn_data, space);
skb_zcopy_set(syn_data, fo->uarg, NULL);
}
/* No more data pending in inet_wait_for_connect() */
@@ -3849,7 +3938,7 @@ int tcp_connect(struct sock *sk)
return 0;
}
- buff = tcp_stream_alloc_skb(sk, 0, sk->sk_allocation, true);
+ buff = tcp_stream_alloc_skb(sk, sk->sk_allocation, true);
if (unlikely(!buff))
return -ENOBUFS;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 39eb947fe392..470f581eedd4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -234,14 +234,19 @@ static int tcp_write_timeout(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool expired = false, do_reset;
- int retry_until;
+ int retry_until, max_retransmits;
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
retry_until = icsk->icsk_syn_retries ? :
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
- expired = icsk->icsk_retransmits >= retry_until;
+
+ max_retransmits = retry_until;
+ if (sk->sk_state == TCP_SYN_SENT)
+ max_retransmits += READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts);
+
+ expired = icsk->icsk_retransmits >= max_retransmits;
} else {
if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
@@ -587,8 +592,12 @@ out_reset_timer:
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
- } else {
- /* Use normal (exponential) backoff */
+ } else if (sk->sk_state != TCP_SYN_SENT ||
+ icsk->icsk_backoff >
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
+ /* Use normal (exponential) backoff unless linear timeouts are
+ * activated.
+ */
icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9482def1f310..48fdcd3cad9c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -103,6 +103,7 @@
#include <net/ip_tunnels.h>
#include <net/route.h>
#include <net/checksum.h>
+#include <net/gso.h>
#include <net/xfrm.h>
#include <trace/events/udp.h>
#include <linux/static_key.h>
@@ -1062,8 +1063,8 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
int free = 0;
int connected = 0;
__be32 daddr, faddr, saddr;
+ u8 tos, scope;
__be16 dport;
- u8 tos;
int err, is_udplite = IS_UDPLITE(sk);
int corkreq = READ_ONCE(up->corkflag) || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
@@ -1183,12 +1184,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
connected = 0;
}
tos = get_rttos(&ipc, inet);
- if (sock_flag(sk, SOCK_LOCALROUTE) ||
- (msg->msg_flags & MSG_DONTROUTE) ||
- (ipc.opt && ipc.opt->opt.is_strictroute)) {
- tos |= RTO_ONLINK;
+ scope = ip_sendmsg_scope(inet, &ipc, msg);
+ if (scope == RT_SCOPE_LINK)
connected = 0;
- }
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif))
@@ -1221,11 +1219,9 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &fl4_stack;
- flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos,
- RT_SCOPE_UNIVERSE, sk->sk_protocol,
- flow_flags,
- faddr, saddr, dport, inet->inet_sport,
- sk->sk_uid);
+ flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, scope,
+ sk->sk_protocol, flow_flags, faddr, saddr,
+ dport, inet->inet_sport, sk->sk_uid);
security_sk_classify_flow(sk, flowi4_to_flowi_common(fl4));
rt = ip_route_output_flow(net, fl4, sk);
@@ -1329,57 +1325,33 @@ do_confirm:
}
EXPORT_SYMBOL(udp_sendmsg);
-int udp_sendpage(struct sock *sk, struct page *page, int offset,
- size_t size, int flags)
+void udp_splice_eof(struct socket *sock)
{
- struct inet_sock *inet = inet_sk(sk);
+ struct sock *sk = sock->sk;
struct udp_sock *up = udp_sk(sk);
- int ret;
-
- if (flags & MSG_SENDPAGE_NOTLAST)
- flags |= MSG_MORE;
-
- if (!up->pending) {
- struct msghdr msg = { .msg_flags = flags|MSG_MORE };
- /* Call udp_sendmsg to specify destination address which
- * sendpage interface can't pass.
- * This will succeed only when the socket is connected.
- */
- ret = udp_sendmsg(sk, &msg, 0);
- if (ret < 0)
- return ret;
- }
+ if (!up->pending || READ_ONCE(up->corkflag))
+ return;
lock_sock(sk);
+ if (up->pending && !READ_ONCE(up->corkflag))
+ udp_push_pending_frames(sk);
+ release_sock(sk);
+}
+EXPORT_SYMBOL_GPL(udp_splice_eof);
- if (unlikely(!up->pending)) {
- release_sock(sk);
-
- net_dbg_ratelimited("cork failed\n");
- return -EINVAL;
- }
+int udp_sendpage(struct sock *sk, struct page *page, int offset,
+ size_t size, int flags)
+{
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };
- ret = ip_append_page(sk, &inet->cork.fl.u.ip4,
- page, offset, size, flags);
- if (ret == -EOPNOTSUPP) {
- release_sock(sk);
- return sock_no_sendpage(sk->sk_socket, page, offset,
- size, flags);
- }
- if (ret < 0) {
- udp_flush_pending_frames(sk);
- goto out;
- }
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
- up->len += size;
- if (!(READ_ONCE(up->corkflag) || (flags&MSG_MORE)))
- ret = udp_push_pending_frames(sk);
- if (!ret)
- ret = size;
-out:
- release_sock(sk);
- return ret;
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return udp_sendmsg(sk, &msg, size);
}
#define UDP_SKB_IS_STATELESS 0x80000000
@@ -1720,21 +1692,19 @@ static int first_packet_length(struct sock *sk)
* IOCTL requests applicable to the UDP protocol
*/
-int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+int udp_ioctl(struct sock *sk, int cmd, int *karg)
{
switch (cmd) {
case SIOCOUTQ:
{
- int amount = sk_wmem_alloc_get(sk);
-
- return put_user(amount, (int __user *)arg);
+ *karg = sk_wmem_alloc_get(sk);
+ return 0;
}
case SIOCINQ:
{
- int amount = max_t(int, 0, first_packet_length(sk));
-
- return put_user(amount, (int __user *)arg);
+ *karg = max_t(int, 0, first_packet_length(sk));
+ return 0;
}
default:
@@ -2927,7 +2897,8 @@ EXPORT_SYMBOL(udp_poll);
int udp_abort(struct sock *sk, int err)
{
- lock_sock(sk);
+ if (!has_current_bpf_ctx())
+ lock_sock(sk);
/* udp{v6}_destroy_sock() sets it under the sk lock, avoid racing
* with close()
@@ -2940,7 +2911,8 @@ int udp_abort(struct sock *sk, int err)
__udp_disconnect(sk, 0);
out:
- release_sock(sk);
+ if (!has_current_bpf_ctx())
+ release_sock(sk);
return 0;
}
@@ -2960,6 +2932,7 @@ struct proto udp_prot = {
.getsockopt = udp_getsockopt,
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
+ .splice_eof = udp_splice_eof,
.sendpage = udp_sendpage,
.release_cb = ip4_datagram_release_cb,
.hash = udp_lib_hash,
@@ -2985,9 +2958,30 @@ EXPORT_SYMBOL(udp_prot);
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
-static struct udp_table *udp_get_table_afinfo(struct udp_seq_afinfo *afinfo,
- struct net *net)
+static unsigned short seq_file_family(const struct seq_file *seq);
+static bool seq_sk_match(struct seq_file *seq, const struct sock *sk)
{
+ unsigned short family = seq_file_family(seq);
+
+ /* AF_UNSPEC is used as a match all */
+ return ((family == AF_UNSPEC || family == sk->sk_family) &&
+ net_eq(sock_net(sk), seq_file_net(seq)));
+}
+
+#ifdef CONFIG_BPF_SYSCALL
+static const struct seq_operations bpf_iter_udp_seq_ops;
+#endif
+static struct udp_table *udp_get_table_seq(struct seq_file *seq,
+ struct net *net)
+{
+ const struct udp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+ if (seq->op == &bpf_iter_udp_seq_ops)
+ return net->ipv4.udp_table;
+#endif
+
+ afinfo = pde_data(file_inode(seq->file));
return afinfo->udp_table ? : net->ipv4.udp_table;
}
@@ -2995,16 +2989,10 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
{
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct udp_seq_afinfo *afinfo;
struct udp_table *udptable;
struct sock *sk;
- if (state->bpf_seq_afinfo)
- afinfo = state->bpf_seq_afinfo;
- else
- afinfo = pde_data(file_inode(seq->file));
-
- udptable = udp_get_table_afinfo(afinfo, net);
+ udptable = udp_get_table_seq(seq, net);
for (state->bucket = start; state->bucket <= udptable->mask;
++state->bucket) {
@@ -3015,10 +3003,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start)
spin_lock_bh(&hslot->lock);
sk_for_each(sk, &hslot->head) {
- if (!net_eq(sock_net(sk), net))
- continue;
- if (afinfo->family == AF_UNSPEC ||
- sk->sk_family == afinfo->family)
+ if (seq_sk_match(seq, sk))
goto found;
}
spin_unlock_bh(&hslot->lock);
@@ -3032,22 +3017,14 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk)
{
struct udp_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
- struct udp_seq_afinfo *afinfo;
struct udp_table *udptable;
- if (state->bpf_seq_afinfo)
- afinfo = state->bpf_seq_afinfo;
- else
- afinfo = pde_data(file_inode(seq->file));
-
do {
sk = sk_next(sk);
- } while (sk && (!net_eq(sock_net(sk), net) ||
- (afinfo->family != AF_UNSPEC &&
- sk->sk_family != afinfo->family)));
+ } while (sk && !seq_sk_match(seq, sk));
if (!sk) {
- udptable = udp_get_table_afinfo(afinfo, net);
+ udptable = udp_get_table_seq(seq, net);
if (state->bucket <= udptable->mask)
spin_unlock_bh(&udptable->hash[state->bucket].lock);
@@ -3093,15 +3070,9 @@ EXPORT_SYMBOL(udp_seq_next);
void udp_seq_stop(struct seq_file *seq, void *v)
{
struct udp_iter_state *state = seq->private;
- struct udp_seq_afinfo *afinfo;
struct udp_table *udptable;
- if (state->bpf_seq_afinfo)
- afinfo = state->bpf_seq_afinfo;
- else
- afinfo = pde_data(file_inode(seq->file));
-
- udptable = udp_get_table_afinfo(afinfo, seq_file_net(seq));
+ udptable = udp_get_table_seq(seq, seq_file_net(seq));
if (state->bucket <= udptable->mask)
spin_unlock_bh(&udptable->hash[state->bucket].lock);
@@ -3154,6 +3125,143 @@ struct bpf_iter__udp {
int bucket __aligned(8);
};
+struct bpf_udp_iter_state {
+ struct udp_iter_state state;
+ unsigned int cur_sk;
+ unsigned int end_sk;
+ unsigned int max_sk;
+ int offset;
+ struct sock **batch;
+ bool st_bucket_done;
+};
+
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+ unsigned int new_batch_sz);
+static struct sock *bpf_iter_udp_batch(struct seq_file *seq)
+{
+ struct bpf_udp_iter_state *iter = seq->private;
+ struct udp_iter_state *state = &iter->state;
+ struct net *net = seq_file_net(seq);
+ struct udp_table *udptable;
+ unsigned int batch_sks = 0;
+ bool resized = false;
+ struct sock *sk;
+
+ /* The current batch is done, so advance the bucket. */
+ if (iter->st_bucket_done) {
+ state->bucket++;
+ iter->offset = 0;
+ }
+
+ udptable = udp_get_table_seq(seq, net);
+
+again:
+ /* New batch for the next bucket.
+ * Iterate over the hash table to find a bucket with sockets matching
+ * the iterator attributes, and return the first matching socket from
+ * the bucket. The remaining matched sockets from the bucket are batched
+ * before releasing the bucket lock. This allows BPF programs that are
+ * called in seq_show to acquire the bucket lock if needed.
+ */
+ iter->cur_sk = 0;
+ iter->end_sk = 0;
+ iter->st_bucket_done = false;
+ batch_sks = 0;
+
+ for (; state->bucket <= udptable->mask; state->bucket++) {
+ struct udp_hslot *hslot2 = &udptable->hash2[state->bucket];
+
+ if (hlist_empty(&hslot2->head)) {
+ iter->offset = 0;
+ continue;
+ }
+
+ spin_lock_bh(&hslot2->lock);
+ udp_portaddr_for_each_entry(sk, &hslot2->head) {
+ if (seq_sk_match(seq, sk)) {
+ /* Resume from the last iterated socket at the
+ * offset in the bucket before iterator was stopped.
+ */
+ if (iter->offset) {
+ --iter->offset;
+ continue;
+ }
+ if (iter->end_sk < iter->max_sk) {
+ sock_hold(sk);
+ iter->batch[iter->end_sk++] = sk;
+ }
+ batch_sks++;
+ }
+ }
+ spin_unlock_bh(&hslot2->lock);
+
+ if (iter->end_sk)
+ break;
+
+ /* Reset the current bucket's offset before moving to the next bucket. */
+ iter->offset = 0;
+ }
+
+ /* All done: no batch made. */
+ if (!iter->end_sk)
+ return NULL;
+
+ if (iter->end_sk == batch_sks) {
+ /* Batching is done for the current bucket; return the first
+ * socket to be iterated from the batch.
+ */
+ iter->st_bucket_done = true;
+ goto done;
+ }
+ if (!resized && !bpf_iter_udp_realloc_batch(iter, batch_sks * 3 / 2)) {
+ resized = true;
+ /* After allocating a larger batch, retry one more time to grab
+ * the whole bucket.
+ */
+ state->bucket--;
+ goto again;
+ }
+done:
+ return iter->batch[0];
+}
+
+static void *bpf_iter_udp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct bpf_udp_iter_state *iter = seq->private;
+ struct sock *sk;
+
+ /* Whenever seq_next() is called, the iter->cur_sk is
+ * done with seq_show(), so unref the iter->cur_sk.
+ */
+ if (iter->cur_sk < iter->end_sk) {
+ sock_put(iter->batch[iter->cur_sk++]);
+ ++iter->offset;
+ }
+
+ /* After updating iter->cur_sk, check if there are more sockets
+ * available in the current bucket batch.
+ */
+ if (iter->cur_sk < iter->end_sk)
+ sk = iter->batch[iter->cur_sk];
+ else
+ /* Prepare a new batch. */
+ sk = bpf_iter_udp_batch(seq);
+
+ ++*pos;
+ return sk;
+}
+
+static void *bpf_iter_udp_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ /* bpf iter does not support lseek, so it always
+ * continue from where it was stop()-ped.
+ */
+ if (*pos)
+ return bpf_iter_udp_batch(seq);
+
+ return SEQ_START_TOKEN;
+}
+
static int udp_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta,
struct udp_sock *udp_sk, uid_t uid, int bucket)
{
@@ -3174,18 +3282,37 @@ static int bpf_iter_udp_seq_show(struct seq_file *seq, void *v)
struct bpf_prog *prog;
struct sock *sk = v;
uid_t uid;
+ int ret;
if (v == SEQ_START_TOKEN)
return 0;
+ lock_sock(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+ goto unlock;
+ }
+
uid = from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk));
meta.seq = seq;
prog = bpf_iter_get_info(&meta, false);
- return udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+ ret = udp_prog_seq_show(prog, &meta, v, uid, state->bucket);
+
+unlock:
+ release_sock(sk);
+ return ret;
+}
+
+static void bpf_iter_udp_put_batch(struct bpf_udp_iter_state *iter)
+{
+ while (iter->cur_sk < iter->end_sk)
+ sock_put(iter->batch[iter->cur_sk++]);
}
static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
{
+ struct bpf_udp_iter_state *iter = seq->private;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
@@ -3196,17 +3323,35 @@ static void bpf_iter_udp_seq_stop(struct seq_file *seq, void *v)
(void)udp_prog_seq_show(prog, &meta, v, 0, 0);
}
- udp_seq_stop(seq, v);
+ if (iter->cur_sk < iter->end_sk) {
+ bpf_iter_udp_put_batch(iter);
+ iter->st_bucket_done = false;
+ }
}
static const struct seq_operations bpf_iter_udp_seq_ops = {
- .start = udp_seq_start,
- .next = udp_seq_next,
+ .start = bpf_iter_udp_seq_start,
+ .next = bpf_iter_udp_seq_next,
.stop = bpf_iter_udp_seq_stop,
.show = bpf_iter_udp_seq_show,
};
#endif
+static unsigned short seq_file_family(const struct seq_file *seq)
+{
+ const struct udp_seq_afinfo *afinfo;
+
+#ifdef CONFIG_BPF_SYSCALL
+ /* BPF iterator: bpf programs to filter sockets. */
+ if (seq->op == &bpf_iter_udp_seq_ops)
+ return AF_UNSPEC;
+#endif
+
+ /* Proc fs iterator */
+ afinfo = pde_data(file_inode(seq->file));
+ return afinfo->family;
+}
+
const struct seq_operations udp_seq_ops = {
.start = udp_seq_start,
.next = udp_seq_next,
@@ -3415,38 +3560,55 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = {
DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta,
struct udp_sock *udp_sk, uid_t uid, int bucket)
-static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
+static int bpf_iter_udp_realloc_batch(struct bpf_udp_iter_state *iter,
+ unsigned int new_batch_sz)
{
- struct udp_iter_state *st = priv_data;
- struct udp_seq_afinfo *afinfo;
- int ret;
+ struct sock **new_batch;
- afinfo = kmalloc(sizeof(*afinfo), GFP_USER | __GFP_NOWARN);
- if (!afinfo)
+ new_batch = kvmalloc_array(new_batch_sz, sizeof(*new_batch),
+ GFP_USER | __GFP_NOWARN);
+ if (!new_batch)
return -ENOMEM;
- afinfo->family = AF_UNSPEC;
- afinfo->udp_table = NULL;
- st->bpf_seq_afinfo = afinfo;
+ bpf_iter_udp_put_batch(iter);
+ kvfree(iter->batch);
+ iter->batch = new_batch;
+ iter->max_sk = new_batch_sz;
+
+ return 0;
+}
+
+#define INIT_BATCH_SZ 16
+
+static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux)
+{
+ struct bpf_udp_iter_state *iter = priv_data;
+ int ret;
+
ret = bpf_iter_init_seq_net(priv_data, aux);
if (ret)
- kfree(afinfo);
+ return ret;
+
+ ret = bpf_iter_udp_realloc_batch(iter, INIT_BATCH_SZ);
+ if (ret)
+ bpf_iter_fini_seq_net(priv_data);
+
return ret;
}
static void bpf_iter_fini_udp(void *priv_data)
{
- struct udp_iter_state *st = priv_data;
+ struct bpf_udp_iter_state *iter = priv_data;
- kfree(st->bpf_seq_afinfo);
bpf_iter_fini_seq_net(priv_data);
+ kvfree(iter->batch);
}
static const struct bpf_iter_seq_info udp_seq_info = {
.seq_ops = &bpf_iter_udp_seq_ops,
.init_seq_private = bpf_iter_init_udp,
.fini_seq_private = bpf_iter_fini_udp,
- .seq_priv_size = sizeof(struct udp_iter_state),
+ .seq_priv_size = sizeof(struct bpf_udp_iter_state),
};
static struct bpf_iter_reg udp_reg_info = {
@@ -3454,7 +3616,7 @@ static struct bpf_iter_reg udp_reg_info = {
.ctx_arg_info_size = 1,
.ctx_arg_info = {
{ offsetof(struct bpf_iter__udp, udp_sk),
- PTR_TO_BTF_ID_OR_NULL },
+ PTR_TO_BTF_ID_OR_NULL | PTR_TRUSTED },
},
.seq_info = &udp_seq_info,
};
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 1f01e15ca24f..75aa4de5b731 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -8,6 +8,7 @@
#include <linux/skbuff.h>
#include <net/gro.h>
+#include <net/gso.h>
#include <net/udp.h>
#include <net/protocol.h>
#include <net/inet_common.h>
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3797917237d0..5479da08ef40 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3633,8 +3633,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
idev->if_flags |= IF_READY;
}
- pr_info("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n",
- dev->name);
+ pr_debug("ADDRCONF(NETDEV_CHANGE): %s: link becomes ready\n",
+ dev->name);
run_pending = 1;
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2bbf13216a3d..b3451cf47d29 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -579,7 +579,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
prot = READ_ONCE(sk->sk_prot);
if (!prot->ioctl)
return -ENOIOCTLCMD;
- return prot->ioctl(sk, cmd, arg);
+ return sk_ioctl(sk, cmd, (void __user *)arg);
}
/*NOTREACHED*/
return 0;
@@ -695,6 +695,7 @@ const struct proto_ops inet6_stream_ops = {
#ifdef CONFIG_MMU
.mmap = tcp_mmap,
#endif
+ .splice_eof = inet_splice_eof,
.sendpage = inet_sendpage,
.sendmsg_locked = tcp_sendmsg_locked,
.sendpage_locked = tcp_sendpage_locked,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 772340268997..a189e08370a5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -17,6 +17,7 @@
#include <linux/err.h>
#include <linux/module.h>
#include <net/gro.h>
+#include <net/gso.h>
#include <net/ip.h>
#include <net/xfrm.h>
#include <net/esp.h>
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 5fa0e37305d9..202fc3aaa83c 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -126,9 +126,6 @@ static bool ip6_parse_tlv(bool hopbyhop,
max_count = -max_count;
}
- if (skb_transport_offset(skb) + len > skb_headlen(skb))
- goto bad;
-
off += 2;
len -= 2;
@@ -402,11 +399,7 @@ looped_back:
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
-
- if (!pskb_pull(skb, offset)) {
- kfree_skb(skb);
- return -1;
- }
+ skb_pull(skb, offset);
skb_postpull_rcsum(skb, skb_transport_header(skb),
offset);
@@ -444,9 +437,9 @@ looped_back:
kfree_skb(skb);
return -1;
}
- }
- hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+ hdr = (struct ipv6_sr_hdr *)skb_transport_header(skb);
+ }
hdr->segments_left--;
addr = hdr->segments + hdr->segments_left;
@@ -458,8 +451,6 @@ looped_back:
ipv6_hdr(skb)->daddr = *addr;
- skb_dst_drop(skb);
-
ip6_route_input(skb);
if (skb_dst(skb)->error) {
@@ -519,11 +510,7 @@ looped_back:
skb_postpull_rcsum(skb, skb_network_header(skb),
skb_network_header_len(skb));
-
- if (!pskb_pull(skb, offset)) {
- kfree_skb(skb);
- return -1;
- }
+ skb_pull(skb, offset);
skb_postpull_rcsum(skb, skb_transport_header(skb),
offset);
@@ -545,11 +532,6 @@ looped_back:
return 1;
}
- if (!pskb_may_pull(skb, sizeof(*hdr))) {
- kfree_skb(skb);
- return -1;
- }
-
n = (hdr->hdrlen << 3) - hdr->pad - (16 - hdr->cmpre);
r = do_div(n, (16 - hdr->cmpri));
/* checks if calculation was without remainder and n fits into
@@ -569,12 +551,6 @@ looped_back:
return -1;
}
- if (!pskb_may_pull(skb, ipv6_rpl_srh_size(n, hdr->cmpri,
- hdr->cmpre))) {
- kfree_skb(skb);
- return -1;
- }
-
hdr->segments_left--;
i = n - hdr->segments_left;
@@ -588,8 +564,7 @@ looped_back:
ipv6_rpl_srh_decompress(ohdr, hdr, &ipv6_hdr(skb)->daddr, n);
chdr = (struct ipv6_rpl_sr_hdr *)(buf + ((ohdr->hdrlen + 1) << 3));
- if ((ipv6_addr_type(&ipv6_hdr(skb)->daddr) & IPV6_ADDR_MULTICAST) ||
- (ipv6_addr_type(&ohdr->rpl_segaddr[i]) & IPV6_ADDR_MULTICAST)) {
+ if (ipv6_addr_is_multicast(&ohdr->rpl_segaddr[i])) {
kfree_skb(skb);
kfree(buf);
return -1;
@@ -827,7 +802,6 @@ looped_back:
*addr = ipv6_hdr(skb)->daddr;
ipv6_hdr(skb)->daddr = daddr;
- skb_dst_drop(skb);
ip6_route_input(skb);
if (skb_dst(skb)->error) {
skb_push(skb, skb->data - skb_network_header(skb));
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 00dc2e3b0184..d6314287338d 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -16,6 +16,7 @@
#include <net/tcp.h>
#include <net/udp.h>
#include <net/gro.h>
+#include <net/gso.h>
#include "ip6_offload.h"
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9554cf46ed88..1e8c90e97608 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -42,6 +42,7 @@
#include <net/sock.h>
#include <net/snmp.h>
+#include <net/gso.h>
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
@@ -1589,6 +1590,15 @@ emsgsize:
skb_zcopy_set(skb, uarg, &extra_uref);
}
}
+ } else if ((flags & MSG_SPLICE_PAGES) && length) {
+ if (inet_sk(sk)->hdrincl)
+ return -EPERM;
+ if (rt->dst.dev->features & NETIF_F_SG &&
+ getfrag == ip_generic_getfrag)
+ /* We need an empty buffer to attach stuff to */
+ paged = true;
+ else
+ flags &= ~MSG_SPLICE_PAGES;
}
/*
@@ -1778,6 +1788,15 @@ alloc_new_skb:
err = -EFAULT;
goto error;
}
+ } else if (flags & MSG_SPLICE_PAGES) {
+ struct msghdr *msg = from;
+
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+ sk->sk_allocation);
+ if (err < 0)
+ goto error;
+ copy = err;
+ wmem_alloc_delta += copy;
} else if (!zc) {
int i = skb_shinfo(skb)->nr_frags;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 51cf37abd142..cc3d5ad17257 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1879,11 +1879,10 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
/*
* The IP multicast ioctl support routines.
*/
-
-int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
+int ip6mr_ioctl(struct sock *sk, int cmd, void *arg)
{
- struct sioc_sg_req6 sr;
- struct sioc_mif_req6 vr;
+ struct sioc_sg_req6 *sr;
+ struct sioc_mif_req6 *vr;
struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
@@ -1895,40 +1894,33 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
switch (cmd) {
case SIOCGETMIFCNT_IN6:
- if (copy_from_user(&vr, arg, sizeof(vr)))
- return -EFAULT;
- if (vr.mifi >= mrt->maxvif)
+ vr = (struct sioc_mif_req6 *)arg;
+ if (vr->mifi >= mrt->maxvif)
return -EINVAL;
- vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif);
+ vr->mifi = array_index_nospec(vr->mifi, mrt->maxvif);
rcu_read_lock();
- vif = &mrt->vif_table[vr.mifi];
- if (VIF_EXISTS(mrt, vr.mifi)) {
- vr.icount = READ_ONCE(vif->pkt_in);
- vr.ocount = READ_ONCE(vif->pkt_out);
- vr.ibytes = READ_ONCE(vif->bytes_in);
- vr.obytes = READ_ONCE(vif->bytes_out);
+ vif = &mrt->vif_table[vr->mifi];
+ if (VIF_EXISTS(mrt, vr->mifi)) {
+ vr->icount = READ_ONCE(vif->pkt_in);
+ vr->ocount = READ_ONCE(vif->pkt_out);
+ vr->ibytes = READ_ONCE(vif->bytes_in);
+ vr->obytes = READ_ONCE(vif->bytes_out);
rcu_read_unlock();
-
- if (copy_to_user(arg, &vr, sizeof(vr)))
- return -EFAULT;
return 0;
}
rcu_read_unlock();
return -EADDRNOTAVAIL;
case SIOCGETSGCNT_IN6:
- if (copy_from_user(&sr, arg, sizeof(sr)))
- return -EFAULT;
+ sr = (struct sioc_sg_req6 *)arg;
rcu_read_lock();
- c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+ c = ip6mr_cache_find(mrt, &sr->src.sin6_addr,
+ &sr->grp.sin6_addr);
if (c) {
- sr.pktcnt = c->_c.mfc_un.res.pkt;
- sr.bytecnt = c->_c.mfc_un.res.bytes;
- sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ sr->pktcnt = c->_c.mfc_un.res.pkt;
+ sr->bytecnt = c->_c.mfc_un.res.bytes;
+ sr->wrong_if = c->_c.mfc_un.res.wrong_if;
rcu_read_unlock();
-
- if (copy_to_user(arg, &sr, sizeof(sr)))
- return -EFAULT;
return 0;
}
rcu_read_unlock();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 44ee7a2e72ac..c9caeb5a43ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1118,29 +1118,29 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
return do_rawv6_getsockopt(sk, level, optname, optval, optlen);
}
-static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int rawv6_ioctl(struct sock *sk, int cmd, int *karg)
{
switch (cmd) {
case SIOCOUTQ: {
- int amount = sk_wmem_alloc_get(sk);
-
- return put_user(amount, (int __user *)arg);
+ *karg = sk_wmem_alloc_get(sk);
+ return 0;
}
case SIOCINQ: {
struct sk_buff *skb;
- int amount = 0;
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
if (skb)
- amount = skb->len;
+ *karg = skb->len;
+ else
+ *karg = 0;
spin_unlock_bh(&sk->sk_receive_queue.lock);
- return put_user(amount, (int __user *)arg);
+ return 0;
}
default:
#ifdef CONFIG_IPV6_MROUTE
- return ip6mr_ioctl(sk, cmd, (void __user *)arg);
+ return ip6mr_ioctl(sk, cmd, karg);
#else
return -ENOIOCTLCMD;
#endif
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 392aaa373b66..64e873f5895f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3360,6 +3360,7 @@ static int ip6_route_check_nh_onlink(struct net *net,
static int ip6_route_check_nh(struct net *net,
struct fib6_config *cfg,
struct net_device **_dev,
+ netdevice_tracker *dev_tracker,
struct inet6_dev **idev)
{
const struct in6_addr *gw_addr = &cfg->fc_gateway;
@@ -3404,7 +3405,7 @@ static int ip6_route_check_nh(struct net *net,
err = -EHOSTUNREACH;
} else {
*_dev = dev = res.nh->fib_nh_dev;
- dev_hold(dev);
+ netdev_hold(dev, dev_tracker, GFP_ATOMIC);
*idev = in6_dev_get(dev);
}
@@ -3412,7 +3413,9 @@ static int ip6_route_check_nh(struct net *net,
}
static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
- struct net_device **_dev, struct inet6_dev **idev,
+ struct net_device **_dev,
+ netdevice_tracker *dev_tracker,
+ struct inet6_dev **idev,
struct netlink_ext_ack *extack)
{
const struct in6_addr *gw_addr = &cfg->fc_gateway;
@@ -3453,7 +3456,8 @@ static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
if (cfg->fc_flags & RTNH_F_ONLINK)
err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
else
- err = ip6_route_check_nh(net, cfg, _dev, idev);
+ err = ip6_route_check_nh(net, cfg, _dev, dev_tracker,
+ idev);
rcu_read_unlock();
@@ -3503,6 +3507,7 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
struct fib6_config *cfg, gfp_t gfp_flags,
struct netlink_ext_ack *extack)
{
+ netdevice_tracker *dev_tracker = &fib6_nh->fib_nh_dev_tracker;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
int addr_type;
@@ -3520,7 +3525,8 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
err = -ENODEV;
if (cfg->fc_ifindex) {
- dev = dev_get_by_index(net, cfg->fc_ifindex);
+ dev = netdev_get_by_index(net, cfg->fc_ifindex,
+ dev_tracker, gfp_flags);
if (!dev)
goto out;
idev = in6_dev_get(dev);
@@ -3554,11 +3560,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
/* hold loopback dev/idev if we haven't done so. */
if (dev != net->loopback_dev) {
if (dev) {
- dev_put(dev);
+ netdev_put(dev, dev_tracker);
in6_dev_put(idev);
}
dev = net->loopback_dev;
- dev_hold(dev);
+ netdev_hold(dev, dev_tracker, gfp_flags);
idev = in6_dev_get(dev);
if (!idev) {
err = -ENODEV;
@@ -3569,7 +3575,8 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
}
if (cfg->fc_flags & RTF_GATEWAY) {
- err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ err = ip6_validate_gw(net, cfg, &dev, dev_tracker,
+ &idev, extack);
if (err)
goto out;
@@ -3610,8 +3617,6 @@ pcpu_alloc:
}
fib6_nh->fib_nh_dev = dev;
- netdev_tracker_alloc(dev, &fib6_nh->fib_nh_dev_tracker, gfp_flags);
-
fib6_nh->fib_nh_oif = dev->ifindex;
err = 0;
out:
@@ -3621,7 +3626,7 @@ out:
if (err) {
lwtstate_put(fib6_nh->fib_nh_lws);
fib6_nh->fib_nh_lws = NULL;
- dev_put(dev);
+ netdev_put(dev, dev_tracker);
}
return err;
diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c
index d1876f192225..e186998bfbf7 100644
--- a/net/ipv6/rpl.c
+++ b/net/ipv6/rpl.c
@@ -29,13 +29,6 @@ static void *ipv6_rpl_segdata_pos(const struct ipv6_rpl_sr_hdr *hdr, int i)
return (void *)&hdr->rpl_segdata[i * IPV6_PFXTAIL_LEN(hdr->cmpri)];
}
-size_t ipv6_rpl_srh_size(unsigned char n, unsigned char cmpri,
- unsigned char cmpre)
-{
- return sizeof(struct ipv6_rpl_sr_hdr) + (n * IPV6_PFXTAIL_LEN(cmpri)) +
- IPV6_PFXTAIL_LEN(cmpre);
-}
-
void ipv6_rpl_srh_decompress(struct ipv6_rpl_sr_hdr *outhdr,
const struct ipv6_rpl_sr_hdr *inhdr,
const struct in6_addr *daddr, unsigned char n)
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 34db881204d2..03b877ff4558 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -470,8 +470,6 @@ static int seg6_input_core(struct net *net, struct sock *sk,
dst = dst_cache_get(&slwt->cache);
preempt_enable();
- skb_dst_drop(skb);
-
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
@@ -482,6 +480,7 @@ static int seg6_input_core(struct net *net, struct sock *sk,
preempt_enable();
}
} else {
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7132eb213a7a..c17c8ff94b79 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,12 +93,8 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
* This avoids a dereference and allow compiler optimizations.
* It is a specialized version of inet6_sk_generic().
*/
-static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
-{
- unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
-
- return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
-}
+#define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \
+ struct tcp6_sock, tcp)->inet6)
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
@@ -533,7 +529,7 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct sk_buff *syn_skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+ const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
@@ -2154,6 +2150,7 @@ struct proto tcpv6_prot = {
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
+ .splice_eof = tcp_splice_eof,
.sendpage = tcp_sendpage,
.backlog_rcv = tcp_v6_do_rcv,
.release_cb = tcp_release_cb,
diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c
index 39db5a226855..bf0c957e4b5e 100644
--- a/net/ipv6/tcpv6_offload.c
+++ b/net/ipv6/tcpv6_offload.c
@@ -36,7 +36,8 @@ INDIRECT_CALLABLE_SCOPE int tcp6_gro_complete(struct sk_buff *skb, int thoff)
&iph->daddr, 0);
skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
- return tcp_gro_complete(skb);
+ tcp_gro_complete(skb);
+ return 0;
}
static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e5a337e6b970..317b01c9bc39 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1653,6 +1653,20 @@ do_confirm:
}
EXPORT_SYMBOL(udpv6_sendmsg);
+static void udpv6_splice_eof(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct udp_sock *up = udp_sk(sk);
+
+ if (!up->pending || READ_ONCE(up->corkflag))
+ return;
+
+ lock_sock(sk);
+ if (up->pending && !READ_ONCE(up->corkflag))
+ udp_v6_push_pending_frames(sk);
+ release_sock(sk);
+}
+
void udpv6_destroy_sock(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
@@ -1764,6 +1778,7 @@ struct proto udpv6_prot = {
.getsockopt = udpv6_getsockopt,
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
+ .splice_eof = udpv6_splice_eof,
.release_cb = ip6_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index c39c1e32f980..ad3b8726873e 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -14,6 +14,7 @@
#include <net/ip6_checksum.h>
#include "ip6_offload.h"
#include <net/gro.h>
+#include <net/gso.h>
static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index cfe828bd7fc6..d0537c1c8cd7 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -581,12 +581,10 @@ static void kcm_report_tx_retry(struct kcm_sock *kcm)
*/
static int kcm_write_msgs(struct kcm_sock *kcm)
{
+ unsigned int total_sent = 0;
struct sock *sk = &kcm->sk;
struct kcm_psock *psock;
- struct sk_buff *skb, *head;
- struct kcm_tx_msg *txm;
- unsigned short fragidx, frag_offset;
- unsigned int sent, total_sent = 0;
+ struct sk_buff *head;
int ret = 0;
kcm->tx_wait_more = false;
@@ -600,72 +598,57 @@ static int kcm_write_msgs(struct kcm_sock *kcm)
if (skb_queue_empty(&sk->sk_write_queue))
return 0;
- kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
-
- } else if (skb_queue_empty(&sk->sk_write_queue)) {
- return 0;
+ kcm_tx_msg(skb_peek(&sk->sk_write_queue))->started_tx = false;
}
- head = skb_peek(&sk->sk_write_queue);
- txm = kcm_tx_msg(head);
+retry:
+ while ((head = skb_peek(&sk->sk_write_queue))) {
+ struct msghdr msg = {
+ .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
+ };
+ struct kcm_tx_msg *txm = kcm_tx_msg(head);
+ struct sk_buff *skb;
+ unsigned int msize;
+ int i;
- if (txm->sent) {
- /* Send of first skbuff in queue already in progress */
- if (WARN_ON(!psock)) {
- ret = -EINVAL;
- goto out;
+ if (!txm->started_tx) {
+ psock = reserve_psock(kcm);
+ if (!psock)
+ goto out;
+ skb = head;
+ txm->frag_offset = 0;
+ txm->sent = 0;
+ txm->started_tx = true;
+ } else {
+ if (WARN_ON(!psock)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ skb = txm->frag_skb;
}
- sent = txm->sent;
- frag_offset = txm->frag_offset;
- fragidx = txm->fragidx;
- skb = txm->frag_skb;
- goto do_frag;
- }
-
-try_again:
- psock = reserve_psock(kcm);
- if (!psock)
- goto out;
-
- do {
- skb = head;
- txm = kcm_tx_msg(head);
- sent = 0;
-
-do_frag_list:
if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
ret = -EINVAL;
goto out;
}
- for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
- fragidx++) {
- skb_frag_t *frag;
+ msize = 0;
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+ msize += skb_shinfo(skb)->frags[i].bv_len;
- frag_offset = 0;
-do_frag:
- frag = &skb_shinfo(skb)->frags[fragidx];
- if (WARN_ON(!skb_frag_size(frag))) {
- ret = -EINVAL;
- goto out;
- }
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
+ skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
+ msize);
+ iov_iter_advance(&msg.msg_iter, txm->frag_offset);
- ret = kernel_sendpage(psock->sk->sk_socket,
- skb_frag_page(frag),
- skb_frag_off(frag) + frag_offset,
- skb_frag_size(frag) - frag_offset,
- MSG_DONTWAIT);
+ do {
+ ret = sock_sendmsg(psock->sk->sk_socket, &msg);
if (ret <= 0) {
if (ret == -EAGAIN) {
/* Save state to try again when there's
* write space on the socket
*/
- txm->sent = sent;
- txm->frag_offset = frag_offset;
- txm->fragidx = fragidx;
txm->frag_skb = skb;
-
ret = 0;
goto out;
}
@@ -678,45 +661,44 @@ do_frag:
kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
true);
unreserve_psock(kcm);
+ psock = NULL;
- txm->sent = 0;
+ txm->started_tx = false;
kcm_report_tx_retry(kcm);
ret = 0;
-
- goto try_again;
+ goto retry;
}
- sent += ret;
- frag_offset += ret;
+ txm->sent += ret;
+ txm->frag_offset += ret;
KCM_STATS_ADD(psock->stats.tx_bytes, ret);
- if (frag_offset < skb_frag_size(frag)) {
- /* Not finished with this frag */
- goto do_frag;
- }
- }
+ } while (msg.msg_iter.count > 0);
if (skb == head) {
if (skb_has_frag_list(skb)) {
- skb = skb_shinfo(skb)->frag_list;
- goto do_frag_list;
+ txm->frag_skb = skb_shinfo(skb)->frag_list;
+ txm->frag_offset = 0;
+ continue;
}
} else if (skb->next) {
- skb = skb->next;
- goto do_frag_list;
+ txm->frag_skb = skb->next;
+ txm->frag_offset = 0;
+ continue;
}
/* Successfully sent the whole packet, account for it. */
+ sk->sk_wmem_queued -= txm->sent;
+ total_sent += txm->sent;
skb_dequeue(&sk->sk_write_queue);
kfree_skb(head);
- sk->sk_wmem_queued -= sent;
- total_sent += sent;
KCM_STATS_INCR(psock->stats.tx_msgs);
- } while ((head = skb_peek(&sk->sk_write_queue)));
+ }
out:
if (!head) {
/* Done with all queued messages. */
WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
- unreserve_psock(kcm);
+ if (psock)
+ unreserve_psock(kcm);
}
/* Check if write space is available */
@@ -761,149 +743,6 @@ static void kcm_push(struct kcm_sock *kcm)
kcm_write_msgs(kcm);
}
-static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
- int offset, size_t size, int flags)
-
-{
- struct sock *sk = sock->sk;
- struct kcm_sock *kcm = kcm_sk(sk);
- struct sk_buff *skb = NULL, *head = NULL;
- long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
- bool eor;
- int err = 0;
- int i;
-
- if (flags & MSG_SENDPAGE_NOTLAST)
- flags |= MSG_MORE;
-
- /* No MSG_EOR from splice, only look at MSG_MORE */
- eor = !(flags & MSG_MORE);
-
- lock_sock(sk);
-
- sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
- err = -EPIPE;
- if (sk->sk_err)
- goto out_error;
-
- if (kcm->seq_skb) {
- /* Previously opened message */
- head = kcm->seq_skb;
- skb = kcm_tx_msg(head)->last_skb;
- i = skb_shinfo(skb)->nr_frags;
-
- if (skb_can_coalesce(skb, i, page, offset)) {
- skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
- skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
- goto coalesced;
- }
-
- if (i >= MAX_SKB_FRAGS) {
- struct sk_buff *tskb;
-
- tskb = alloc_skb(0, sk->sk_allocation);
- while (!tskb) {
- kcm_push(kcm);
- err = sk_stream_wait_memory(sk, &timeo);
- if (err)
- goto out_error;
- }
-
- if (head == skb)
- skb_shinfo(head)->frag_list = tskb;
- else
- skb->next = tskb;
-
- skb = tskb;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- i = 0;
- }
- } else {
- /* Call the sk_stream functions to manage the sndbuf mem. */
- if (!sk_stream_memory_free(sk)) {
- kcm_push(kcm);
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
- err = sk_stream_wait_memory(sk, &timeo);
- if (err)
- goto out_error;
- }
-
- head = alloc_skb(0, sk->sk_allocation);
- while (!head) {
- kcm_push(kcm);
- err = sk_stream_wait_memory(sk, &timeo);
- if (err)
- goto out_error;
- }
-
- skb = head;
- i = 0;
- }
-
- get_page(page);
- skb_fill_page_desc_noacc(skb, i, page, offset, size);
- skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
-
-coalesced:
- skb->len += size;
- skb->data_len += size;
- skb->truesize += size;
- sk->sk_wmem_queued += size;
- sk_mem_charge(sk, size);
-
- if (head != skb) {
- head->len += size;
- head->data_len += size;
- head->truesize += size;
- }
-
- if (eor) {
- bool not_busy = skb_queue_empty(&sk->sk_write_queue);
-
- /* Message complete, queue it on send buffer */
- __skb_queue_tail(&sk->sk_write_queue, head);
- kcm->seq_skb = NULL;
- KCM_STATS_INCR(kcm->stats.tx_msgs);
-
- if (flags & MSG_BATCH) {
- kcm->tx_wait_more = true;
- } else if (kcm->tx_wait_more || not_busy) {
- err = kcm_write_msgs(kcm);
- if (err < 0) {
- /* We got a hard error in write_msgs but have
- * already queued this message. Report an error
- * in the socket, but don't affect return value
- * from sendmsg
- */
- pr_warn("KCM: Hard failure on kcm_write_msgs\n");
- report_csk_error(&kcm->sk, -err);
- }
- }
- } else {
- /* Message not complete, save state */
- kcm->seq_skb = head;
- kcm_tx_msg(head)->last_skb = skb;
- }
-
- KCM_STATS_ADD(kcm->stats.tx_bytes, size);
-
- release_sock(sk);
- return size;
-
-out_error:
- kcm_push(kcm);
-
- err = sk_stream_error(sk, flags, err);
-
- /* make sure we wake any epoll edge trigger waiter */
- if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
- sk->sk_write_space(sk);
-
- release_sock(sk);
- return err;
-}
-
static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
@@ -989,29 +828,52 @@ start:
merge = false;
}
- copy = min_t(int, msg_data_left(msg),
- pfrag->size - pfrag->offset);
+ if (msg->msg_flags & MSG_SPLICE_PAGES) {
+ copy = msg_data_left(msg);
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_memory;
- if (!sk_wmem_schedule(sk, copy))
- goto wait_for_memory;
+ err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
+ sk->sk_allocation);
+ if (err < 0) {
+ if (err == -EMSGSIZE)
+ goto wait_for_memory;
+ goto out_error;
+ }
- err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
- pfrag->page,
- pfrag->offset,
- copy);
- if (err)
- goto out_error;
+ copy = err;
+ skb_shinfo(skb)->flags |= SKBFL_SHARED_FRAG;
+ sk_wmem_queued_add(sk, copy);
+ sk_mem_charge(sk, copy);
- /* Update the skb. */
- if (merge) {
- skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
+ if (head != skb)
+ head->truesize += copy;
} else {
- skb_fill_page_desc(skb, i, pfrag->page,
- pfrag->offset, copy);
- get_page(pfrag->page);
+ copy = min_t(int, msg_data_left(msg),
+ pfrag->size - pfrag->offset);
+ if (!sk_wmem_schedule(sk, copy))
+ goto wait_for_memory;
+
+ err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
+ pfrag->page,
+ pfrag->offset,
+ copy);
+ if (err)
+ goto out_error;
+
+ /* Update the skb. */
+ if (merge) {
+ skb_frag_size_add(
+ &skb_shinfo(skb)->frags[i - 1], copy);
+ } else {
+ skb_fill_page_desc(skb, i, pfrag->page,
+ pfrag->offset, copy);
+ get_page(pfrag->page);
+ }
+
+ pfrag->offset += copy;
}
- pfrag->offset += copy;
copied += copy;
if (head != skb) {
head->len += copy;
@@ -1088,6 +950,37 @@ out_error:
return err;
}
+static void kcm_splice_eof(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct kcm_sock *kcm = kcm_sk(sk);
+
+ if (skb_queue_empty_lockless(&sk->sk_write_queue))
+ return;
+
+ lock_sock(sk);
+ kcm_write_msgs(kcm);
+ release_sock(sk);
+}
+
+static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags)
+
+{
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
+
+ if (flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return kcm_sendmsg(sock, &msg, size);
+}
+
static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
@@ -1875,6 +1768,7 @@ static const struct proto_ops kcm_dgram_ops = {
.sendmsg = kcm_sendmsg,
.recvmsg = kcm_recvmsg,
.mmap = sock_no_mmap,
+ .splice_eof = kcm_splice_eof,
.sendpage = kcm_sendpage,
};
@@ -1896,6 +1790,7 @@ static const struct proto_ops kcm_seqpacket_ops = {
.sendmsg = kcm_sendmsg,
.recvmsg = kcm_recvmsg,
.mmap = sock_no_mmap,
+ .splice_eof = kcm_splice_eof,
.sendpage = kcm_sendpage,
.splice_read = kcm_splice_read,
};
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a88e070b431d..91ebf0a3f499 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -272,7 +272,7 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops
void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
/* IOCTL helper for IP encap modules. */
-int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+int l2tp_ioctl(struct sock *sk, int cmd, int *karg);
/* Extract the tunnel structure from a socket's sk_user_data pointer,
* validating the tunnel magic feather.
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 41a74fc84ca1..2b795c1064f5 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -562,19 +562,18 @@ out:
return err ? err : copied;
}
-int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+int l2tp_ioctl(struct sock *sk, int cmd, int *karg)
{
struct sk_buff *skb;
- int amount;
switch (cmd) {
case SIOCOUTQ:
- amount = sk_wmem_alloc_get(sk);
+ *karg = sk_wmem_alloc_get(sk);
break;
case SIOCINQ:
spin_lock_bh(&sk->sk_receive_queue.lock);
skb = skb_peek(&sk->sk_receive_queue);
- amount = skb ? skb->len : 0;
+ *karg = skb ? skb->len : 0;
spin_unlock_bh(&sk->sk_receive_queue.lock);
break;
@@ -582,7 +581,7 @@ int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
- return put_user(amount, (int __user *)arg);
+ return 0;
}
EXPORT_SYMBOL_GPL(l2tp_ioctl);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f2d08dbccfb7..4aaead4895b7 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1101,18 +1101,20 @@ ieee80211_copy_rnr_beacon(u8 *pos, struct cfg80211_rnr_elems *dst,
return offset;
}
-static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
- struct ieee80211_link_data *link,
- struct cfg80211_beacon_data *params,
- const struct ieee80211_csa_settings *csa,
- const struct ieee80211_color_change_settings *cca)
+static int
+ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_link_data *link,
+ struct cfg80211_beacon_data *params,
+ const struct ieee80211_csa_settings *csa,
+ const struct ieee80211_color_change_settings *cca,
+ u64 *changed)
{
struct cfg80211_mbssid_elems *mbssid = NULL;
struct cfg80211_rnr_elems *rnr = NULL;
struct beacon_data *new, *old;
int new_head_len, new_tail_len;
int size, err;
- u32 changed = BSS_CHANGED_BEACON;
+ u64 _changed = BSS_CHANGED_BEACON;
struct ieee80211_bss_conf *link_conf = link->conf;
old = sdata_dereference(link->u.ap.beacon, sdata);
@@ -1219,7 +1221,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
return err;
}
if (err == 0)
- changed |= BSS_CHANGED_AP_PROBE_RESP;
+ _changed |= BSS_CHANGED_AP_PROBE_RESP;
if (params->ftm_responder != -1) {
link_conf->ftm_responder = params->ftm_responder;
@@ -1235,7 +1237,7 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
return err;
}
- changed |= BSS_CHANGED_FTM_RESPONDER;
+ _changed |= BSS_CHANGED_FTM_RESPONDER;
}
rcu_assign_pointer(link->u.ap.beacon, new);
@@ -1244,7 +1246,8 @@ static int ieee80211_assign_beacon(struct ieee80211_sub_if_data *sdata,
if (old)
kfree_rcu(old, rcu_head);
- return changed;
+ *changed |= _changed;
+ return 0;
}
static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
@@ -1446,10 +1449,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL))
link_conf->beacon_tx_rate = params->beacon_rate;
- err = ieee80211_assign_beacon(sdata, link, &params->beacon, NULL, NULL);
+ err = ieee80211_assign_beacon(sdata, link, &params->beacon, NULL, NULL,
+ &changed);
if (err < 0)
goto error;
- changed |= err;
if (params->fils_discovery.max_interval) {
err = ieee80211_set_fils_discovery(sdata,
@@ -1506,6 +1509,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
struct beacon_data *old;
int err;
struct ieee80211_bss_conf *link_conf;
+ u64 changed = 0;
sdata_assert_lock(sdata);
@@ -1525,17 +1529,18 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
if (!old)
return -ENOENT;
- err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL);
+ err = ieee80211_assign_beacon(sdata, link, params, NULL, NULL,
+ &changed);
if (err < 0)
return err;
if (params->he_bss_color_valid &&
params->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
link_conf->he_bss_color.enabled = params->he_bss_color.enabled;
- err |= BSS_CHANGED_HE_BSS_COLOR;
+ changed |= BSS_CHANGED_HE_BSS_COLOR;
}
- ieee80211_link_info_change_notify(sdata, link, err);
+ ieee80211_link_info_change_notify(sdata, link, changed);
return 0;
}
@@ -1718,7 +1723,7 @@ static void sta_apply_mesh_params(struct ieee80211_local *local,
{
#ifdef CONFIG_MAC80211_MESH
struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed = 0;
+ u64 changed = 0;
if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {
switch (params->plink_state) {
@@ -2665,7 +2670,7 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_link_data *link;
struct ieee80211_supported_band *sband;
- u32 changed = 0;
+ u64 changed = 0;
link = ieee80211_link_or_deflink(sdata, params->link_id, true);
if (IS_ERR(link))
@@ -3585,7 +3590,7 @@ void ieee80211_channel_switch_disconnect(struct ieee80211_vif *vif, bool block_t
sdata->deflink.csa_block_tx = block_tx;
sdata_info(sdata, "channel switch failed, disconnecting\n");
- ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
+ wiphy_work_queue(local->hw.wiphy, &ifmgd->csa_connection_drop_work);
}
EXPORT_SYMBOL(ieee80211_channel_switch_disconnect);
@@ -3601,25 +3606,22 @@ static int ieee80211_set_after_csa_beacon(struct ieee80211_sub_if_data *sdata,
err = ieee80211_assign_beacon(sdata, &sdata->deflink,
sdata->deflink.u.ap.next_beacon,
- NULL, NULL);
+ NULL, NULL, changed);
ieee80211_free_next_beacon(&sdata->deflink);
if (err < 0)
return err;
- *changed |= err;
break;
case NL80211_IFTYPE_ADHOC:
- err = ieee80211_ibss_finish_csa(sdata);
+ err = ieee80211_ibss_finish_csa(sdata, changed);
if (err < 0)
return err;
- *changed |= err;
break;
#ifdef CONFIG_MAC80211_MESH
case NL80211_IFTYPE_MESH_POINT:
- err = ieee80211_mesh_finish_csa(sdata);
+ err = ieee80211_mesh_finish_csa(sdata, changed);
if (err < 0)
return err;
- *changed |= err;
break;
#endif
default:
@@ -3730,7 +3732,7 @@ unlock:
static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
struct cfg80211_csa_settings *params,
- u32 *changed)
+ u64 *changed)
{
struct ieee80211_csa_settings csa = {};
int err;
@@ -3777,12 +3779,11 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
err = ieee80211_assign_beacon(sdata, &sdata->deflink,
&params->beacon_csa, &csa,
- NULL);
+ NULL, changed);
if (err < 0) {
ieee80211_free_next_beacon(&sdata->deflink);
return err;
}
- *changed |= err;
break;
case NL80211_IFTYPE_ADHOC:
@@ -3814,10 +3815,9 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
/* see comments in the NL80211_IFTYPE_AP block */
if (params->count > 1) {
- err = ieee80211_ibss_csa_beacon(sdata, params);
+ err = ieee80211_ibss_csa_beacon(sdata, params, changed);
if (err < 0)
return err;
- *changed |= err;
}
ieee80211_send_action_csa(sdata, params);
@@ -3842,12 +3842,11 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata,
/* see comments in the NL80211_IFTYPE_AP block */
if (params->count > 1) {
- err = ieee80211_mesh_csa_beacon(sdata, params);
+ err = ieee80211_mesh_csa_beacon(sdata, params, changed);
if (err < 0) {
ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
return err;
}
- *changed |= err;
}
if (ifmsh->csa_role == IEEE80211_MESH_CSA_ROLE_INIT)
@@ -3881,7 +3880,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_channel_switch ch_switch;
struct ieee80211_chanctx_conf *conf;
struct ieee80211_chanctx *chanctx;
- u32 changed = 0;
+ u64 changed = 0;
int err;
sdata_assert_lock(sdata);
@@ -4614,7 +4613,7 @@ static int ieee80211_set_sar_specs(struct wiphy *wiphy,
static int
ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
- u32 *changed)
+ u64 *changed)
{
switch (sdata->vif.type) {
case NL80211_IFTYPE_AP: {
@@ -4625,13 +4624,12 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
ret = ieee80211_assign_beacon(sdata, &sdata->deflink,
sdata->deflink.u.ap.next_beacon,
- NULL, NULL);
+ NULL, NULL, changed);
ieee80211_free_next_beacon(&sdata->deflink);
if (ret < 0)
return ret;
- *changed |= ret;
break;
}
default:
@@ -4645,7 +4643,7 @@ ieee80211_set_after_color_change_beacon(struct ieee80211_sub_if_data *sdata,
static int
ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
struct cfg80211_color_change_settings *params,
- u32 *changed)
+ u64 *changed)
{
struct ieee80211_color_change_settings color_change = {};
int err;
@@ -4668,12 +4666,11 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
err = ieee80211_assign_beacon(sdata, &sdata->deflink,
&params->beacon_color_change,
- NULL, &color_change);
+ NULL, &color_change, changed);
if (err < 0) {
ieee80211_free_next_beacon(&sdata->deflink);
return err;
}
- *changed |= err;
break;
default:
return -EOPNOTSUPP;
@@ -4684,7 +4681,7 @@ ieee80211_set_color_change_beacon(struct ieee80211_sub_if_data *sdata,
static void
ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
- u8 color, int enable, u32 changed)
+ u8 color, int enable, u64 changed)
{
sdata->vif.bss_conf.he_bss_color.color = color;
sdata->vif.bss_conf.he_bss_color.enabled = enable;
@@ -4712,7 +4709,7 @@ ieee80211_color_change_bss_config_notify(struct ieee80211_sub_if_data *sdata,
static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
- u32 changed = 0;
+ u64 changed = 0;
int err;
sdata_assert_lock(sdata);
@@ -4809,7 +4806,7 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
- u32 changed = 0;
+ u64 changed = 0;
int err;
sdata_assert_lock(sdata);
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 77c90ed8f5d7..168bf3edd4b4 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1205,8 +1205,8 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link)
&link->csa_finalize_work);
break;
case NL80211_IFTYPE_STATION:
- ieee80211_queue_work(&sdata->local->hw,
- &link->u.mgd.chswitch_work);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &link->u.mgd.chswitch_work, 0);
break;
case NL80211_IFTYPE_UNSPECIFIED:
case NL80211_IFTYPE_AP_VLAN:
@@ -1257,7 +1257,7 @@ ieee80211_link_use_reserved_reassign(struct ieee80211_link_data *link)
struct ieee80211_vif_chanctx_switch vif_chsw[1] = {};
struct ieee80211_chanctx *old_ctx, *new_ctx;
const struct cfg80211_chan_def *chandef;
- u32 changed = 0;
+ u64 changed = 0;
int err;
lockdep_assert_held(&local->mtx);
@@ -1653,7 +1653,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local)
reserved_chanctx_list) {
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_bss_conf *link_conf = link->conf;
- u32 changed = 0;
+ u64 changed = 0;
if (!ieee80211_link_has_in_place_reservation(link))
continue;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 45d3e53c7383..c4505593ba7a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -2,7 +2,7 @@
/*
* Portions of this file
* Copyright(c) 2016 Intel Deutschland GmbH
-* Copyright (C) 2018 - 2019, 2021 Intel Corporation
+* Copyright (C) 2018 - 2019, 2021 - 2023 Intel Corporation
*/
#ifndef __MAC80211_DRIVER_OPS
@@ -13,9 +13,11 @@
#include "trace.h"
#define check_sdata_in_driver(sdata) ({ \
- !WARN_ONCE(!(sdata->flags & IEEE80211_SDATA_IN_DRIVER), \
- "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", \
- sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); \
+ WARN_ONCE(!sdata->local->reconfig_failure && \
+ !(sdata->flags & IEEE80211_SDATA_IN_DRIVER), \
+ "%s: Failed check-sdata-in-driver check, flags: 0x%x\n", \
+ sdata->dev ? sdata->dev->name : sdata->name, sdata->flags); \
+ !!(sdata->flags & IEEE80211_SDATA_IN_DRIVER); \
})
static inline struct ieee80211_sub_if_data *
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 5315ab750280..33729870ad8a 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
* Copyright 2017 Intel Deutschland GmbH
- * Copyright(c) 2020-2022 Intel Corporation
+ * Copyright(c) 2020-2023 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -602,7 +602,8 @@ void ieee80211_request_smps(struct ieee80211_vif *vif, unsigned int link_id,
goto out;
link->u.mgd.driver_smps_mode = smps_mode;
- ieee80211_queue_work(&sdata->local->hw, &link->u.mgd.request_smps_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &link->u.mgd.request_smps_work);
out:
rcu_read_unlock();
}
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 9dffc3079588..e1900077bc4b 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -9,7 +9,7 @@
* Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018-2022 Intel Corporation
+ * Copyright(c) 2018-2023 Intel Corporation
*/
#include <linux/delay.h>
@@ -226,7 +226,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_mgmt *mgmt;
struct cfg80211_bss *bss;
- u32 bss_change;
+ u64 bss_change;
struct cfg80211_chan_def chandef;
struct ieee80211_channel *chan;
struct beacon_data *presp;
@@ -478,7 +478,8 @@ static void ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
}
int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_csa_settings *csa_settings)
+ struct cfg80211_csa_settings *csa_settings,
+ u64 *changed)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct beacon_data *presp, *old_presp;
@@ -520,10 +521,11 @@ int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
if (old_presp)
kfree_rcu(old_presp, rcu_head);
- return BSS_CHANGED_BEACON;
+ *changed |= BSS_CHANGED_BEACON;
+ return 0;
}
-int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
+int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
struct cfg80211_bss *cbss;
@@ -552,14 +554,15 @@ int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata)
ifibss->chandef = sdata->deflink.csa_chandef;
/* generate the beacon */
- return ieee80211_ibss_csa_beacon(sdata, NULL);
+ return ieee80211_ibss_csa_beacon(sdata, NULL, changed);
}
void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_ibss *ifibss = &sdata->u.ibss;
- cancel_work_sync(&ifibss->csa_connection_drop_work);
+ wiphy_work_cancel(sdata->local->hw.wiphy,
+ &ifibss->csa_connection_drop_work);
}
static struct sta_info *ieee80211_ibss_finish_sta(struct sta_info *sta)
@@ -728,7 +731,8 @@ static void ieee80211_ibss_disconnect(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->mtx);
}
-static void ieee80211_csa_connection_drop_work(struct work_struct *work)
+static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
@@ -741,7 +745,7 @@ static void ieee80211_csa_connection_drop_work(struct work_struct *work)
skb_queue_purge(&sdata->skb_queue);
/* trigger a scan to find another IBSS network to join */
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
sdata_unlock(sdata);
}
@@ -894,8 +898,8 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
return true;
disconnect:
ibss_dbg(sdata, "Can't handle channel switch, disconnect\n");
- ieee80211_queue_work(&sdata->local->hw,
- &ifibss->csa_connection_drop_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifibss->csa_connection_drop_work);
ieee80211_ibss_csa_mark_radar(sdata);
@@ -1242,7 +1246,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata,
spin_lock(&ifibss->incomplete_lock);
list_add(&sta->list, &ifibss->incomplete_stations);
spin_unlock(&ifibss->incomplete_lock);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
}
static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata)
@@ -1721,7 +1725,7 @@ static void ieee80211_ibss_timer(struct timer_list *t)
struct ieee80211_sub_if_data *sdata =
from_timer(sdata, t, u.ibss.timer);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
@@ -1731,8 +1735,8 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata)
timer_setup(&ifibss->timer, ieee80211_ibss_timer, 0);
INIT_LIST_HEAD(&ifibss->incomplete_stations);
spin_lock_init(&ifibss->incomplete_lock);
- INIT_WORK(&ifibss->csa_connection_drop_work,
- ieee80211_csa_connection_drop_work);
+ wiphy_work_init(&ifibss->csa_connection_drop_work,
+ ieee80211_csa_connection_drop_work);
}
/* scan finished notification */
@@ -1754,7 +1758,7 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
struct cfg80211_ibss_params *params)
{
- u32 changed = 0;
+ u64 changed = 0;
u32 rate_flags;
struct ieee80211_supported_band *sband;
enum ieee80211_chanctx_mode chanmode;
@@ -1856,7 +1860,7 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
sdata->deflink.needed_rx_chains = local->rx_chains;
sdata->control_port_over_nl80211 = params->control_port_over_nl80211;
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
return 0;
}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4159fb65038b..8e90cb2ea4bb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -466,8 +466,8 @@ struct ieee80211_if_managed {
struct timer_list conn_mon_timer;
struct timer_list bcn_mon_timer;
struct work_struct monitor_work;
- struct work_struct beacon_connection_loss_work;
- struct work_struct csa_connection_drop_work;
+ struct wiphy_work beacon_connection_loss_work;
+ struct wiphy_work csa_connection_drop_work;
unsigned long beacon_timeout;
unsigned long probe_timeout;
@@ -553,7 +553,7 @@ struct ieee80211_if_managed {
struct ieee80211_if_ibss {
struct timer_list timer;
- struct work_struct csa_connection_drop_work;
+ struct wiphy_work csa_connection_drop_work;
unsigned long last_scan_completed;
@@ -918,10 +918,9 @@ struct ieee80211_link_data_managed {
bool csa_waiting_bcn;
bool csa_ignored_same_chan;
- struct timer_list chswitch_timer;
- struct work_struct chswitch_work;
+ struct wiphy_delayed_work chswitch_work;
- struct work_struct request_smps_work;
+ struct wiphy_work request_smps_work;
bool beacon_crc_valid;
u32 beacon_crc;
struct ewma_beacon_signal ave_beacon_signal;
@@ -1061,7 +1060,7 @@ struct ieee80211_sub_if_data {
/* used to reconfigure hardware SM PS */
struct work_struct recalc_smps;
- struct work_struct work;
+ struct wiphy_work work;
struct sk_buff_head skb_queue;
struct sk_buff_head status_queue;
@@ -1394,6 +1393,9 @@ struct ieee80211_local {
/* device is during a HW reconfig */
bool in_reconfig;
+ /* reconfiguration failed ... suppress some warnings etc. */
+ bool reconfig_failure;
+
/* wowlan is enabled -- don't reconfig on resume */
bool wowlan;
@@ -1827,7 +1829,7 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_link_data *link,
u64 changed);
void ieee80211_configure_filter(struct ieee80211_local *local);
-u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
+u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata);
u64 ieee80211_mgmt_tx_cookie(struct ieee80211_local *local);
int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
@@ -1887,8 +1889,10 @@ void ieee80211_ibss_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
int ieee80211_ibss_csa_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_csa_settings *csa_settings);
-int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata);
+ struct cfg80211_csa_settings *csa_settings,
+ u64 *changed);
+int ieee80211_ibss_finish_csa(struct ieee80211_sub_if_data *sdata,
+ u64 *changed);
void ieee80211_ibss_stop(struct ieee80211_sub_if_data *sdata);
/* OCB code */
@@ -1905,8 +1909,10 @@ void ieee80211_mesh_work(struct ieee80211_sub_if_data *sdata);
void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_csa_settings *csa_settings);
-int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata);
+ struct cfg80211_csa_settings *csa_settings,
+ u64 *changed);
+int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata,
+ u64 *changed);
/* scan/BSS handling */
void ieee80211_scan_work(struct work_struct *work);
@@ -2269,8 +2275,6 @@ static inline void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata,
* (or re-association) response frame if this is given
* @from_ap: frame is received from an AP (currently used only
* for EHT capabilities parsing)
- * @scratch_len: if non zero, specifies the requested length of the scratch
- * buffer; otherwise, 'len' is used.
*/
struct ieee80211_elems_parse_params {
const u8 *start;
@@ -2281,7 +2285,6 @@ struct ieee80211_elems_parse_params {
struct cfg80211_bss *bss;
int link_id;
bool from_ap;
- size_t scratch_len;
};
struct ieee802_11_elems *
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index bd2c48870add..5b67b44e3f89 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -8,7 +8,7 @@
* Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (c) 2016 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -43,7 +43,7 @@
* by either the RTNL, the iflist_mtx or RCU.
*/
-static void ieee80211_iface_work(struct work_struct *work);
+static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work);
bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata)
{
@@ -614,7 +614,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do
RCU_INIT_POINTER(local->p2p_sdata, NULL);
fallthrough;
default:
- cancel_work_sync(&sdata->work);
+ wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work);
/*
* When we get here, the interface is marked down.
* Free the remaining keys, if there are any
@@ -1173,7 +1173,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local)
skb_queue_head_init(&sdata->skb_queue);
skb_queue_head_init(&sdata->status_queue);
- INIT_WORK(&sdata->work, ieee80211_iface_work);
+ wiphy_work_init(&sdata->work, ieee80211_iface_work);
return 0;
}
@@ -1221,7 +1221,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
struct net_device *dev = wdev->netdev;
struct ieee80211_local *local = sdata->local;
- u32 changed = 0;
+ u64 changed = 0;
int res;
u32 hw_reconf_flags = 0;
@@ -1281,6 +1281,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
}
if (local->open_count == 0) {
+ /* here we can consider everything in good order (again) */
+ local->reconfig_failure = false;
+
res = drv_start(local);
if (res)
goto err_del_bss;
@@ -1622,7 +1625,7 @@ static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata,
}
}
-static void ieee80211_iface_work(struct work_struct *work)
+static void ieee80211_iface_work(struct wiphy *wiphy, struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data, work);
@@ -1734,7 +1737,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
skb_queue_head_init(&sdata->skb_queue);
skb_queue_head_init(&sdata->status_queue);
- INIT_WORK(&sdata->work, ieee80211_iface_work);
+ wiphy_work_init(&sdata->work, ieee80211_iface_work);
INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work);
INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work);
@@ -2255,7 +2258,6 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
{
struct ieee80211_sub_if_data *sdata, *tmp;
LIST_HEAD(unreg_list);
- LIST_HEAD(wdev_list);
ASSERT_RTNL();
@@ -2278,23 +2280,18 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
ieee80211_txq_teardown_flows(local);
mutex_lock(&local->iflist_mtx);
- list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
- list_del(&sdata->list);
-
- if (sdata->dev)
- unregister_netdevice_queue(sdata->dev, &unreg_list);
- else
- list_add(&sdata->list, &wdev_list);
- }
+ list_splice_init(&local->interfaces, &unreg_list);
mutex_unlock(&local->iflist_mtx);
- unregister_netdevice_many(&unreg_list);
-
wiphy_lock(local->hw.wiphy);
- list_for_each_entry_safe(sdata, tmp, &wdev_list, list) {
+ list_for_each_entry_safe(sdata, tmp, &unreg_list, list) {
+ bool netdev = sdata->dev;
+
list_del(&sdata->list);
cfg80211_unregister_wdev(&sdata->wdev);
- kfree(sdata);
+
+ if (!netdev)
+ kfree(sdata);
}
wiphy_unlock(local->hw.wiphy);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 55cdfaef0f5d..24315d7b3126 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <net/mac80211.h>
@@ -291,7 +291,7 @@ void ieee80211_link_info_change_notify(struct ieee80211_sub_if_data *sdata,
drv_link_info_changed(local, sdata, link->conf, link->link_id, changed);
}
-u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
+u64 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata)
{
sdata->vif.bss_conf.use_cts_prot = false;
sdata->vif.bss_conf.use_short_preamble = false;
@@ -364,7 +364,8 @@ static void ieee80211_restart_work(struct work_struct *work)
* The exception is ieee80211_chswitch_done.
* Then we can have a race...
*/
- cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work);
+ wiphy_work_cancel(local->hw.wiphy,
+ &sdata->u.mgd.csa_connection_drop_work);
if (sdata->vif.bss_conf.csa_active) {
sdata_lock(sdata);
ieee80211_sta_connection_lost(sdata,
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index f72333201903..af8c5fc2db14 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2018 - 2022 Intel Corporation
+ * Copyright (C) 2018 - 2023 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -45,7 +45,7 @@ static void ieee80211_mesh_housekeeping_timer(struct timer_list *t)
set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
}
/**
@@ -133,10 +133,10 @@ bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie)
*
* Returns: beacon changed flag if the beacon content changed.
*/
-u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
+u64 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
{
bool free_plinks;
- u32 changed = 0;
+ u64 changed = 0;
/* In case mesh_plink_free_count > 0 and mesh_plinktbl_capacity == 0,
* the mesh interface might be able to establish plinks with peers that
@@ -162,7 +162,7 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata)
void mesh_sta_cleanup(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed = mesh_plink_deactivate(sta);
+ u64 changed = mesh_plink_deactivate(sta);
if (changed)
ieee80211_mbss_info_change_notify(sdata, changed);
@@ -703,7 +703,7 @@ static void ieee80211_mesh_path_timer(struct timer_list *t)
struct ieee80211_sub_if_data *sdata =
from_timer(sdata, t, u.mesh.mesh_path_timer);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
static void ieee80211_mesh_path_root_timer(struct timer_list *t)
@@ -714,7 +714,7 @@ static void ieee80211_mesh_path_root_timer(struct timer_list *t)
set_bit(MESH_WORK_ROOT, &ifmsh->wrkq_flags);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh)
@@ -923,7 +923,7 @@ unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u32 changed;
+ u64 changed;
if (ifmsh->mshcfg.plink_timeout > 0)
ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ);
@@ -1164,7 +1164,7 @@ ieee80211_mesh_rebuild_beacon(struct ieee80211_sub_if_data *sdata)
}
void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
- u32 changed)
+ u64 changed)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
unsigned long bits = changed;
@@ -1177,14 +1177,14 @@ void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
for_each_set_bit(bit, &bits, sizeof(changed) * BITS_PER_BYTE)
set_bit(bit, &ifmsh->mbss_changed);
set_bit(MESH_WORK_MBSS_CHANGED, &ifmsh->wrkq_flags);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct ieee80211_local *local = sdata->local;
- u32 changed = BSS_CHANGED_BEACON |
+ u64 changed = BSS_CHANGED_BEACON |
BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_HT |
BSS_CHANGED_BASIC_RATES |
@@ -1202,7 +1202,7 @@ int ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata)
ifmsh->sync_offset_clockdrift_max = 0;
set_bit(MESH_WORK_HOUSEKEEPING, &ifmsh->wrkq_flags);
ieee80211_mesh_root_setup(ifmsh);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
sdata->vif.bss_conf.ht_operation_mode =
ifmsh->mshcfg.ht_opmode;
sdata->vif.bss_conf.enable_beacon = true;
@@ -1525,12 +1525,11 @@ free:
kfree(elems);
}
-int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata)
+int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct mesh_csa_settings *tmp_csa_settings;
int ret = 0;
- int changed = 0;
/* Reset the TTL value and Initiator flag */
ifmsh->csa_role = IEEE80211_MESH_CSA_ROLE_NONE;
@@ -1545,15 +1544,16 @@ int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata)
if (ret)
return -EINVAL;
- changed |= BSS_CHANGED_BEACON;
+ *changed |= BSS_CHANGED_BEACON;
mcsa_dbg(sdata, "complete switching to center freq %d MHz",
sdata->vif.bss_conf.chandef.chan->center_freq);
- return changed;
+ return 0;
}
int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
- struct cfg80211_csa_settings *csa_settings)
+ struct cfg80211_csa_settings *csa_settings,
+ u64 *changed)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct mesh_csa_settings *tmp_csa_settings;
@@ -1579,7 +1579,8 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata,
return ret;
}
- return BSS_CHANGED_BEACON;
+ *changed |= BSS_CHANGED_BEACON;
+ return 0;
}
static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata,
@@ -1720,7 +1721,8 @@ out:
static void mesh_bss_info_changed(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u32 bit, changed = 0;
+ u32 bit;
+ u64 changed = 0;
for_each_set_bit(bit, &ifmsh->mbss_changed,
sizeof(changed) * BITS_PER_BYTE) {
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 022f41292a05..6c94222a9df5 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
+ * Copyright (C) 2023 Intel Corporation
* Authors: Luis Carlos Cobo <luisca@cozybit.com>
* Javier Cardona <javier@cozybit.com>
*/
@@ -252,11 +253,11 @@ void ieee80211_mesh_root_setup(struct ieee80211_if_mesh *ifmsh);
const struct ieee80211_mesh_sync_ops *ieee80211_mesh_sync_ops_get(u8 method);
/* wrapper for ieee80211_bss_info_change_notify() */
void ieee80211_mbss_info_change_notify(struct ieee80211_sub_if_data *sdata,
- u32 changed);
+ u64 changed);
/* mesh power save */
-u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata);
-u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
+u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata);
+u64 ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
enum nl80211_mesh_power_mode pm);
void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
@@ -303,12 +304,12 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
u8 *hw_addr, struct ieee802_11_elems *ie,
struct ieee80211_rx_status *rx_status);
bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie);
-u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
+u64 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata);
void mesh_plink_timer(struct timer_list *t);
void mesh_plink_broken(struct sta_info *sta);
-u32 mesh_plink_deactivate(struct sta_info *sta);
-u32 mesh_plink_open(struct sta_info *sta);
-u32 mesh_plink_block(struct sta_info *sta);
+u64 mesh_plink_deactivate(struct sta_info *sta);
+u64 mesh_plink_open(struct sta_info *sta);
+u64 mesh_plink_block(struct sta_info *sta);
void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata,
struct ieee80211_mgmt *mgmt, size_t len,
struct ieee80211_rx_status *rx_status);
@@ -349,14 +350,14 @@ void mesh_path_refresh(struct ieee80211_sub_if_data *sdata,
#ifdef CONFIG_MAC80211_MESH
static inline
-u32 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
+u64 mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata)
{
atomic_inc(&sdata->u.mesh.estab_plinks);
return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON;
}
static inline
-u32 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
+u64 mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata)
{
atomic_dec(&sdata->u.mesh.estab_plinks);
return mesh_accept_plinks_update(sdata) | BSS_CHANGED_BEACON;
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 5217e1d97dd6..51369072984e 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021-2022 Intel Corporation
+ * Copyright (C) 2019, 2021-2023 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
@@ -1026,14 +1026,14 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags)
spin_unlock_bh(&ifmsh->mesh_preq_queue_lock);
if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata)))
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
else if (time_before(jiffies, ifmsh->last_preq)) {
/* avoid long wait if did not send preqs for a long time
* and jiffies wrapped around
*/
ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1;
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
} else
mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq +
min_preq_int_jiff(sdata));
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 8f168bc4e4b8..f3d5bb0a59f1 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2019, 2021-2022 Intel Corporation
+ * Copyright (C) 2019, 2021-2023 Intel Corporation
* Author: Luis Carlos Cobo <luisca@cozybit.com>
*/
#include <linux/gfp.h>
@@ -90,12 +90,13 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta)
*
* Returns BSS_CHANGED_ERP_SLOT or 0 for no change.
*/
-static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
+static u64 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_supported_band *sband;
struct sta_info *sta;
- u32 erp_rates = 0, changed = 0;
+ u32 erp_rates = 0;
+ u64 changed = 0;
int i;
bool short_slot = false;
@@ -153,7 +154,7 @@ out:
* is selected if all peers in our 20/40MHz MBSS support HT and at least one
* HT20 peer is present. Otherwise no-protection mode is selected.
*/
-static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
+static u64 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct sta_info *sta;
@@ -365,10 +366,10 @@ free:
*
* Locking: the caller must hold sta->mesh->plink_lock
*/
-static u32 __mesh_plink_deactivate(struct sta_info *sta)
+static u64 __mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed = 0;
+ u64 changed = 0;
lockdep_assert_held(&sta->mesh->plink_lock);
@@ -390,10 +391,10 @@ static u32 __mesh_plink_deactivate(struct sta_info *sta)
*
* All mesh paths with this peer as next hop will be flushed
*/
-u32 mesh_plink_deactivate(struct sta_info *sta)
+u64 mesh_plink_deactivate(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed;
+ u64 changed;
spin_lock_bh(&sta->mesh->plink_lock);
changed = __mesh_plink_deactivate(sta);
@@ -622,7 +623,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
struct ieee80211_rx_status *rx_status)
{
struct sta_info *sta;
- u32 changed = 0;
+ u64 changed = 0;
sta = mesh_sta_info_get(sdata, hw_addr, elems, rx_status);
if (!sta)
@@ -775,10 +776,10 @@ static u16 mesh_get_new_llid(struct ieee80211_sub_if_data *sdata)
return llid;
}
-u32 mesh_plink_open(struct sta_info *sta)
+u64 mesh_plink_open(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed;
+ u64 changed;
if (!test_sta_flag(sta, WLAN_STA_AUTH))
return 0;
@@ -805,9 +806,9 @@ u32 mesh_plink_open(struct sta_info *sta)
return changed;
}
-u32 mesh_plink_block(struct sta_info *sta)
+u64 mesh_plink_block(struct sta_info *sta)
{
- u32 changed;
+ u64 changed;
spin_lock_bh(&sta->mesh->plink_lock);
changed = __mesh_plink_deactivate(sta);
@@ -831,11 +832,11 @@ static void mesh_plink_close(struct ieee80211_sub_if_data *sdata,
mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout);
}
-static u32 mesh_plink_establish(struct ieee80211_sub_if_data *sdata,
+static u64 mesh_plink_establish(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg;
- u32 changed = 0;
+ u64 changed = 0;
del_timer(&sta->mesh->plink_timer);
sta->mesh->plink_state = NL80211_PLINK_ESTAB;
@@ -857,12 +858,12 @@ static u32 mesh_plink_establish(struct ieee80211_sub_if_data *sdata,
*
* Return: changed MBSS flags
*/
-static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
+static u64 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta, enum plink_event event)
{
struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg;
enum ieee80211_self_protected_actioncode action = 0;
- u32 changed = 0;
+ u64 changed = 0;
bool flush = false;
mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr,
@@ -1117,7 +1118,7 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta;
enum plink_event event;
enum ieee80211_self_protected_actioncode ftype;
- u32 changed = 0;
+ u64 changed = 0;
u8 ie_len = elems->peering_len;
u16 plid, llid = 0;
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 3fbd0b9ff913..35eacca43e49 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -3,6 +3,7 @@
* Copyright 2012-2013, Marco Porsch <marco.porsch@s2005.tu-chemnitz.de>
* Copyright 2012-2013, cozybit Inc.
* Copyright (C) 2021 Intel Corporation
+ * Copyright (C) 2023 Intel Corporation
*/
#include "mesh.h"
@@ -77,14 +78,14 @@ static void mps_qos_null_tx(struct sta_info *sta)
* sets the non-peer power mode and triggers the driver PS (re-)configuration
* Return BSS_CHANGED_BEACON if a beacon update is necessary.
*/
-u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
+u64 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct sta_info *sta;
bool peering = false;
int light_sleep_cnt = 0;
int deep_sleep_cnt = 0;
- u32 changed = 0;
+ u64 changed = 0;
enum nl80211_mesh_power_mode nonpeer_pm;
rcu_read_lock();
@@ -148,7 +149,7 @@ u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
* @pm: the power mode to set
* Return BSS_CHANGED_BEACON if a beacon update is in order.
*/
-u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
+u64 ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
enum nl80211_mesh_power_mode pm)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 5a4303130ef2..5793f0e7f955 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1680,10 +1680,12 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local,
}
/* spectrum management related things */
-static void ieee80211_chswitch_work(struct work_struct *work)
+static void ieee80211_chswitch_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_link_data *link =
- container_of(work, struct ieee80211_link_data, u.mgd.chswitch_work);
+ container_of(work, struct ieee80211_link_data,
+ u.mgd.chswitch_work.work);
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -1723,8 +1725,8 @@ static void ieee80211_chswitch_work(struct work_struct *work)
sdata_info(sdata,
"failed to use reserved channel context, disconnecting (err=%d)\n",
ret);
- ieee80211_queue_work(&sdata->local->hw,
- &ifmgd->csa_connection_drop_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
goto out;
}
@@ -1735,8 +1737,8 @@ static void ieee80211_chswitch_work(struct work_struct *work)
&link->csa_chandef)) {
sdata_info(sdata,
"failed to finalize channel switch, disconnecting\n");
- ieee80211_queue_work(&sdata->local->hw,
- &ifmgd->csa_connection_drop_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
goto out;
}
@@ -1780,8 +1782,8 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
if (ret) {
sdata_info(sdata,
"driver post channel switch failed, disconnecting\n");
- ieee80211_queue_work(&local->hw,
- &ifmgd->csa_connection_drop_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
return;
}
@@ -1800,24 +1802,16 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
if (!success) {
sdata_info(sdata,
"driver channel switch failed, disconnecting\n");
- ieee80211_queue_work(&sdata->local->hw,
- &ifmgd->csa_connection_drop_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
} else {
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->deflink.u.mgd.chswitch_work);
+ wiphy_delayed_work_queue(sdata->local->hw.wiphy,
+ &sdata->deflink.u.mgd.chswitch_work,
+ 0);
}
}
EXPORT_SYMBOL(ieee80211_chswitch_done);
-static void ieee80211_chswitch_timer(struct timer_list *t)
-{
- struct ieee80211_link_data *link =
- from_timer(link, t, u.mgd.chswitch_timer);
-
- ieee80211_queue_work(&link->sdata->local->hw,
- &link->u.mgd.chswitch_work);
-}
-
static void
ieee80211_sta_abort_chanswitch(struct ieee80211_link_data *link)
{
@@ -1861,6 +1855,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
struct ieee80211_csa_ie csa_ie;
struct ieee80211_channel_switch ch_switch;
struct ieee80211_bss *bss;
+ unsigned long timeout;
int res;
sdata_assert_lock(sdata);
@@ -2004,12 +1999,11 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
}
/* channel switch handled in software */
- if (csa_ie.count <= 1)
- ieee80211_queue_work(&local->hw, &link->u.mgd.chswitch_work);
- else
- mod_timer(&link->u.mgd.chswitch_timer,
- TU_TO_EXP_TIME((csa_ie.count - 1) *
- cbss->beacon_interval));
+ timeout = TU_TO_JIFFIES((max_t(int, csa_ie.count, 1) - 1) *
+ cbss->beacon_interval);
+ wiphy_delayed_work_queue(local->hw.wiphy,
+ &link->u.mgd.chswitch_work,
+ timeout);
return;
lock_and_drop_connection:
mutex_lock(&local->mtx);
@@ -2025,7 +2019,8 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
link->conf->csa_active = true;
link->csa_block_tx = csa_ie.mode;
- ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
mutex_unlock(&local->chanctx_mtx);
mutex_unlock(&local->mtx);
}
@@ -2116,7 +2111,7 @@ static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata,
*pwr_level = (__s8)cisco_dtpc_ie[4];
}
-static u32 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
+static u64 ieee80211_handle_pwr_constr(struct ieee80211_link_data *link,
struct ieee80211_channel *channel,
struct ieee80211_mgmt *mgmt,
const u8 *country_ie, u8 country_ie_len,
@@ -2706,12 +2701,12 @@ static void ieee80211_stop_poll(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&sdata->local->mtx);
}
-static u32 ieee80211_handle_bss_capability(struct ieee80211_link_data *link,
+static u64 ieee80211_handle_bss_capability(struct ieee80211_link_data *link,
u16 capab, bool erp_valid, u8 erp)
{
struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_supported_band *sband;
- u32 changed = 0;
+ u64 changed = 0;
bool use_protection;
bool use_short_preamble;
bool use_short_slot;
@@ -2757,7 +2752,7 @@ static u64 ieee80211_link_set_associated(struct ieee80211_link_data *link,
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_bss_conf *bss_conf = link->conf;
struct ieee80211_bss *bss = (void *)cbss->priv;
- u32 changed = BSS_CHANGED_QOS;
+ u64 changed = BSS_CHANGED_QOS;
/* not really used in MLO */
sdata->u.mgd.beacon_timeout =
@@ -2895,7 +2890,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_local *local = sdata->local;
unsigned int link_id;
- u32 changed = 0;
+ u64 changed = 0;
struct ieee80211_prep_tx_info info = {
.subtype = stype,
};
@@ -3031,7 +3026,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
del_timer_sync(&sdata->u.mgd.conn_mon_timer);
del_timer_sync(&sdata->u.mgd.bcn_mon_timer);
del_timer_sync(&sdata->u.mgd.timer);
- del_timer_sync(&sdata->deflink.u.mgd.chswitch_timer);
sdata->vif.bss_conf.dtim_period = 0;
sdata->vif.bss_conf.beacon_rate = NULL;
@@ -3162,7 +3156,7 @@ void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.probe_send_count = 0;
else
sdata->u.mgd.nullfunc_failed = true;
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
static void ieee80211_mlme_send_probe_req(struct ieee80211_sub_if_data *sdata,
@@ -3423,7 +3417,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
sdata_unlock(sdata);
}
-static void ieee80211_beacon_connection_loss_work(struct work_struct *work)
+static void ieee80211_beacon_connection_loss_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
@@ -3448,7 +3443,8 @@ static void ieee80211_beacon_connection_loss_work(struct work_struct *work)
}
}
-static void ieee80211_csa_connection_drop_work(struct work_struct *work)
+static void ieee80211_csa_connection_drop_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_sub_if_data *sdata =
container_of(work, struct ieee80211_sub_if_data,
@@ -3465,7 +3461,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif)
trace_api_beacon_loss(sdata);
sdata->u.mgd.connection_loss = false;
- ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
+ wiphy_work_queue(hw->wiphy, &sdata->u.mgd.beacon_connection_loss_work);
}
EXPORT_SYMBOL(ieee80211_beacon_loss);
@@ -3477,7 +3473,7 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif)
trace_api_connection_loss(sdata);
sdata->u.mgd.connection_loss = true;
- ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
+ wiphy_work_queue(hw->wiphy, &sdata->u.mgd.beacon_connection_loss_work);
}
EXPORT_SYMBOL(ieee80211_connection_loss);
@@ -3493,7 +3489,7 @@ void ieee80211_disconnect(struct ieee80211_vif *vif, bool reconnect)
sdata->u.mgd.driver_disconnect = true;
sdata->u.mgd.reconnect = reconnect;
- ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
+ wiphy_work_queue(hw->wiphy, &sdata->u.mgd.beacon_connection_loss_work);
}
EXPORT_SYMBOL(ieee80211_disconnect);
@@ -3909,8 +3905,8 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
*have_higher_than_11mbit = true;
/*
- * Skip HT, VHT, HE and SAE H2E only BSS membership selectors
- * since they're not rates.
+ * Skip HT, VHT, HE, EHT and SAE H2E only BSS membership
+ * selectors since they're not rates.
*
* Note: Even though the membership selector and the basic
* rate flag share the same bit, they are not exactly
@@ -3919,6 +3915,7 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) ||
supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY) ||
supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HE_PHY) ||
+ supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_EHT_PHY) ||
supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_SAE_H2E))
continue;
@@ -3965,7 +3962,7 @@ static bool ieee80211_twt_req_supported(struct ieee80211_sub_if_data *sdata,
IEEE80211_HE_MAC_CAP0_TWT_REQ);
}
-static int ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
+static u64 ieee80211_recalc_twt_req(struct ieee80211_sub_if_data *sdata,
struct ieee80211_supported_band *sband,
struct ieee80211_link_data *link,
struct link_sta_info *link_sta,
@@ -4844,6 +4841,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
IEEE80211_CONN_DISABLE_EHT)) &&
he_oper) {
const struct cfg80211_bss_ies *cbss_ies;
+ const struct element *eht_ml_elem;
const u8 *eht_oper_ie;
cbss_ies = rcu_dereference(cbss->ies);
@@ -4854,6 +4852,19 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
eht_oper = (void *)(eht_oper_ie + 3);
else
eht_oper = NULL;
+
+ eht_ml_elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_MULTI_LINK,
+ cbss_ies->data, cbss_ies->len);
+
+ /* data + 1 / datalen - 1 since it's an extended element */
+ if (eht_ml_elem &&
+ ieee80211_mle_size_ok(eht_ml_elem->data + 1,
+ eht_ml_elem->datalen - 1)) {
+ sdata->vif.cfg.eml_cap =
+ ieee80211_mle_get_eml_cap(eht_ml_elem->data + 1);
+ sdata->vif.cfg.eml_med_sync_delay =
+ ieee80211_mle_get_eml_med_sync_delay(eht_ml_elem->data + 1);
+ }
}
/* Allow VHT if at least one channel on the sband supports 80 MHz */
@@ -6060,7 +6071,7 @@ static void ieee80211_sta_timer(struct timer_list *t)
struct ieee80211_sub_if_data *sdata =
from_timer(sdata, t, u.mgd.timer);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
@@ -6204,7 +6215,7 @@ void ieee80211_mgd_conn_tx_status(struct ieee80211_sub_if_data *sdata,
sdata->u.mgd.status_acked = acked;
sdata->u.mgd.status_received = true;
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
}
void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
@@ -6367,8 +6378,8 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t)
return;
sdata->u.mgd.connection_loss = false;
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->u.mgd.beacon_connection_loss_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->u.mgd.beacon_connection_loss_work);
}
static void ieee80211_sta_conn_mon_timer(struct timer_list *t)
@@ -6524,7 +6535,8 @@ void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata)
sdata_unlock(sdata);
}
-static void ieee80211_request_smps_mgd_work(struct work_struct *work)
+static void ieee80211_request_smps_mgd_work(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct ieee80211_link_data *link =
container_of(work, struct ieee80211_link_data,
@@ -6542,10 +6554,10 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work);
- INIT_WORK(&ifmgd->beacon_connection_loss_work,
- ieee80211_beacon_connection_loss_work);
- INIT_WORK(&ifmgd->csa_connection_drop_work,
- ieee80211_csa_connection_drop_work);
+ wiphy_work_init(&ifmgd->beacon_connection_loss_work,
+ ieee80211_beacon_connection_loss_work);
+ wiphy_work_init(&ifmgd->csa_connection_drop_work,
+ ieee80211_csa_connection_drop_work);
INIT_DELAYED_WORK(&ifmgd->tdls_peer_del_work,
ieee80211_tdls_peer_del_work);
timer_setup(&ifmgd->timer, ieee80211_sta_timer, 0);
@@ -6574,15 +6586,15 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
link->u.mgd.conn_flags = 0;
link->conf->bssid = link->u.mgd.bssid;
- INIT_WORK(&link->u.mgd.request_smps_work,
- ieee80211_request_smps_mgd_work);
+ wiphy_work_init(&link->u.mgd.request_smps_work,
+ ieee80211_request_smps_mgd_work);
if (local->hw.wiphy->features & NL80211_FEATURE_DYNAMIC_SMPS)
link->u.mgd.req_smps = IEEE80211_SMPS_AUTOMATIC;
else
link->u.mgd.req_smps = IEEE80211_SMPS_OFF;
- INIT_WORK(&link->u.mgd.chswitch_work, ieee80211_chswitch_work);
- timer_setup(&link->u.mgd.chswitch_timer, ieee80211_chswitch_timer, 0);
+ wiphy_delayed_work_init(&link->u.mgd.chswitch_work,
+ ieee80211_chswitch_work);
if (sdata->u.mgd.assoc_data)
ether_addr_copy(link->conf->addr,
@@ -7538,8 +7550,10 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
void ieee80211_mgd_stop_link(struct ieee80211_link_data *link)
{
- cancel_work_sync(&link->u.mgd.request_smps_work);
- cancel_work_sync(&link->u.mgd.chswitch_work);
+ wiphy_work_cancel(link->sdata->local->hw.wiphy,
+ &link->u.mgd.request_smps_work);
+ wiphy_delayed_work_cancel(link->sdata->local->hw.wiphy,
+ &link->u.mgd.chswitch_work);
}
void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
@@ -7552,8 +7566,10 @@ void ieee80211_mgd_stop(struct ieee80211_sub_if_data *sdata)
* cancelled when disconnecting.
*/
cancel_work_sync(&ifmgd->monitor_work);
- cancel_work_sync(&ifmgd->beacon_connection_loss_work);
- cancel_work_sync(&ifmgd->csa_connection_drop_work);
+ wiphy_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->beacon_connection_loss_work);
+ wiphy_work_cancel(sdata->local->hw.wiphy,
+ &ifmgd->csa_connection_drop_work);
cancel_delayed_work_sync(&ifmgd->tdls_peer_del_work);
sdata_lock(sdata);
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index a57dcbe99a0d..b44896e14522 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -4,7 +4,7 @@
*
* Copyright: (c) 2014 Czech Technical University in Prague
* (c) 2014 Volkswagen Group Research
- * Copyright (C) 2022 Intel Corporation
+ * Copyright (C) 2022 - 2023 Intel Corporation
* Author: Rostislav Lisovy <rostislav.lisovy@fel.cvut.cz>
* Funded by: Volkswagen Group Research
*/
@@ -81,7 +81,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata,
spin_lock(&ifocb->incomplete_lock);
list_add(&sta->list, &ifocb->incomplete_stations);
spin_unlock(&ifocb->incomplete_lock);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
}
static struct sta_info *ieee80211_ocb_finish_sta(struct sta_info *sta)
@@ -157,7 +157,7 @@ static void ieee80211_ocb_housekeeping_timer(struct timer_list *t)
set_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
}
void ieee80211_ocb_setup_sdata(struct ieee80211_sub_if_data *sdata)
@@ -175,7 +175,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
- u32 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID;
+ u64 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID;
int err;
if (ifocb->joined == true)
@@ -197,7 +197,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
ifocb->joined = true;
set_bit(OCB_WORK_HOUSEKEEPING, &ifocb->wrkq_flags);
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
netif_carrier_on(sdata->dev);
return 0;
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index fc6e130364da..e2a973309bf7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -229,7 +229,7 @@ static void __ieee80211_queue_skb_to_iface(struct ieee80211_sub_if_data *sdata,
}
skb_queue_tail(&sdata->skb_queue, skb);
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
if (sta)
sta->deflink.rx_stats.packets++;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 32fa8aca7005..ea5383136fff 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -502,7 +502,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
*/
list_for_each_entry_rcu(sdata, &local->interfaces, list) {
if (ieee80211_sdata_running(sdata))
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ wiphy_work_queue(sdata->local->hw.wiphy, &sdata->work);
}
if (was_scanning)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 1400512e0dde..731b832b257c 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2023 Intel Corporation
*/
#include <linux/module.h>
@@ -1274,7 +1274,117 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta)
return 0;
}
-static void __sta_info_destroy_part2(struct sta_info *sta)
+static int _sta_info_move_state(struct sta_info *sta,
+ enum ieee80211_sta_state new_state,
+ bool recalc)
+{
+ might_sleep();
+
+ if (sta->sta_state == new_state)
+ return 0;
+
+ /* check allowed transitions first */
+
+ switch (new_state) {
+ case IEEE80211_STA_NONE:
+ if (sta->sta_state != IEEE80211_STA_AUTH)
+ return -EINVAL;
+ break;
+ case IEEE80211_STA_AUTH:
+ if (sta->sta_state != IEEE80211_STA_NONE &&
+ sta->sta_state != IEEE80211_STA_ASSOC)
+ return -EINVAL;
+ break;
+ case IEEE80211_STA_ASSOC:
+ if (sta->sta_state != IEEE80211_STA_AUTH &&
+ sta->sta_state != IEEE80211_STA_AUTHORIZED)
+ return -EINVAL;
+ break;
+ case IEEE80211_STA_AUTHORIZED:
+ if (sta->sta_state != IEEE80211_STA_ASSOC)
+ return -EINVAL;
+ break;
+ default:
+ WARN(1, "invalid state %d", new_state);
+ return -EINVAL;
+ }
+
+ sta_dbg(sta->sdata, "moving STA %pM to state %d\n",
+ sta->sta.addr, new_state);
+
+ /* notify the driver before the actual changes so it can
+ * fail the transition
+ */
+ if (test_sta_flag(sta, WLAN_STA_INSERTED)) {
+ int err = drv_sta_state(sta->local, sta->sdata, sta,
+ sta->sta_state, new_state);
+ if (err)
+ return err;
+ }
+
+ /* reflect the change in all state variables */
+
+ switch (new_state) {
+ case IEEE80211_STA_NONE:
+ if (sta->sta_state == IEEE80211_STA_AUTH)
+ clear_bit(WLAN_STA_AUTH, &sta->_flags);
+ break;
+ case IEEE80211_STA_AUTH:
+ if (sta->sta_state == IEEE80211_STA_NONE) {
+ set_bit(WLAN_STA_AUTH, &sta->_flags);
+ } else if (sta->sta_state == IEEE80211_STA_ASSOC) {
+ clear_bit(WLAN_STA_ASSOC, &sta->_flags);
+ if (recalc) {
+ ieee80211_recalc_min_chandef(sta->sdata, -1);
+ if (!sta->sta.support_p2p_ps)
+ ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+ }
+ }
+ break;
+ case IEEE80211_STA_ASSOC:
+ if (sta->sta_state == IEEE80211_STA_AUTH) {
+ set_bit(WLAN_STA_ASSOC, &sta->_flags);
+ sta->assoc_at = ktime_get_boottime_ns();
+ if (recalc) {
+ ieee80211_recalc_min_chandef(sta->sdata, -1);
+ if (!sta->sta.support_p2p_ps)
+ ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
+ }
+ } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
+ ieee80211_vif_dec_num_mcast(sta->sdata);
+ clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+ ieee80211_clear_fast_xmit(sta);
+ ieee80211_clear_fast_rx(sta);
+ }
+ break;
+ case IEEE80211_STA_AUTHORIZED:
+ if (sta->sta_state == IEEE80211_STA_ASSOC) {
+ ieee80211_vif_inc_num_mcast(sta->sdata);
+ set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
+ ieee80211_check_fast_xmit(sta);
+ ieee80211_check_fast_rx(sta);
+ }
+ if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ sta->sdata->vif.type == NL80211_IFTYPE_AP)
+ cfg80211_send_layer2_update(sta->sdata->dev,
+ sta->sta.addr);
+ break;
+ default:
+ break;
+ }
+
+ sta->sta_state = new_state;
+
+ return 0;
+}
+
+int sta_info_move_state(struct sta_info *sta,
+ enum ieee80211_sta_state new_state)
+{
+ return _sta_info_move_state(sta, new_state, true);
+}
+
+static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc)
{
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
@@ -1290,7 +1400,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
lockdep_assert_held(&local->sta_mtx);
if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
- ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
+ ret = _sta_info_move_state(sta, IEEE80211_STA_ASSOC, recalc);
WARN_ON_ONCE(ret);
}
@@ -1318,7 +1428,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta)
local->sta_generation++;
while (sta->sta_state > IEEE80211_STA_NONE) {
- ret = sta_info_move_state(sta, sta->sta_state - 1);
+ ret = _sta_info_move_state(sta, sta->sta_state - 1, recalc);
if (ret) {
WARN_ON_ONCE(1);
break;
@@ -1355,7 +1465,7 @@ int __must_check __sta_info_destroy(struct sta_info *sta)
synchronize_net();
- __sta_info_destroy_part2(sta);
+ __sta_info_destroy_part2(sta, true);
return 0;
}
@@ -1462,9 +1572,18 @@ int __sta_info_flush(struct ieee80211_sub_if_data *sdata, bool vlans)
}
if (!list_empty(&free_list)) {
+ bool support_p2p_ps = true;
+
synchronize_net();
- list_for_each_entry_safe(sta, tmp, &free_list, free_list)
- __sta_info_destroy_part2(sta);
+ list_for_each_entry_safe(sta, tmp, &free_list, free_list) {
+ if (!sta->sta.support_p2p_ps)
+ support_p2p_ps = false;
+ __sta_info_destroy_part2(sta, false);
+ }
+
+ ieee80211_recalc_min_chandef(sdata, -1);
+ if (!support_p2p_ps)
+ ieee80211_recalc_p2p_go_ps_allowed(sdata);
}
mutex_unlock(&local->sta_mtx);
@@ -2252,106 +2371,6 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
}
}
-int sta_info_move_state(struct sta_info *sta,
- enum ieee80211_sta_state new_state)
-{
- might_sleep();
-
- if (sta->sta_state == new_state)
- return 0;
-
- /* check allowed transitions first */
-
- switch (new_state) {
- case IEEE80211_STA_NONE:
- if (sta->sta_state != IEEE80211_STA_AUTH)
- return -EINVAL;
- break;
- case IEEE80211_STA_AUTH:
- if (sta->sta_state != IEEE80211_STA_NONE &&
- sta->sta_state != IEEE80211_STA_ASSOC)
- return -EINVAL;
- break;
- case IEEE80211_STA_ASSOC:
- if (sta->sta_state != IEEE80211_STA_AUTH &&
- sta->sta_state != IEEE80211_STA_AUTHORIZED)
- return -EINVAL;
- break;
- case IEEE80211_STA_AUTHORIZED:
- if (sta->sta_state != IEEE80211_STA_ASSOC)
- return -EINVAL;
- break;
- default:
- WARN(1, "invalid state %d", new_state);
- return -EINVAL;
- }
-
- sta_dbg(sta->sdata, "moving STA %pM to state %d\n",
- sta->sta.addr, new_state);
-
- /*
- * notify the driver before the actual changes so it can
- * fail the transition
- */
- if (test_sta_flag(sta, WLAN_STA_INSERTED)) {
- int err = drv_sta_state(sta->local, sta->sdata, sta,
- sta->sta_state, new_state);
- if (err)
- return err;
- }
-
- /* reflect the change in all state variables */
-
- switch (new_state) {
- case IEEE80211_STA_NONE:
- if (sta->sta_state == IEEE80211_STA_AUTH)
- clear_bit(WLAN_STA_AUTH, &sta->_flags);
- break;
- case IEEE80211_STA_AUTH:
- if (sta->sta_state == IEEE80211_STA_NONE) {
- set_bit(WLAN_STA_AUTH, &sta->_flags);
- } else if (sta->sta_state == IEEE80211_STA_ASSOC) {
- clear_bit(WLAN_STA_ASSOC, &sta->_flags);
- ieee80211_recalc_min_chandef(sta->sdata, -1);
- if (!sta->sta.support_p2p_ps)
- ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
- }
- break;
- case IEEE80211_STA_ASSOC:
- if (sta->sta_state == IEEE80211_STA_AUTH) {
- set_bit(WLAN_STA_ASSOC, &sta->_flags);
- sta->assoc_at = ktime_get_boottime_ns();
- ieee80211_recalc_min_chandef(sta->sdata, -1);
- if (!sta->sta.support_p2p_ps)
- ieee80211_recalc_p2p_go_ps_allowed(sta->sdata);
- } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) {
- ieee80211_vif_dec_num_mcast(sta->sdata);
- clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
- ieee80211_clear_fast_xmit(sta);
- ieee80211_clear_fast_rx(sta);
- }
- break;
- case IEEE80211_STA_AUTHORIZED:
- if (sta->sta_state == IEEE80211_STA_ASSOC) {
- ieee80211_vif_inc_num_mcast(sta->sdata);
- set_bit(WLAN_STA_AUTHORIZED, &sta->_flags);
- ieee80211_check_fast_xmit(sta);
- ieee80211_check_fast_rx(sta);
- }
- if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- sta->sdata->vif.type == NL80211_IFTYPE_AP)
- cfg80211_send_layer2_update(sta->sdata->dev,
- sta->sta.addr);
- break;
- default:
- break;
- }
-
- sta->sta_state = new_state;
-
- return 0;
-}
-
static struct ieee80211_sta_rx_stats *
sta_get_last_rx_stats(struct sta_info *sta)
{
@@ -2913,6 +2932,8 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id)
if (!test_sta_flag(sta, WLAN_STA_INSERTED))
goto hash;
+ ieee80211_recalc_min_chandef(sdata, link_id);
+
/* Ensure the values are updated for the driver,
* redone by sta_remove_link on failure.
*/
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 2b13a52ce96c..44d83da60aee 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -5,7 +5,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2008-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2021-2022 Intel Corporation
+ * Copyright 2021-2023 Intel Corporation
*/
#include <linux/export.h>
@@ -747,8 +747,8 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
if (qskb) {
skb_queue_tail(&sdata->status_queue,
qskb);
- ieee80211_queue_work(&local->hw,
- &sdata->work);
+ wiphy_work_queue(local->hw.wiphy,
+ &sdata->work);
}
}
} else {
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index b255f3b5bf01..52c47674a554 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1431,8 +1431,8 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
}
if (ret == 0)
- ieee80211_queue_work(&sdata->local->hw,
- &sdata->deflink.u.mgd.request_smps_work);
+ wiphy_work_queue(sdata->local->hw.wiphy,
+ &sdata->deflink.u.mgd.request_smps_work);
mutex_unlock(&local->mtx);
sdata_unlock(sdata);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 13b522dab0a3..7e2d68e0d79f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -26,6 +26,7 @@
#include <net/codel_impl.h>
#include <asm/unaligned.h>
#include <net/fq_impl.h>
+#include <net/gso.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -581,25 +582,9 @@ ieee80211_select_link_key(struct ieee80211_tx_data *tx)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
- enum {
- USE_NONE,
- USE_MGMT_KEY,
- USE_MCAST_KEY,
- } which_key = USE_NONE;
struct ieee80211_link_data *link;
unsigned int link_id;
- if (ieee80211_is_group_privacy_action(tx->skb))
- which_key = USE_MCAST_KEY;
- else if (ieee80211_is_mgmt(hdr->frame_control) &&
- is_multicast_ether_addr(hdr->addr1) &&
- ieee80211_is_robust_mgmt_frame(tx->skb))
- which_key = USE_MGMT_KEY;
- else if (is_multicast_ether_addr(hdr->addr1))
- which_key = USE_MCAST_KEY;
- else
- return NULL;
-
link_id = u32_get_bits(info->control.flags, IEEE80211_TX_CTRL_MLO_LINK);
if (link_id == IEEE80211_LINK_UNSPECIFIED) {
link = &tx->sdata->deflink;
@@ -609,14 +594,14 @@ ieee80211_select_link_key(struct ieee80211_tx_data *tx)
return NULL;
}
- switch (which_key) {
- case USE_NONE:
- break;
- case USE_MGMT_KEY:
+ if (ieee80211_is_group_privacy_action(tx->skb))
+ return rcu_dereference(link->default_multicast_key);
+ else if (ieee80211_is_mgmt(hdr->frame_control) &&
+ is_multicast_ether_addr(hdr->addr1) &&
+ ieee80211_is_robust_mgmt_frame(tx->skb))
return rcu_dereference(link->default_mgmt_key);
- case USE_MCAST_KEY:
+ else if (is_multicast_ether_addr(hdr->addr1))
return rcu_dereference(link->default_multicast_key);
- }
return NULL;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 3bd07a0a782f..e07be65806b7 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1604,7 +1604,7 @@ ieee802_11_parse_elems_full(struct ieee80211_elems_parse_params *params)
const struct element *non_inherit = NULL;
u8 *nontransmitted_profile;
int nontransmitted_profile_len = 0;
- size_t scratch_len = params->scratch_len ?: 3 * params->len;
+ size_t scratch_len = 3 * params->len;
elems = kzalloc(sizeof(*elems) + scratch_len, GFP_ATOMIC);
if (!elems)
@@ -2373,6 +2373,7 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
local->resuming = false;
local->suspended = false;
local->in_reconfig = false;
+ local->reconfig_failure = true;
ieee80211_flush_completed_scan(local, true);
@@ -2475,6 +2476,35 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata)
return 0;
}
+static void ieee80211_reconfig_ap_links(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u64 changed)
+{
+ int link_id;
+
+ for (link_id = 0; link_id < ARRAY_SIZE(sdata->link); link_id++) {
+ struct ieee80211_link_data *link;
+
+ if (!(sdata->vif.active_links & BIT(link_id)))
+ continue;
+
+ link = sdata_dereference(sdata->link[link_id], sdata);
+ if (!link)
+ continue;
+
+ if (rcu_access_pointer(link->u.ap.beacon))
+ drv_start_ap(local, sdata, link->conf);
+
+ if (!link->conf->enable_beacon)
+ continue;
+
+ changed |= BSS_CHANGED_BEACON |
+ BSS_CHANGED_BEACON_ENABLED;
+
+ ieee80211_link_info_change_notify(sdata, link, changed);
+ }
+}
+
int ieee80211_reconfig(struct ieee80211_local *local)
{
struct ieee80211_hw *hw = &local->hw;
@@ -2624,21 +2654,55 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* Finally also reconfigure all the BSS information */
list_for_each_entry(sdata, &local->interfaces, list) {
+ /* common change flags for all interface types - link only */
+ u64 changed = BSS_CHANGED_ERP_CTS_PROT |
+ BSS_CHANGED_ERP_PREAMBLE |
+ BSS_CHANGED_ERP_SLOT |
+ BSS_CHANGED_HT |
+ BSS_CHANGED_BASIC_RATES |
+ BSS_CHANGED_BEACON_INT |
+ BSS_CHANGED_BSSID |
+ BSS_CHANGED_CQM |
+ BSS_CHANGED_QOS |
+ BSS_CHANGED_TXPOWER |
+ BSS_CHANGED_MCAST_RATE;
+ struct ieee80211_link_data *link = NULL;
unsigned int link_id;
- u32 changed;
+ u32 active_links = 0;
if (!ieee80211_sdata_running(sdata))
continue;
sdata_lock(sdata);
+ if (sdata->vif.valid_links) {
+ struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS] = {
+ [0] = &sdata->vif.bss_conf,
+ };
+
+ if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+ /* start with a single active link */
+ active_links = sdata->vif.active_links;
+ link_id = ffs(active_links) - 1;
+ sdata->vif.active_links = BIT(link_id);
+ }
+
+ drv_change_vif_links(local, sdata, 0,
+ sdata->vif.active_links,
+ old);
+ }
+
for (link_id = 0;
link_id < ARRAY_SIZE(sdata->vif.link_conf);
link_id++) {
- struct ieee80211_link_data *link;
+ if (sdata->vif.valid_links &&
+ !(sdata->vif.active_links & BIT(link_id)))
+ continue;
link = sdata_dereference(sdata->link[link_id], sdata);
- if (link)
- ieee80211_assign_chanctx(local, sdata, link);
+ if (!link)
+ continue;
+
+ ieee80211_assign_chanctx(local, sdata, link);
}
switch (sdata->vif.type) {
@@ -2658,42 +2722,42 @@ int ieee80211_reconfig(struct ieee80211_local *local)
&sdata->deflink.tx_conf[i]);
break;
}
- sdata_unlock(sdata);
-
- /* common change flags for all interface types */
- changed = BSS_CHANGED_ERP_CTS_PROT |
- BSS_CHANGED_ERP_PREAMBLE |
- BSS_CHANGED_ERP_SLOT |
- BSS_CHANGED_HT |
- BSS_CHANGED_BASIC_RATES |
- BSS_CHANGED_BEACON_INT |
- BSS_CHANGED_BSSID |
- BSS_CHANGED_CQM |
- BSS_CHANGED_QOS |
- BSS_CHANGED_IDLE |
- BSS_CHANGED_TXPOWER |
- BSS_CHANGED_MCAST_RATE;
if (sdata->vif.bss_conf.mu_mimo_owner)
changed |= BSS_CHANGED_MU_GROUPS;
+ if (!sdata->vif.valid_links)
+ changed |= BSS_CHANGED_IDLE;
+
switch (sdata->vif.type) {
case NL80211_IFTYPE_STATION:
- changed |= BSS_CHANGED_ASSOC |
- BSS_CHANGED_ARP_FILTER |
- BSS_CHANGED_PS;
-
- /* Re-send beacon info report to the driver */
- if (sdata->deflink.u.mgd.have_beacon)
- changed |= BSS_CHANGED_BEACON_INFO;
-
- if (sdata->vif.bss_conf.max_idle_period ||
- sdata->vif.bss_conf.protected_keep_alive)
- changed |= BSS_CHANGED_KEEP_ALIVE;
-
- sdata_lock(sdata);
- ieee80211_bss_info_change_notify(sdata, changed);
- sdata_unlock(sdata);
+ if (!sdata->vif.valid_links) {
+ changed |= BSS_CHANGED_ASSOC |
+ BSS_CHANGED_ARP_FILTER |
+ BSS_CHANGED_PS;
+
+ /* Re-send beacon info report to the driver */
+ if (sdata->deflink.u.mgd.have_beacon)
+ changed |= BSS_CHANGED_BEACON_INFO;
+
+ if (sdata->vif.bss_conf.max_idle_period ||
+ sdata->vif.bss_conf.protected_keep_alive)
+ changed |= BSS_CHANGED_KEEP_ALIVE;
+
+ if (sdata->vif.bss_conf.eht_puncturing)
+ changed |= BSS_CHANGED_EHT_PUNCTURING;
+
+ ieee80211_bss_info_change_notify(sdata,
+ changed);
+ } else if (!WARN_ON(!link)) {
+ ieee80211_link_info_change_notify(sdata, link,
+ changed);
+ changed = BSS_CHANGED_ASSOC |
+ BSS_CHANGED_IDLE |
+ BSS_CHANGED_PS |
+ BSS_CHANGED_ARP_FILTER;
+ ieee80211_vif_cfg_change_notify(sdata, changed);
+ }
break;
case NL80211_IFTYPE_OCB:
changed |= BSS_CHANGED_OCB;
@@ -2703,7 +2767,13 @@ int ieee80211_reconfig(struct ieee80211_local *local)
changed |= BSS_CHANGED_IBSS;
fallthrough;
case NL80211_IFTYPE_AP:
- changed |= BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS;
+ changed |= BSS_CHANGED_P2P_PS;
+
+ if (sdata->vif.valid_links)
+ ieee80211_vif_cfg_change_notify(sdata,
+ BSS_CHANGED_SSID);
+ else
+ changed |= BSS_CHANGED_SSID;
if (sdata->vif.bss_conf.ftm_responder == 1 &&
wiphy_ext_feature_isset(sdata->local->hw.wiphy,
@@ -2713,6 +2783,13 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (sdata->vif.type == NL80211_IFTYPE_AP) {
changed |= BSS_CHANGED_AP_PROBE_RESP;
+ if (sdata->vif.valid_links) {
+ ieee80211_reconfig_ap_links(local,
+ sdata,
+ changed);
+ break;
+ }
+
if (rcu_access_pointer(sdata->deflink.u.ap.beacon))
drv_start_ap(local, sdata,
sdata->deflink.conf);
@@ -2728,6 +2805,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
case NL80211_IFTYPE_NAN:
res = ieee80211_reconfig_nan(sdata);
if (res < 0) {
+ sdata_unlock(sdata);
ieee80211_handle_reconfig_failure(local);
return res;
}
@@ -2745,6 +2823,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
WARN_ON(1);
break;
}
+ sdata_unlock(sdata);
+
+ if (active_links)
+ ieee80211_set_active_links(&sdata->vif, active_links);
}
ieee80211_recalc_ps(local);
@@ -2860,7 +2942,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/* Requeue all works */
list_for_each_entry(sdata, &local->interfaces, list)
- ieee80211_queue_work(&local->hw, &sdata->work);
+ wiphy_work_queue(local->hw.wiphy, &sdata->work);
}
ieee80211_wake_queues_by_reason(hw, IEEE80211_MAX_QUEUE_MAP,
diff --git a/net/mctp/route.c b/net/mctp/route.c
index f51a05ec7162..ab62fe447038 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -1249,9 +1249,6 @@ static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh,
mtu = nla_get_u32(tbx[RTAX_MTU]);
}
- if (rtm->rtm_type != RTN_UNICAST)
- return -EINVAL;
-
rc = mctp_route_add(mdev, daddr_start, rtm->rtm_dst_len, mtu,
rtm->rtm_type);
return rc;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index dc5165d3eec4..bf6e81d56263 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -12,6 +12,7 @@
#include <linux/nospec.h>
#include <linux/vmalloc.h>
#include <linux/percpu.h>
+#include <net/gso.h>
#include <net/ip.h>
#include <net/dst.h>
#include <net/sock.h>
diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
index 1482259de9b5..533d082f0701 100644
--- a/net/mpls/mpls_gso.c
+++ b/net/mpls/mpls_gso.c
@@ -14,6 +14,7 @@
#include <linux/netdev_features.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <net/gso.h>
#include <net/mpls.h>
static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index 0dac2863c6e1..a0990c365a2e 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -34,7 +34,11 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("NoDSSInWindow", MPTCP_MIB_NODSSWINDOW),
SNMP_MIB_ITEM("DuplicateData", MPTCP_MIB_DUPDATA),
SNMP_MIB_ITEM("AddAddr", MPTCP_MIB_ADDADDR),
+ SNMP_MIB_ITEM("AddAddrTx", MPTCP_MIB_ADDADDRTX),
+ SNMP_MIB_ITEM("AddAddrTxDrop", MPTCP_MIB_ADDADDRTXDROP),
SNMP_MIB_ITEM("EchoAdd", MPTCP_MIB_ECHOADD),
+ SNMP_MIB_ITEM("EchoAddTx", MPTCP_MIB_ECHOADDTX),
+ SNMP_MIB_ITEM("EchoAddTxDrop", MPTCP_MIB_ECHOADDTXDROP),
SNMP_MIB_ITEM("PortAdd", MPTCP_MIB_PORTADD),
SNMP_MIB_ITEM("AddAddrDrop", MPTCP_MIB_ADDADDRDROP),
SNMP_MIB_ITEM("MPJoinPortSynRx", MPTCP_MIB_JOINPORTSYNRX),
@@ -44,6 +48,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
SNMP_MIB_ITEM("MismatchPortAckRx", MPTCP_MIB_MISMATCHPORTACKRX),
SNMP_MIB_ITEM("RmAddr", MPTCP_MIB_RMADDR),
SNMP_MIB_ITEM("RmAddrDrop", MPTCP_MIB_RMADDRDROP),
+ SNMP_MIB_ITEM("RmAddrTx", MPTCP_MIB_RMADDRTX),
+ SNMP_MIB_ITEM("RmAddrTxDrop", MPTCP_MIB_RMADDRTXDROP),
SNMP_MIB_ITEM("RmSubflow", MPTCP_MIB_RMSUBFLOW),
SNMP_MIB_ITEM("MPPrioTx", MPTCP_MIB_MPPRIOTX),
SNMP_MIB_ITEM("MPPrioRx", MPTCP_MIB_MPPRIORX),
diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h
index 2be3596374f4..cae71d947252 100644
--- a/net/mptcp/mib.h
+++ b/net/mptcp/mib.h
@@ -27,7 +27,15 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_NODSSWINDOW, /* Segments not in MPTCP windows */
MPTCP_MIB_DUPDATA, /* Segments discarded due to duplicate DSS */
MPTCP_MIB_ADDADDR, /* Received ADD_ADDR with echo-flag=0 */
+ MPTCP_MIB_ADDADDRTX, /* Sent ADD_ADDR with echo-flag=0 */
+ MPTCP_MIB_ADDADDRTXDROP, /* ADD_ADDR with echo-flag=0 not send due to
+ * resource exhaustion
+ */
MPTCP_MIB_ECHOADD, /* Received ADD_ADDR with echo-flag=1 */
+ MPTCP_MIB_ECHOADDTX, /* Send ADD_ADDR with echo-flag=1 */
+ MPTCP_MIB_ECHOADDTXDROP, /* ADD_ADDR with echo-flag=1 not send due
+ * to resource exhaustion
+ */
MPTCP_MIB_PORTADD, /* Received ADD_ADDR with a port-number */
MPTCP_MIB_ADDADDRDROP, /* Dropped incoming ADD_ADDR */
MPTCP_MIB_JOINPORTSYNRX, /* Received a SYN MP_JOIN with a different port-number */
@@ -37,6 +45,8 @@ enum linux_mptcp_mib_field {
MPTCP_MIB_MISMATCHPORTACKRX, /* Received an ACK MP_JOIN with a mismatched port-number */
MPTCP_MIB_RMADDR, /* Received RM_ADDR */
MPTCP_MIB_RMADDRDROP, /* Dropped incoming RM_ADDR */
+ MPTCP_MIB_RMADDRTX, /* Sent RM_ADDR */
+ MPTCP_MIB_RMADDRTXDROP, /* RM_ADDR not sent due to resource exhaustion */
MPTCP_MIB_RMSUBFLOW, /* Remove a subflow */
MPTCP_MIB_MPPRIOTX, /* Transmit a MP_PRIO */
MPTCP_MIB_MPPRIORX, /* Received a MP_PRIO */
@@ -63,6 +73,14 @@ struct mptcp_mib {
unsigned long mibs[LINUX_MIB_MPTCP_MAX];
};
+static inline void MPTCP_ADD_STATS(struct net *net,
+ enum linux_mptcp_mib_field field,
+ int val)
+{
+ if (likely(net->mib.mptcp_statistics))
+ SNMP_ADD_STATS(net->mib.mptcp_statistics, field, val);
+}
+
static inline void MPTCP_INC_STATS(struct net *net,
enum linux_mptcp_mib_field field)
{
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 19a01b6566f1..c254accb14de 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -687,9 +687,12 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
}
opts->suboptions |= OPTION_MPTCP_ADD_ADDR;
if (!echo) {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX);
opts->ahmac = add_addr_generate_hmac(msk->local_key,
msk->remote_key,
&opts->addr);
+ } else {
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX);
}
pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d",
opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port));
@@ -723,7 +726,7 @@ static bool mptcp_established_options_rm_addr(struct sock *sk,
for (i = 0; i < opts->rm_list.nr; i++)
pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
-
+ MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr);
return true;
}
@@ -1023,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
return cur_seq;
}
+static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
+{
+ msk->bytes_acked += new_snd_una - msk->snd_una;
+ msk->snd_una = new_snd_una;
+}
+
static void ack_update_msk(struct mptcp_sock *msk,
struct sock *ssk,
struct mptcp_options_received *mp_opt)
@@ -1054,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
__mptcp_check_push(sk, ssk);
if (after64(new_snd_una, old_snd_una)) {
- msk->snd_una = new_snd_una;
+ __mptcp_snd_una_update(msk, new_snd_una);
__mptcp_data_acked(sk);
}
mptcp_data_unlock(sk);
@@ -1116,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
mptcp_data_lock(subflow->conn);
if (sk_stream_memory_free(sk))
__mptcp_check_push(subflow->conn, sk);
+
+ /* on fallback we just need to ignore the msk-level snd_una, as
+ * this is really plain TCP
+ */
+ __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));
+
__mptcp_data_acked(subflow->conn);
mptcp_data_unlock(subflow->conn);
return true;
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 76612bca275a..7dbbad1e4f55 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -26,7 +26,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk,
if (add_addr &
(echo ? BIT(MPTCP_ADD_ADDR_ECHO) : BIT(MPTCP_ADD_ADDR_SIGNAL))) {
- pr_warn("addr_signal error, add_addr=%d, echo=%d", add_addr, echo);
+ MPTCP_INC_STATS(sock_net((struct sock *)msk),
+ echo ? MPTCP_MIB_ECHOADDTXDROP : MPTCP_MIB_ADDADDRTXDROP);
return -EINVAL;
}
@@ -48,7 +49,8 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_
pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr);
if (rm_addr) {
- pr_warn("addr_signal error, rm_addr=%d", rm_addr);
+ MPTCP_ADD_STATS(sock_net((struct sock *)msk),
+ MPTCP_MIB_RMADDRTXDROP, rm_list->nr);
return -EINVAL;
}
@@ -413,7 +415,46 @@ out_unlock:
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
{
- return mptcp_pm_nl_get_local_id(msk, skc);
+ struct mptcp_addr_info skc_local;
+ struct mptcp_addr_info msk_local;
+
+ if (WARN_ON_ONCE(!msk))
+ return -1;
+
+ /* The 0 ID mapping is defined by the first subflow, copied into the msk
+ * addr
+ */
+ mptcp_local_address((struct sock_common *)msk, &msk_local);
+ mptcp_local_address((struct sock_common *)skc, &skc_local);
+ if (mptcp_addresses_equal(&msk_local, &skc_local, false))
+ return 0;
+
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_local_id(msk, &skc_local);
+ return mptcp_pm_nl_get_local_id(msk, &skc_local);
+}
+
+int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ *flags = 0;
+ *ifindex = 0;
+
+ if (!id)
+ return 0;
+
+ if (mptcp_pm_is_userspace(msk))
+ return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
+ return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex);
+}
+
+int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup)
+{
+ if (token)
+ return mptcp_userspace_pm_set_flags(net, token, loc, rem, bkup);
+ return mptcp_pm_nl_set_flags(net, loc, bkup);
}
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 1224dfca5bf3..5692daf57a4d 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -25,9 +25,9 @@ static int pm_nl_pernet_id;
struct mptcp_pm_add_entry {
struct list_head list;
struct mptcp_addr_info addr;
+ u8 retrans_times;
struct timer_list add_timer;
struct mptcp_sock *sock;
- u8 retrans_times;
};
struct pm_nl_pernet {
@@ -86,8 +86,7 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
return a->port == b->port;
}
-static void local_address(const struct sock_common *skc,
- struct mptcp_addr_info *addr)
+void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr)
{
addr->family = skc->skc_family;
addr->port = htons(skc->skc_num);
@@ -122,7 +121,7 @@ static bool lookup_subflow_by_saddr(const struct list_head *list,
list_for_each_entry(subflow, list, node) {
skc = (struct sock_common *)mptcp_subflow_tcp_sock(subflow);
- local_address(skc, &cur);
+ mptcp_local_address(skc, &cur);
if (mptcp_addresses_equal(&cur, saddr, saddr->port))
return true;
}
@@ -263,7 +262,7 @@ bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk)
struct mptcp_addr_info saddr;
bool ret = false;
- local_address((struct sock_common *)sk, &saddr);
+ mptcp_local_address((struct sock_common *)sk, &saddr);
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.anno_list, list) {
@@ -342,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
}
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- const struct mptcp_pm_addr_entry *entry)
+ const struct mptcp_addr_info *addr)
{
struct mptcp_pm_add_entry *add_entry = NULL;
struct sock *sk = (struct sock *)msk;
@@ -350,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
lockdep_assert_held(&msk->pm.lock);
- add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
+ add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
if (add_entry) {
if (mptcp_pm_is_kernel(msk))
@@ -367,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
list_add(&add_entry->list, &msk->pm.anno_list);
- add_entry->addr = entry->addr;
+ add_entry->addr = *addr;
add_entry->sock = msk;
add_entry->retrans_times = 0;
@@ -541,7 +540,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
struct mptcp_addr_info mpc_addr;
bool backup = false;
- local_address((struct sock_common *)msk->first, &mpc_addr);
+ mptcp_local_address((struct sock_common *)msk->first, &mpc_addr);
rcu_read_lock();
entry = __lookup_addr(pernet, &mpc_addr, false);
if (entry) {
@@ -577,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
return;
if (local) {
- if (mptcp_pm_alloc_anno_list(msk, local)) {
+ if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &local->addr, false);
@@ -752,7 +751,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct mptcp_addr_info local, remote;
- local_address((struct sock_common *)ssk, &local);
+ mptcp_local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
@@ -1057,33 +1056,17 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
return 0;
}
-int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
+int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc)
{
struct mptcp_pm_addr_entry *entry;
- struct mptcp_addr_info skc_local;
- struct mptcp_addr_info msk_local;
struct pm_nl_pernet *pernet;
int ret = -1;
- if (WARN_ON_ONCE(!msk))
- return -1;
-
- /* The 0 ID mapping is defined by the first subflow, copied into the msk
- * addr
- */
- local_address((struct sock_common *)msk, &msk_local);
- local_address((struct sock_common *)skc, &skc_local);
- if (mptcp_addresses_equal(&msk_local, &skc_local, false))
- return 0;
-
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_local_id(msk, &skc_local);
-
pernet = pm_nl_get_pernet_from_msk(msk);
rcu_read_lock();
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
- if (mptcp_addresses_equal(&entry->addr, &skc_local, entry->addr.port)) {
+ if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) {
ret = entry->addr.id;
break;
}
@@ -1097,7 +1080,7 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc)
if (!entry)
return -ENOMEM;
- entry->addr = skc_local;
+ entry->addr = *skc;
entry->addr.id = 0;
entry->addr.port = 0;
entry->ifindex = 0;
@@ -1374,31 +1357,20 @@ out_free:
return ret;
}
-int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
- u8 *flags, int *ifindex)
+int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
+ u8 *flags, int *ifindex)
{
struct mptcp_pm_addr_entry *entry;
struct sock *sk = (struct sock *)msk;
struct net *net = sock_net(sk);
- *flags = 0;
- *ifindex = 0;
-
- if (id) {
- if (mptcp_pm_is_userspace(msk))
- return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk,
- id,
- flags,
- ifindex);
-
- rcu_read_lock();
- entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
- if (entry) {
- *flags = entry->flags;
- *ifindex = entry->ifindex;
- }
- rcu_read_unlock();
+ rcu_read_lock();
+ entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id);
+ if (entry) {
+ *flags = entry->flags;
+ *ifindex = entry->ifindex;
}
+ rcu_read_unlock();
return 0;
}
@@ -1492,7 +1464,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net,
if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk))
goto next;
- local_address((struct sock_common *)msk, &msk_local);
+ mptcp_local_address((struct sock_common *)msk, &msk_local);
if (!mptcp_addresses_equal(&msk_local, addr, addr->port))
goto next;
@@ -1911,18 +1883,50 @@ next:
return ret;
}
+int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup)
+{
+ struct pm_nl_pernet *pernet = pm_nl_get_pernet(net);
+ u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
+ MPTCP_PM_ADDR_FLAG_FULLMESH;
+ struct mptcp_pm_addr_entry *entry;
+ u8 lookup_by_id = 0;
+
+ if (addr->addr.family == AF_UNSPEC) {
+ lookup_by_id = 1;
+ if (!addr->addr.id)
+ return -EOPNOTSUPP;
+ }
+
+ spin_lock_bh(&pernet->lock);
+ entry = __lookup_addr(pernet, &addr->addr, lookup_by_id);
+ if (!entry) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
+ }
+ if ((addr->flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
+ (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
+ spin_unlock_bh(&pernet->lock);
+ return -EINVAL;
+ }
+
+ changed = (addr->flags ^ entry->flags) & mask;
+ entry->flags = (entry->flags & ~mask) | (addr->flags & mask);
+ *addr = *entry;
+ spin_unlock_bh(&pernet->lock);
+
+ mptcp_nl_set_flags(net, &addr->addr, bkup, changed);
+ return 0;
+}
+
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
- struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
+ struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, };
struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
- struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
- u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
- MPTCP_PM_ADDR_FLAG_FULLMESH;
struct net *net = sock_net(skb->sk);
- u8 bkup = 0, lookup_by_id = 0;
+ u8 bkup = 0;
int ret;
ret = mptcp_pm_parse_entry(attr, info, false, &addr);
@@ -1937,34 +1941,8 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
- if (addr.addr.family == AF_UNSPEC) {
- lookup_by_id = 1;
- if (!addr.addr.id)
- return -EOPNOTSUPP;
- }
-
- if (token)
- return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup);
- spin_lock_bh(&pernet->lock);
- entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
- if (!entry) {
- spin_unlock_bh(&pernet->lock);
- return -EINVAL;
- }
- if ((addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) &&
- (entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) {
- spin_unlock_bh(&pernet->lock);
- return -EINVAL;
- }
-
- changed = (addr.flags ^ entry->flags) & mask;
- entry->flags = (entry->flags & ~mask) | (addr.flags & mask);
- addr = *entry;
- spin_unlock_bh(&pernet->lock);
-
- mptcp_nl_set_flags(net, &addr.addr, bkup, changed);
- return 0;
+ return mptcp_pm_set_flags(net, token, &addr, &remote, bkup);
}
static void mptcp_nl_mcast_send(struct net *net, struct sk_buff *nlskb, gfp_t gfp)
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index b06aa58dfcf2..b5a8aa4c1ebd 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -111,9 +111,6 @@ int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
{
struct mptcp_pm_addr_entry *entry, *match = NULL;
- *flags = 0;
- *ifindex = 0;
-
spin_lock_bh(&msk->pm.lock);
list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
if (id == entry->addr.id) {
@@ -196,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
lock_sock((struct sock *)msk);
spin_lock_bh(&msk->pm.lock);
- if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
+ if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
msk->pm.add_addr_signaled++;
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
mptcp_pm_nl_addr_send_ack(msk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a6c7f2d24909..bd023debedc8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
subflow->request_mptcp = 1;
+ subflow->subflow_id = msk->subflow_id++;
/* This is the first subflow, always with id 0 */
subflow->local_id_valid = 1;
@@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
/* in sequence */
+ msk->bytes_received += copy_len;
WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail && mptcp_try_coalesce(sk, tail, skb))
@@ -757,6 +759,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
MPTCP_SKB_CB(skb)->map_seq += delta;
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
+ msk->bytes_received += end_seq - msk->ack_seq;
msk->ack_seq = end_seq;
moved = true;
}
@@ -842,6 +845,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
if (sk->sk_socket && !ssk->sk_socket)
mptcp_sock_graft(ssk, sk->sk_socket);
+ mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
return true;
@@ -958,12 +962,6 @@ static void __mptcp_clean_una(struct sock *sk)
struct mptcp_data_frag *dtmp, *dfrag;
u64 snd_una;
- /* on fallback we just need to ignore snd_una, as this is really
- * plain TCP
- */
- if (__mptcp_check_fallback(msk))
- msk->snd_una = READ_ONCE(msk->snd_nxt);
-
snd_una = msk->snd_una;
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
@@ -1491,8 +1489,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
* that has been handed to the subflow for transmission
* and skip update in case it was old dfrag.
*/
- if (likely(after64(snd_nxt_new, msk->snd_nxt)))
+ if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
+ msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
msk->snd_nxt = snd_nxt_new;
+ }
}
void mptcp_check_and_set_pending(struct sock *sk)
@@ -2549,6 +2549,7 @@ static void __mptcp_retrans(struct sock *sk)
}
if (copied) {
dfrag->already_sent = max(dfrag->already_sent, info.sent);
+ msk->bytes_retrans += copied;
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
WRITE_ONCE(msk->allow_infinite_fallback, false);
@@ -2607,6 +2608,7 @@ static void mptcp_do_fastclose(struct sock *sk)
struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
+ inet_sk_state_store(sk, TCP_CLOSE);
mptcp_for_each_subflow_safe(msk, subflow, tmp)
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
subflow, MPTCP_CF_FASTCLOSE);
@@ -2640,10 +2642,9 @@ static void mptcp_worker(struct work_struct *work)
* even if it is orphaned and in FIN_WAIT2 state
*/
if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_should_close(sk)) {
- inet_sk_state_store(sk, TCP_CLOSE);
+ if (mptcp_should_close(sk))
mptcp_do_fastclose(sk);
- }
+
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
goto unlock;
@@ -2682,6 +2683,7 @@ static int __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
WRITE_ONCE(msk->allow_infinite_fallback, true);
msk->recovery = false;
+ msk->subflow_id = 1;
mptcp_pm_data_init(msk);
@@ -2878,7 +2880,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
void __mptcp_unaccepted_force_close(struct sock *sk)
{
sock_set_flag(sk, SOCK_DEAD);
- inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
__mptcp_destroy_sock(sk);
}
@@ -2934,7 +2935,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
/* If the msk has read data, or the caller explicitly ask it,
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
*/
- inet_sk_state_store(sk, TCP_CLOSE);
mptcp_do_fastclose(sk);
timeout = 0;
} else if (mptcp_close_state(sk)) {
@@ -3069,6 +3069,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
+ msk->bytes_acked = 0;
+ msk->bytes_received = 0;
+ msk->bytes_sent = 0;
+ msk->bytes_retrans = 0;
WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
@@ -3119,6 +3123,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
+ /* passive msk is created after the first/MPC subflow */
+ msk->subflow_id = 2;
+
sock_reset_flag(nsk, SOCK_RCU_FREE);
security_inet_csk_clone(nsk, req);
@@ -3538,11 +3545,10 @@ static int mptcp_ioctl_outq(const struct mptcp_sock *msk, u64 v)
return (int)delta;
}
-static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
{
struct mptcp_sock *msk = mptcp_sk(sk);
bool slow;
- int answ;
switch (cmd) {
case SIOCINQ:
@@ -3551,24 +3557,24 @@ static int mptcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
lock_sock(sk);
__mptcp_move_skbs(msk);
- answ = mptcp_inq_hint(sk);
+ *karg = mptcp_inq_hint(sk);
release_sock(sk);
break;
case SIOCOUTQ:
slow = lock_sock_fast(sk);
- answ = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
+ *karg = mptcp_ioctl_outq(msk, READ_ONCE(msk->snd_una));
unlock_sock_fast(sk, slow);
break;
case SIOCOUTQNSD:
slow = lock_sock_fast(sk);
- answ = mptcp_ioctl_outq(msk, msk->snd_nxt);
+ *karg = mptcp_ioctl_outq(msk, msk->snd_nxt);
unlock_sock_fast(sk, slow);
break;
default:
return -ENOIOCTLCMD;
}
- return put_user(answ, (int __user *)arg);
+ return 0;
}
static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
@@ -3726,6 +3732,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
struct socket *ssock;
+ struct sock *newsk;
int err;
pr_debug("msk=%p", msk);
@@ -3737,17 +3744,20 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (!ssock)
return -EINVAL;
- err = ssock->ops->accept(sock, newsock, flags, kern);
- if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) {
- struct mptcp_sock *msk = mptcp_sk(newsock->sk);
+ newsk = mptcp_accept(sock->sk, flags, &err, kern);
+ if (!newsk)
+ return err;
+
+ lock_sock(newsk);
+
+ __inet_accept(sock, newsock, newsk);
+ if (!mptcp_is_tcpsk(newsock->sk)) {
+ struct mptcp_sock *msk = mptcp_sk(newsk);
struct mptcp_subflow_context *subflow;
- struct sock *newsk = newsock->sk;
set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
msk->in_accept_queue = 0;
- lock_sock(newsk);
-
/* set ssk->sk_socket of accept()ed flows to mptcp socket.
* This is needed so NOSPACE flag can be set from tcp stack.
*/
@@ -3768,11 +3778,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
if (unlikely(list_empty(&msk->conn_list)))
inet_sk_state_store(newsk, TCP_CLOSE);
}
-
- release_sock(newsk);
}
+ release_sock(newsk);
- return err;
+ return 0;
}
static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d3783a7056e1..37fbe22e2433 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -261,10 +261,13 @@ struct mptcp_sock {
u64 local_key;
u64 remote_key;
u64 write_seq;
+ u64 bytes_sent;
u64 snd_nxt;
+ u64 bytes_received;
u64 ack_seq;
atomic64_t rcv_wnd_sent;
u64 rcv_data_fin_seq;
+ u64 bytes_retrans;
int rmem_fwd_alloc;
struct sock *last_snd;
int snd_burst;
@@ -273,6 +276,7 @@ struct mptcp_sock {
* recovery related fields are under data_lock
* protection
*/
+ u64 bytes_acked;
u64 snd_una;
u64 wnd_end;
unsigned long timer_ival;
@@ -318,7 +322,8 @@ struct mptcp_sock {
u64 rtt_us; /* last maximum rtt of subflows */
} rcvq_space;
- u32 setsockopt_seq;
+ u32 subflow_id;
+ u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
struct mptcp_sock *dl_next;
};
@@ -498,6 +503,8 @@ struct mptcp_subflow_context {
u8 reset_reason:4;
u8 stale_count;
+ u32 subflow_id;
+
long delegated_status;
unsigned long fail_tout;
@@ -636,6 +643,7 @@ void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk);
bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
const struct mptcp_addr_info *b, bool use_port);
+void mptcp_local_address(const struct sock_common *skc, struct mptcp_addr_info *addr);
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
@@ -806,7 +814,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct mptcp_addr_info *rem,
u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
- const struct mptcp_pm_addr_entry *entry);
+ const struct mptcp_addr_info *addr);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
struct mptcp_pm_add_entry *
@@ -818,9 +826,15 @@ mptcp_lookup_anno_list_by_saddr(const struct mptcp_sock *msk,
int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
+int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id,
+ u8 *flags, int *ifindex);
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
+int mptcp_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup);
+int mptcp_pm_nl_set_flags(struct net *net, struct mptcp_pm_addr_entry *addr, u8 bkup);
int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
struct mptcp_pm_addr_entry *loc,
struct mptcp_pm_addr_entry *rem, u8 bkup);
@@ -913,13 +927,13 @@ bool mptcp_pm_add_addr_signal(struct mptcp_sock *msk, const struct sk_buff *skb,
bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining,
struct mptcp_rm_list *rm_list);
int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
+int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc);
void __init mptcp_pm_nl_init(void);
void mptcp_pm_nl_work(struct mptcp_sock *msk);
void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk,
const struct mptcp_rm_list *rm_list);
-int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct sock_common *skc);
unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk);
unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index d4258869ac48..63f7a09335c5 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -14,7 +14,8 @@
#include <net/mptcp.h>
#include "protocol.h"
-#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_INFO_OPTLEN_SIZE 16
+#define MIN_FULL_INFO_OPTLEN_SIZE 40
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
@@ -355,6 +356,7 @@ static int mptcp_setsockopt_sol_socket(struct mptcp_sock *msk, int optname,
case SO_BROADCAST:
case SO_BSDCOMPAT:
case SO_PASSCRED:
+ case SO_PASSPIDFD:
case SO_PASSSEC:
case SO_RXQ_OVFL:
case SO_WIFI_STATUS:
@@ -888,7 +890,9 @@ out:
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
{
+ struct sock *sk = (struct sock *)msk;
u32 flags = 0;
+ bool slow;
memset(info, 0, sizeof(*info));
@@ -897,6 +901,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ return;
+
/* The following limits only make sense for the in-kernel PM */
if (mptcp_pm_is_kernel(msk)) {
info->mptcpi_subflows_max =
@@ -914,11 +921,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
if (READ_ONCE(msk->can_ack))
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
info->mptcpi_flags = flags;
- info->mptcpi_token = READ_ONCE(msk->token);
- info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
- info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
- info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
- info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
+ mptcp_data_lock(sk);
+ info->mptcpi_snd_una = msk->snd_una;
+ info->mptcpi_rcv_nxt = msk->ack_seq;
+ info->mptcpi_bytes_acked = msk->bytes_acked;
+ mptcp_data_unlock(sk);
+
+ slow = lock_sock_fast(sk);
+ info->mptcpi_csum_enabled = msk->csum_enabled;
+ info->mptcpi_token = msk->token;
+ info->mptcpi_write_seq = msk->write_seq;
+ info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
+ info->mptcpi_bytes_sent = msk->bytes_sent;
+ info->mptcpi_bytes_received = msk->bytes_received;
+ info->mptcpi_bytes_retrans = msk->bytes_retrans;
+ unlock_sock_fast(sk, slow);
}
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
@@ -965,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
}
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
- char __user *optval, int __user *optlen)
+ char __user *optval,
+ int __user *optlen)
{
int len, copylen;
@@ -1146,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
return 0;
}
+static int mptcp_get_full_info(struct mptcp_full_info *mfi,
+ char __user *optval,
+ int __user *optlen)
+{
+ int len;
+
+ BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
+ MIN_FULL_INFO_OPTLEN_SIZE);
+
+ if (get_user(len, optlen))
+ return -EFAULT;
+
+ if (len < MIN_FULL_INFO_OPTLEN_SIZE)
+ return -EINVAL;
+
+ memset(mfi, 0, sizeof(*mfi));
+ if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
+ return -EFAULT;
+
+ if (mfi->size_tcpinfo_kernel ||
+ mfi->size_sfinfo_kernel ||
+ mfi->num_subflows)
+ return -EINVAL;
+
+ if (mfi->size_sfinfo_user > INT_MAX ||
+ mfi->size_tcpinfo_user > INT_MAX)
+ return -EINVAL;
+
+ return len - MIN_FULL_INFO_OPTLEN_SIZE;
+}
+
+static int mptcp_put_full_info(struct mptcp_full_info *mfi,
+ char __user *optval,
+ u32 copylen,
+ int __user *optlen)
+{
+ copylen += MIN_FULL_INFO_OPTLEN_SIZE;
+ if (put_user(copylen, optlen))
+ return -EFAULT;
+
+ if (copy_to_user(optval, mfi, copylen))
+ return -EFAULT;
+ return 0;
+}
+
+static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
+ int __user *optlen)
+{
+ unsigned int sfcount = 0, copylen = 0;
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk = (struct sock *)msk;
+ void __user *tcpinfoptr, *sfinfoptr;
+ struct mptcp_full_info mfi;
+ int len;
+
+ len = mptcp_get_full_info(&mfi, optval, optlen);
+ if (len < 0)
+ return len;
+
+ /* don't bother filling the mptcp info if there is not enough
+ * user-space-provided storage
+ */
+ if (len > 0) {
+ mptcp_diag_fill_info(msk, &mfi.mptcp_info);
+ copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
+ }
+
+ mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
+ mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
+ sizeof(struct tcp_info));
+ sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
+ mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
+ mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
+ sizeof(struct mptcp_subflow_info));
+ tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
+
+ lock_sock(sk);
+ mptcp_for_each_subflow(msk, subflow) {
+ struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+ struct mptcp_subflow_info sfinfo;
+ struct tcp_info tcp_info;
+
+ if (sfcount++ >= mfi.size_arrays_user)
+ continue;
+
+ /* fetch addr/tcp_info only if the user space buffers
+ * are wide enough
+ */
+ memset(&sfinfo, 0, sizeof(sfinfo));
+ sfinfo.id = subflow->subflow_id;
+ if (mfi.size_sfinfo_user >
+ offsetof(struct mptcp_subflow_info, addrs))
+ mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
+ if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
+ goto fail_release;
+
+ if (mfi.size_tcpinfo_user) {
+ tcp_get_info(ssk, &tcp_info);
+ if (copy_to_user(tcpinfoptr, &tcp_info,
+ mfi.size_tcpinfo_user))
+ goto fail_release;
+ }
+
+ tcpinfoptr += mfi.size_tcpinfo_user;
+ sfinfoptr += mfi.size_sfinfo_user;
+ }
+ release_sock(sk);
+
+ mfi.num_subflows = sfcount;
+ if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
+ return -EFAULT;
+
+ return 0;
+
+fail_release:
+ release_sock(sk);
+ return -EFAULT;
+}
+
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
int __user *optlen, int val)
{
@@ -1219,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
switch (optname) {
case MPTCP_INFO:
return mptcp_getsockopt_info(msk, optval, optlen);
+ case MPTCP_FULL_INFO:
+ return mptcp_getsockopt_full_info(msk, optval, optlen);
case MPTCP_TCPINFO:
return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
case MPTCP_SUBFLOW_ADDRS:
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d9c8b21c6076..285e8ff74277 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -819,6 +819,7 @@ create_child:
if (!ctx->conn)
goto fallback;
+ ctx->subflow_id = 1;
owner = mptcp_sk(ctx->conn);
mptcp_pm_new_connection(owner, child, 1);
@@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
+ subflow->subflow_id = msk->subflow_id++;
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
sock_hold(ssk);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index 6447a09932f5..069c2659074b 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -611,14 +611,14 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr)
return 0;
}
-/* Response handler for Mellanox command Get Mac Address */
-static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr)
+/* Response handler for Get Mac Address command */
+static int ncsi_rsp_handler_oem_gma(struct ncsi_request *nr, int mfr_id)
{
struct ncsi_dev_priv *ndp = nr->ndp;
struct net_device *ndev = ndp->ndev.dev;
- const struct net_device_ops *ops = ndev->netdev_ops;
struct ncsi_rsp_oem_pkt *rsp;
struct sockaddr saddr;
+ u32 mac_addr_off = 0;
int ret = 0;
/* Get the response header */
@@ -626,11 +626,25 @@ static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr)
saddr.sa_family = ndev->type;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- memcpy(saddr.sa_data, &rsp->data[MLX_MAC_ADDR_OFFSET], ETH_ALEN);
+ if (mfr_id == NCSI_OEM_MFR_BCM_ID)
+ mac_addr_off = BCM_MAC_ADDR_OFFSET;
+ else if (mfr_id == NCSI_OEM_MFR_MLX_ID)
+ mac_addr_off = MLX_MAC_ADDR_OFFSET;
+ else if (mfr_id == NCSI_OEM_MFR_INTEL_ID)
+ mac_addr_off = INTEL_MAC_ADDR_OFFSET;
+
+ memcpy(saddr.sa_data, &rsp->data[mac_addr_off], ETH_ALEN);
+ if (mfr_id == NCSI_OEM_MFR_BCM_ID || mfr_id == NCSI_OEM_MFR_INTEL_ID)
+ eth_addr_inc((u8 *)saddr.sa_data);
+ if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+ return -ENXIO;
+
/* Set the flag for GMA command which should only be called once */
ndp->gma_flag = 1;
- ret = ops->ndo_set_mac_address(ndev, &saddr);
+ rtnl_lock();
+ ret = dev_set_mac_address(ndev, &saddr, NULL);
+ rtnl_unlock();
if (ret < 0)
netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
@@ -649,41 +663,10 @@ static int ncsi_rsp_handler_oem_mlx(struct ncsi_request *nr)
if (mlx->cmd == NCSI_OEM_MLX_CMD_GMA &&
mlx->param == NCSI_OEM_MLX_CMD_GMA_PARAM)
- return ncsi_rsp_handler_oem_mlx_gma(nr);
+ return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_MLX_ID);
return 0;
}
-/* Response handler for Broadcom command Get Mac Address */
-static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
-{
- struct ncsi_dev_priv *ndp = nr->ndp;
- struct net_device *ndev = ndp->ndev.dev;
- const struct net_device_ops *ops = ndev->netdev_ops;
- struct ncsi_rsp_oem_pkt *rsp;
- struct sockaddr saddr;
- int ret = 0;
-
- /* Get the response header */
- rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
-
- saddr.sa_family = ndev->type;
- ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- memcpy(saddr.sa_data, &rsp->data[BCM_MAC_ADDR_OFFSET], ETH_ALEN);
- /* Increase mac address by 1 for BMC's address */
- eth_addr_inc((u8 *)saddr.sa_data);
- if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
- return -ENXIO;
-
- /* Set the flag for GMA command which should only be called once */
- ndp->gma_flag = 1;
-
- ret = ops->ndo_set_mac_address(ndev, &saddr);
- if (ret < 0)
- netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
-
- return ret;
-}
-
/* Response handler for Broadcom card */
static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
{
@@ -695,42 +678,10 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
bcm = (struct ncsi_rsp_oem_bcm_pkt *)(rsp->data);
if (bcm->type == NCSI_OEM_BCM_CMD_GMA)
- return ncsi_rsp_handler_oem_bcm_gma(nr);
+ return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_BCM_ID);
return 0;
}
-/* Response handler for Intel command Get Mac Address */
-static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr)
-{
- struct ncsi_dev_priv *ndp = nr->ndp;
- struct net_device *ndev = ndp->ndev.dev;
- const struct net_device_ops *ops = ndev->netdev_ops;
- struct ncsi_rsp_oem_pkt *rsp;
- struct sockaddr saddr;
- int ret = 0;
-
- /* Get the response header */
- rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
-
- saddr.sa_family = ndev->type;
- ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN);
- /* Increase mac address by 1 for BMC's address */
- eth_addr_inc((u8 *)saddr.sa_data);
- if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
- return -ENXIO;
-
- /* Set the flag for GMA command which should only be called once */
- ndp->gma_flag = 1;
-
- ret = ops->ndo_set_mac_address(ndev, &saddr);
- if (ret < 0)
- netdev_warn(ndev,
- "NCSI: 'Writing mac address to device failed\n");
-
- return ret;
-}
-
/* Response handler for Intel card */
static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
{
@@ -742,7 +693,7 @@ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data);
if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA)
- return ncsi_rsp_handler_oem_intel_gma(nr);
+ return ncsi_rsp_handler_oem_gma(nr, NCSI_OEM_MFR_INTEL_ID);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index a80b960223e1..9193e109e6b3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -139,7 +139,7 @@ retry:
if (PTR_ERR(rt) == -EINVAL && *saddr &&
rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
*saddr = 0;
- flowi4_update_output(&fl4, 0, 0, daddr, 0);
+ flowi4_update_output(&fl4, 0, daddr, 0);
goto retry;
}
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
@@ -147,7 +147,7 @@ retry:
} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
ip_rt_put(rt);
*saddr = fl4.saddr;
- flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
+ flowi4_update_output(&fl4, 0, daddr, fl4.saddr);
loop = true;
goto retry;
}
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 728eeb0aea87..ad6f0ca40cd2 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -296,6 +296,7 @@ void nf_conntrack_gre_init_net(struct net *net)
/* protocol helper struct */
const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
.l4proto = IPPROTO_GRE,
+ .allow_clash = true,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index b0ef48b21dcb..1d34d700bd09 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
- if (!dst_hold_safe(route->tuple[dir].dst))
- return -1;
-
flow_tuple->dst_cache = dst;
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
break;
@@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow,
dst_release(flow->tuplehash[dir].tuple.dst_cache);
}
-int flow_offload_route_init(struct flow_offload *flow,
+void flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route)
{
- int err;
-
- err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
- if (err < 0)
- return err;
-
- err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
- if (err < 0)
- goto err_route_reply;
-
+ flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
flow->type = NF_FLOW_OFFLOAD_ROUTE;
-
- return 0;
-
-err_route_reply:
- nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
-
- return err;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 3bbaf9c7ea46..e45fade76409 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -8,6 +8,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
+#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -163,38 +164,43 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
}
}
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize,
- u32 offset)
+struct nf_flowtable_ctx {
+ const struct net_device *in;
+ u32 offset;
+ u32 hdrsize;
+};
+
+static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
u8 ipproto;
- if (!pskb_may_pull(skb, sizeof(*iph) + offset))
+ if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
return -1;
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
thoff = (iph->ihl * 4);
if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;
- thoff += offset;
+ thoff += ctx->offset;
ipproto = iph->protocol;
switch (ipproto) {
case IPPROTO_TCP:
- *hdrsize = sizeof(struct tcphdr);
+ ctx->hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- *hdrsize = sizeof(struct udphdr);
+ ctx->hdrsize = sizeof(struct udphdr);
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
- *hdrsize = sizeof(struct gre_base_hdr);
+ ctx->hdrsize = sizeof(struct gre_base_hdr);
break;
#endif
default:
@@ -204,7 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
if (iph->ttl <= 1)
return -1;
- if (!pskb_may_pull(skb, thoff + *hdrsize))
+ if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
return -1;
switch (ipproto) {
@@ -224,13 +230,13 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
}
}
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
tuple->l3proto = AF_INET;
tuple->l4proto = ipproto;
- tuple->iifidx = dev->ifindex;
+ tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
return 0;
@@ -336,58 +342,56 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
return NF_STOLEN;
}
-unsigned int
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table, struct sk_buff *skb)
{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
- enum flow_offload_tuple_dir dir;
- struct flow_offload *flow;
- struct net_device *outdev;
- u32 hdrsize, offset = 0;
- unsigned int thoff, mtu;
- struct rtable *rt;
- struct iphdr *iph;
- __be32 nexthop;
- int ret;
if (skb->protocol != htons(ETH_P_IP) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
- return NF_ACCEPT;
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+ return NULL;
- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
- return NF_ACCEPT;
+ if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
+ return NULL;
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
+ return flow_offload_lookup(flow_table, &tuple);
+}
+
+static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct flow_offload_tuple_rhash *tuplehash,
+ struct sk_buff *skb)
+{
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ unsigned int thoff, mtu;
+ struct iphdr *iph;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
- return NF_ACCEPT;
+ return 0;
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
- thoff = (iph->ihl * 4) + offset;
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
+ thoff = (iph->ihl * 4) + ctx->offset;
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
- return NF_ACCEPT;
+ return 0;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
- return NF_ACCEPT;
+ return 0;
}
- if (skb_try_make_writable(skb, thoff + hdrsize))
- return NF_DROP;
+ if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ return -1;
flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
- thoff -= offset;
+ thoff -= ctx->offset;
iph = ip_hdr(skb);
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
@@ -398,6 +402,35 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+ return 1;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ enum flow_offload_tuple_dir dir;
+ struct nf_flowtable_ctx ctx = {
+ .in = state->in,
+ };
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct rtable *rt;
+ __be32 nexthop;
+ int ret;
+
+ tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
+ if (!tuplehash)
+ return NF_ACCEPT;
+
+ ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ return NF_ACCEPT;
+
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
rt = (struct rtable *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
@@ -406,6 +439,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = (struct rtable *)tuplehash->tuple.dst_cache;
@@ -535,32 +571,31 @@ static void nf_flow_nat_ipv6(const struct flow_offload *flow,
}
}
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize,
- u32 offset)
+static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
u8 nexthdr;
- thoff = sizeof(*ip6h) + offset;
+ thoff = sizeof(*ip6h) + ctx->offset;
if (!pskb_may_pull(skb, thoff))
return -1;
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
nexthdr = ip6h->nexthdr;
switch (nexthdr) {
case IPPROTO_TCP:
- *hdrsize = sizeof(struct tcphdr);
+ ctx->hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- *hdrsize = sizeof(struct udphdr);
+ ctx->hdrsize = sizeof(struct udphdr);
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
- *hdrsize = sizeof(struct gre_base_hdr);
+ ctx->hdrsize = sizeof(struct gre_base_hdr);
break;
#endif
default:
@@ -570,7 +605,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
if (ip6h->hop_limit <= 1)
return -1;
- if (!pskb_may_pull(skb, thoff + *hdrsize))
+ if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
return -1;
switch (nexthdr) {
@@ -590,65 +625,47 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
}
}
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
tuple->l3proto = AF_INET6;
tuple->l4proto = nexthdr;
- tuple->iifidx = dev->ifindex;
+ tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
return 0;
}
-unsigned int
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct flow_offload_tuple_rhash *tuplehash,
+ struct sk_buff *skb)
{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
- struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
- const struct in6_addr *nexthop;
struct flow_offload *flow;
- struct net_device *outdev;
unsigned int thoff, mtu;
- u32 hdrsize, offset = 0;
struct ipv6hdr *ip6h;
- struct rt6_info *rt;
- int ret;
-
- if (skb->protocol != htons(ETH_P_IPV6) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
- return NF_ACCEPT;
-
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
- return NF_ACCEPT;
-
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
- return NF_ACCEPT;
+ return 0;
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
- thoff = sizeof(*ip6h) + offset;
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
+ thoff = sizeof(*ip6h) + ctx->offset;
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
- return NF_ACCEPT;
+ return 0;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
- return NF_ACCEPT;
+ return 0;
}
- if (skb_try_make_writable(skb, thoff + hdrsize))
- return NF_DROP;
+ if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ return -1;
flow_offload_refresh(flow_table, flow, false);
@@ -663,6 +680,52 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+ return 1;
+}
+
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct sk_buff *skb)
+{
+ struct flow_offload_tuple tuple = {};
+
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+ return NULL;
+
+ if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
+ return NULL;
+
+ return flow_offload_lookup(flow_table, &tuple);
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ enum flow_offload_tuple_dir dir;
+ struct nf_flowtable_ctx ctx = {
+ .in = state->in,
+ };
+ const struct in6_addr *nexthop;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct rt6_info *rt;
+ int ret;
+
+ tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ return NF_ACCEPT;
+
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
@@ -671,6 +734,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4c7937fd803f..d543787fc851 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6754,10 +6754,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_element_clash;
}
- if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size &&
- !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
- err = -ENFILE;
- goto err_set_full;
+ if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+ unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX;
+
+ if (!atomic_add_unless(&set->nelems, 1, max)) {
+ err = -ENFILE;
+ goto err_set_full;
+ }
}
nft_trans_elem(trans) = elem;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index e311462f6d98..556bc902af00 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -30,6 +30,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <linux/cgroup-defs.h>
+#include <net/gso.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/netfilter/nf_queue.h>
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a54a7f772cec..671474e59817 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -10,6 +10,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
@@ -406,6 +407,82 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int thoff, dataoff, optoff, optlen, i;
+ u32 *dest = &regs->data[priv->dreg];
+ const struct dccp_hdr *dh;
+ struct dccp_hdr _dh;
+
+ if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
+ goto err;
+
+ thoff = nft_thoff(pkt);
+
+ dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
+ if (!dh)
+ goto err;
+
+ dataoff = dh->dccph_doff * sizeof(u32);
+ optoff = __dccp_hdr_len(dh);
+ if (dataoff <= optoff)
+ goto err;
+
+ optlen = dataoff - optoff;
+
+ for (i = 0; i < optlen; ) {
+ /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in
+ * the length; the remaining options are at least 2B long. In
+ * all cases, the first byte contains the option type. In
+ * multi-byte options, the second byte contains the option
+ * length, which must be at least two: 1 for the type plus 1 for
+ * the length plus 0-253 for any following option data. We
+ * aren't interested in the option data, only the type and the
+ * length, so we don't need to read more than two bytes at a
+ * time.
+ */
+ unsigned int buflen = optlen - i;
+ u8 buf[2], *bufp;
+ u8 type, len;
+
+ if (buflen > sizeof(buf))
+ buflen = sizeof(buf);
+
+ bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
+ &buf);
+ if (!bufp)
+ goto err;
+
+ type = bufp[0];
+
+ if (type == priv->type) {
+ *dest = 1;
+ return;
+ }
+
+ if (type <= DCCPO_MAX_RESERVED) {
+ i++;
+ continue;
+ }
+
+ if (buflen < 2)
+ goto err;
+
+ len = bufp[1];
+
+ if (len < 2)
+ goto err;
+
+ i += len;
+ }
+
+err:
+ *dest = 0;
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
@@ -557,6 +634,22 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
return 0;
}
+static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ int err = nft_exthdr_init(ctx, expr, tb);
+
+ if (err < 0)
+ return err;
+
+ if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
@@ -686,6 +779,15 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.reduce = nft_exthdr_reduce,
};
+static const struct nft_expr_ops nft_exthdr_dccp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_dccp_eval,
+ .init = nft_exthdr_dccp_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -720,6 +822,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_sctp_ops;
break;
+ case NFT_EXTHDR_OP_DCCP:
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_dccp_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e860d8fe0e5e..5ef9146e74ad 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
break;
}
+ if (!dst_hold_safe(this_dst))
+ return -ENOENT;
+
nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
- if (!other_dst)
+ if (!other_dst) {
+ dst_release(this_dst);
return -ENOENT;
+ }
nft_default_forward_path(route, this_dst, dir);
nft_default_forward_path(route, other_dst, !dir);
@@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (!flow)
goto err_flow_alloc;
- if (flow_offload_route_init(flow, &route) < 0)
- goto err_flow_add;
+ flow_offload_route_init(flow, &route);
if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
@@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (ret < 0)
goto err_flow_add;
- dst_release(route.tuple[!dir].dst);
return;
err_flow_add:
flow_offload_free(flow);
err_flow_alloc:
+ dst_release(route.tuple[dir].dst);
dst_release(route.tuple[!dir].dst);
err_flow_route:
clear_bit(IPS_OFFLOAD_BIT, &ct->status);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 03ef4fdaa460..29ac48cdd6db 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -19,6 +19,7 @@ struct nft_lookup {
struct nft_set *set;
u8 sreg;
u8 dreg;
+ bool dreg_set;
bool invert;
struct nft_set_binding binding;
};
@@ -75,7 +76,7 @@ void nft_lookup_eval(const struct nft_expr *expr,
}
if (ext) {
- if (set->flags & NFT_SET_MAP)
+ if (priv->dreg_set)
nft_data_copy(&regs->data[priv->dreg],
nft_set_ext_data(ext), set->dlen);
@@ -122,11 +123,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (flags & ~NFT_LOOKUP_F_INV)
return -EINVAL;
- if (flags & NFT_LOOKUP_F_INV) {
- if (set->flags & NFT_SET_MAP)
- return -EINVAL;
+ if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
- }
}
if (tb[NFTA_LOOKUP_DREG] != NULL) {
@@ -140,8 +138,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
set->dlen);
if (err < 0)
return err;
- } else if (set->flags & NFT_SET_MAP)
- return -EINVAL;
+ priv->dreg_set = true;
+ } else if (set->flags & NFT_SET_MAP) {
+ /* Map given, but user asks for lookup only (i.e. to
+ * ignore value assoicated with key).
+ *
+ * This makes no sense for anonymous maps since they are
+ * scoped to the rule, but for named sets this can be useful.
+ */
+ if (set->flags & NFT_SET_ANONYMOUS)
+ return -EINVAL;
+ }
priv->binding.flags = set->flags & NFT_SET_MAP;
@@ -188,7 +195,7 @@ static int nft_lookup_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
goto nla_put_failure;
- if (priv->set->flags & NFT_SET_MAP)
+ if (priv->dreg_set)
if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 0452ee586c1c..db526cb7a485 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1274,8 +1274,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
struct nft_pipapo_match *new;
int i;
- new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count,
- GFP_KERNEL);
+ new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
return ERR_PTR(-ENOMEM);
@@ -2084,8 +2083,7 @@ static int nft_pipapo_init(const struct nft_set *set,
if (field_count > NFT_PIPAPO_MAX_FIELDS)
return -EINVAL;
- m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count,
- GFP_KERNEL);
+ m = kmalloc(struct_size(m, f, field_count), GFP_KERNEL);
if (!m)
return -ENOMEM;
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index 9f80972ae39b..7eaa35fdd9bd 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -57,8 +57,8 @@ struct netlbl_domaddr6_map {
struct netlbl_dom_map {
char *domain;
- u16 family;
struct netlbl_dommap_def def;
+ u16 family;
u32 valid;
struct list_head list;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 3a1e0fd5bf14..cbd9aa7ee24a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2360,7 +2360,9 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->strict_check = !!(nlk2->flags & NETLINK_F_STRICT_CHK);
if (control->start) {
+ cb->extack = control->extack;
ret = control->start(cb);
+ cb->extack = NULL;
if (ret)
goto error_put;
}
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 04c4036bf406..a157247a1e45 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -912,6 +912,7 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
.start = genl_start,
.dump = genl_lock_dumpit,
.done = genl_lock_done,
+ .extack = extack,
};
genl_unlock();
@@ -924,6 +925,7 @@ static int genl_family_rcv_msg_dumpit(const struct genl_family *family,
.start = genl_start,
.dump = ops->dumpit,
.done = genl_parallel_done,
+ .extack = extack,
};
err = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 41e3a20c8935..cdb001de0692 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -390,7 +390,8 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
const u8 *service_name_tlv = NULL;
const u8 *miux_tlv = NULL;
const u8 *rw_tlv = NULL;
- u8 service_name_tlv_length, miux_tlv_length, rw_tlv_length, rw;
+ u8 service_name_tlv_length = 0;
+ u8 miux_tlv_length, rw_tlv_length, rw;
int err;
u16 size = 0;
__be16 miux;
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 0f23e5e8e03e..f4a38bd6a7e0 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <net/gso.h>
#include <net/nsh.h>
#include <net/tun_proto.h>
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index a8cf9a88758e..cab1e02b63e0 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -17,6 +17,7 @@
#include <linux/if_vlan.h>
#include <net/dst.h>
+#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_fib.h>
@@ -1072,8 +1073,16 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
struct ovs_action_hash *hash_act = nla_data(attr);
u32 hash = 0;
- /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
- hash = skb_get_hash(skb);
+ if (hash_act->hash_alg == OVS_HASH_ALG_L4) {
+ /* OVS_HASH_ALG_L4 hasing type. */
+ hash = skb_get_hash(skb);
+ } else if (hash_act->hash_alg == OVS_HASH_ALG_SYM_L4) {
+ /* OVS_HASH_ALG_SYM_L4 hashing type. NOTE: this doesn't
+ * extend past an encapsulated header.
+ */
+ hash = __skb_get_hash_symmetric(skb);
+ }
+
hash = jhash_1word(hash, hash_act->hash_basis);
if (!hash)
hash = 0x1;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 58f530f60172..a6d2a0b1aa21 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -35,6 +35,7 @@
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
+#include <net/gso.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_cls.h>
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index ead5418c126e..41116361433d 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -3221,6 +3221,8 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
switch (act_hash->hash_alg) {
case OVS_HASH_ALG_L4:
+ fallthrough;
+ case OVS_HASH_ALG_SYM_L4:
break;
default:
return -EINVAL;
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index f2698d2316df..c4ebf810e4b1 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -69,9 +69,7 @@ static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
{
struct dp_meter_instance *ti;
- ti = kvzalloc(sizeof(*ti) +
- sizeof(struct dp_meter *) * size,
- GFP_KERNEL);
+ ti = kvzalloc(struct_size(ti, dp_meters, size), GFP_KERNEL);
if (!ti)
return NULL;
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index ff5f49ab236e..3aa50dc7535b 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -28,24 +28,21 @@ static void pn_sock_close(struct sock *sk, long timeout)
sk_common_release(sk);
}
-static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int pn_ioctl(struct sock *sk, int cmd, int *karg)
{
struct sk_buff *skb;
- int answ;
switch (cmd) {
case SIOCINQ:
lock_sock(sk);
skb = skb_peek(&sk->sk_receive_queue);
- answ = skb ? skb->len : 0;
+ *karg = skb ? skb->len : 0;
release_sock(sk);
- return put_user(answ, (int __user *)arg);
+ return 0;
case SIOCPNADDRESOURCE:
case SIOCPNDELRESOURCE: {
- u32 res;
- if (get_user(res, (u32 __user *)arg))
- return -EFAULT;
+ u32 res = *karg;
if (res >= 256)
return -EINVAL;
if (cmd == SIOCPNADDRESOURCE)
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 83ea13a50690..faba31f2eff2 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -917,10 +917,9 @@ static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
return 0;
}
-static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int pep_ioctl(struct sock *sk, int cmd, int *karg)
{
struct pep_sock *pn = pep_sk(sk);
- int answ;
int ret = -ENOIOCTLCMD;
switch (cmd) {
@@ -933,13 +932,13 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
lock_sock(sk);
if (sock_flag(sk, SOCK_URGINLINE) &&
!skb_queue_empty(&pn->ctrlreq_queue))
- answ = skb_peek(&pn->ctrlreq_queue)->len;
+ *karg = skb_peek(&pn->ctrlreq_queue)->len;
else if (!skb_queue_empty(&sk->sk_receive_queue))
- answ = skb_peek(&sk->sk_receive_queue)->len;
+ *karg = skb_peek(&sk->sk_receive_queue)->len;
else
- answ = 0;
+ *karg = 0;
release_sock(sk);
- ret = put_user(answ, (int __user *)arg);
+ ret = 0;
break;
case SIOCPNENABLEPIPE:
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 71e2caf6ab85..967f9b4dc026 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -387,7 +387,7 @@ static int pn_socket_ioctl(struct socket *sock, unsigned int cmd,
return put_user(handle, (__u16 __user *)arg);
}
- return sk->sk_prot->ioctl(sk, cmd, arg);
+ return sk_ioctl(sk, cmd, (void __user *)arg);
}
static int pn_socket_listen(struct socket *sock, int backlog)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index c819b812a899..b562fc2bb5b1 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -245,14 +245,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
nparms->tcfp_flags = parm->flags;
nparms->tcfp_nkeys = parm->nkeys;
- nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL);
+ nparms->tcfp_keys = kmemdup(parm->keys, ksize, GFP_KERNEL);
if (!nparms->tcfp_keys) {
ret = -ENOMEM;
goto put_chain;
}
- memcpy(nparms->tcfp_keys, parm->keys, ksize);
-
for (i = 0; i < nparms->tcfp_nkeys; ++i) {
u32 offmask = nparms->tcfp_keys[i].offmask;
u32 cur = nparms->tcfp_keys[i].off;
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 2e9dce03d1ec..f3121c5a85e9 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/slab.h>
#include <net/act_api.h>
+#include <net/gso.h>
#include <net/netlink.h>
#include <net/pkt_cls.h>
#include <net/tc_act/tc_police.h>
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 815c3e416bc5..56065cc5a661 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -11,6 +11,7 @@
#include <linux/rhashtable.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
+#include <linux/bitfield.h>
#include <linux/if_ether.h>
#include <linux/in6.h>
@@ -71,6 +72,7 @@ struct fl_flow_key {
struct flow_dissector_key_num_of_vlans num_of_vlans;
struct flow_dissector_key_pppoe pppoe;
struct flow_dissector_key_l2tpv3 l2tpv3;
+ struct flow_dissector_key_cfm cfm;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
struct fl_flow_mask_range {
@@ -120,6 +122,7 @@ struct cls_fl_filter {
u32 handle;
u32 flags;
u32 in_hw_count;
+ u8 needs_tc_skb_ext:1;
struct rcu_work rwork;
struct net_device *hw_dev;
/* Flower classifier is unlocked, which means that its reference counter
@@ -415,6 +418,8 @@ static struct cls_fl_head *fl_head_dereference(struct tcf_proto *tp)
static void __fl_destroy_filter(struct cls_fl_filter *f)
{
+ if (f->needs_tc_skb_ext)
+ tc_skb_ext_tc_disable();
tcf_exts_destroy(&f->exts);
tcf_exts_put_net(&f->exts);
kfree(f);
@@ -615,7 +620,8 @@ static void *fl_get(struct tcf_proto *tp, u32 handle)
}
static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
- [TCA_FLOWER_UNSPEC] = { .type = NLA_UNSPEC },
+ [TCA_FLOWER_UNSPEC] = { .strict_start_type =
+ TCA_FLOWER_L2_MISS },
[TCA_FLOWER_CLASSID] = { .type = NLA_U32 },
[TCA_FLOWER_INDEV] = { .type = NLA_STRING,
.len = IFNAMSIZ },
@@ -720,7 +726,8 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
[TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 },
[TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 },
-
+ [TCA_FLOWER_L2_MISS] = NLA_POLICY_MAX(NLA_U8, 1),
+ [TCA_FLOWER_KEY_CFM] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@ -769,6 +776,12 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 },
};
+static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = {
+ [TCA_FLOWER_KEY_CFM_MD_LEVEL] = NLA_POLICY_MAX(NLA_U8,
+ FLOW_DIS_CFM_MDL_MAX),
+ [TCA_FLOWER_KEY_CFM_OPCODE] = { .type = NLA_U8 },
+};
+
static void fl_set_key_val(struct nlattr **tb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -1656,6 +1669,53 @@ static bool is_vlan_key(struct nlattr *tb, __be16 *ethertype,
return false;
}
+static void fl_set_key_cfm_md_level(struct nlattr **tb,
+ struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ u8 level;
+
+ if (!tb[TCA_FLOWER_KEY_CFM_MD_LEVEL])
+ return;
+
+ level = nla_get_u8(tb[TCA_FLOWER_KEY_CFM_MD_LEVEL]);
+ key->cfm.mdl_ver = FIELD_PREP(FLOW_DIS_CFM_MDL_MASK, level);
+ mask->cfm.mdl_ver = FLOW_DIS_CFM_MDL_MASK;
+}
+
+static void fl_set_key_cfm_opcode(struct nlattr **tb,
+ struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ fl_set_key_val(tb, &key->cfm.opcode, TCA_FLOWER_KEY_CFM_OPCODE,
+ &mask->cfm.opcode, TCA_FLOWER_UNSPEC,
+ sizeof(key->cfm.opcode));
+}
+
+static int fl_set_key_cfm(struct nlattr **tb,
+ struct fl_flow_key *key,
+ struct fl_flow_key *mask,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX];
+ int err;
+
+ if (!tb[TCA_FLOWER_KEY_CFM])
+ return 0;
+
+ err = nla_parse_nested(nla_cfm_opt, TCA_FLOWER_KEY_CFM_OPT_MAX,
+ tb[TCA_FLOWER_KEY_CFM], cfm_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ fl_set_key_cfm_opcode(nla_cfm_opt, key, mask, extack);
+ fl_set_key_cfm_md_level(nla_cfm_opt, key, mask, extack);
+
+ return 0;
+}
+
static int fl_set_key(struct net *net, struct nlattr **tb,
struct fl_flow_key *key, struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -1671,6 +1731,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
mask->meta.ingress_ifindex = 0xffffffff;
}
+ fl_set_key_val(tb, &key->meta.l2_miss, TCA_FLOWER_L2_MISS,
+ &mask->meta.l2_miss, TCA_FLOWER_UNSPEC,
+ sizeof(key->meta.l2_miss));
+
fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst));
@@ -1806,6 +1870,10 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
TCA_FLOWER_KEY_L2TPV3_SID,
&mask->l2tpv3.session_id, TCA_FLOWER_UNSPEC,
sizeof(key->l2tpv3.session_id));
+ } else if (key->basic.n_proto == htons(ETH_P_CFM)) {
+ ret = fl_set_key_cfm(tb, key, mask, extack);
+ if (ret)
+ return ret;
}
if (key->basic.ip_proto == IPPROTO_TCP ||
@@ -1988,6 +2056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
FLOW_DISSECTOR_KEY_PPPOE, pppoe);
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3);
+ FL_KEY_SET_IF_MASKED(mask, keys, cnt,
+ FLOW_DISSECTOR_KEY_CFM, cfm);
skb_flow_dissector_init(dissector, keys, cnt);
}
@@ -2088,6 +2158,11 @@ errout_cleanup:
return ret;
}
+static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask)
+{
+ return mask->meta.l2_miss;
+}
+
static int fl_set_parms(struct net *net, struct tcf_proto *tp,
struct cls_fl_filter *f, struct fl_flow_mask *mask,
unsigned long base, struct nlattr **tb,
@@ -2124,6 +2199,14 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
return -EINVAL;
}
+ /* Enable tc skb extension if filter matches on data extracted from
+ * this extension.
+ */
+ if (fl_needs_tc_skb_ext(&mask->key)) {
+ f->needs_tc_skb_ext = 1;
+ tc_skb_ext_tc_enable();
+ }
+
return 0;
}
@@ -3008,6 +3091,43 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int fl_dump_key_cfm(struct sk_buff *skb,
+ struct flow_dissector_key_cfm *key,
+ struct flow_dissector_key_cfm *mask)
+{
+ struct nlattr *opts;
+ int err;
+ u8 mdl;
+
+ if (!memchr_inv(mask, 0, sizeof(*mask)))
+ return 0;
+
+ opts = nla_nest_start(skb, TCA_FLOWER_KEY_CFM);
+ if (!opts)
+ return -EMSGSIZE;
+
+ if (FIELD_GET(FLOW_DIS_CFM_MDL_MASK, mask->mdl_ver)) {
+ mdl = FIELD_GET(FLOW_DIS_CFM_MDL_MASK, key->mdl_ver);
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_CFM_MD_LEVEL, mdl);
+ if (err)
+ goto err_cfm_opts;
+ }
+
+ if (mask->opcode) {
+ err = nla_put_u8(skb, TCA_FLOWER_KEY_CFM_OPCODE, key->opcode);
+ if (err)
+ goto err_cfm_opts;
+ }
+
+ nla_nest_end(skb, opts);
+
+ return 0;
+
+err_cfm_opts:
+ nla_nest_cancel(skb, opts);
+ return err;
+}
+
static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
struct flow_dissector_key_enc_opts *enc_opts)
{
@@ -3077,6 +3197,11 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
}
+ if (fl_dump_key_val(skb, &key->meta.l2_miss,
+ TCA_FLOWER_L2_MISS, &mask->meta.l2_miss,
+ TCA_FLOWER_UNSPEC, sizeof(key->meta.l2_miss)))
+ goto nla_put_failure;
+
if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
sizeof(key->eth.dst)) ||
@@ -3290,6 +3415,9 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
sizeof(key->hash.hash)))
goto nla_put_failure;
+ if (fl_dump_key_cfm(skb, &key->cfm, &mask->cfm))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 891e007d5c0b..9cff99558694 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -65,6 +65,7 @@
#include <linux/reciprocal_div.h>
#include <net/netlink.h>
#include <linux/if_vlan.h>
+#include <net/gso.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/tcp.h>
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 8aef7dd9fb88..325c29041c7d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1814,10 +1814,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
NL_SET_ERR_MSG(extack, "HTB offload doesn't support the quantum parameter");
goto failure;
}
- if (hopt->prio) {
- NL_SET_ERR_MSG(extack, "HTB offload doesn't support the prio parameter");
- goto failure;
- }
}
/* Keeping backward compatible with rate_table based iproute2 tc */
@@ -1913,6 +1909,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
TC_HTB_CLASSID_ROOT,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
+ .prio = hopt->prio,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -1933,6 +1930,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
TC_H_MIN(parent->common.classid),
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
+ .prio = hopt->prio,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
@@ -2018,6 +2016,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
.classid = cl->common.classid,
.rate = max_t(u64, hopt->rate.rate, rate64),
.ceil = max_t(u64, hopt->ceil.rate, ceil64),
+ .prio = hopt->prio,
.extack = extack,
};
err = htb_offload(dev, &offload_opt);
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e79be1b3e74d..33c0dbe35956 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -21,6 +21,7 @@
#include <linux/reciprocal_div.h>
#include <linux/rbtree.h>
+#include <net/gso.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index cf0e61ed9225..717ae51d94a0 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -20,6 +20,7 @@
#include <linux/spinlock.h>
#include <linux/rcupdate.h>
#include <linux/time.h>
+#include <net/gso.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
@@ -27,6 +28,8 @@
#include <net/sock.h>
#include <net/tcp.h>
+#define TAPRIO_STAT_NOT_SET (~0ULL)
+
#include "sch_mqprio_lib.h"
static LIST_HEAD(taprio_list);
@@ -1527,7 +1530,7 @@ static int taprio_enable_offload(struct net_device *dev,
"Not enough memory for enabling offload mode");
return -ENOMEM;
}
- offload->enable = 1;
+ offload->cmd = TAPRIO_CMD_REPLACE;
offload->extack = extack;
mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
offload->mqprio.extack = extack;
@@ -1575,7 +1578,7 @@ static int taprio_disable_offload(struct net_device *dev,
"Not enough memory to disable offload mode");
return -ENOMEM;
}
- offload->enable = 0;
+ offload->cmd = TAPRIO_CMD_DESTROY;
err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
if (err < 0) {
@@ -2292,6 +2295,72 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int taprio_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
+{
+ if (val == TAPRIO_STAT_NOT_SET)
+ return 0;
+ if (nla_put_u64_64bit(skb, attrtype, val, TCA_TAPRIO_OFFLOAD_STATS_PAD))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int taprio_dump_xstats(struct Qdisc *sch, struct gnet_dump *d,
+ struct tc_taprio_qopt_offload *offload,
+ struct tc_taprio_qopt_stats *stats)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ const struct net_device_ops *ops;
+ struct sk_buff *skb = d->skb;
+ struct nlattr *xstats;
+ int err;
+
+ ops = qdisc_dev(sch)->netdev_ops;
+
+ /* FIXME I could use qdisc_offload_dump_helper(), but that messes
+ * with sch->flags depending on whether the device reports taprio
+ * stats, and I'm not sure whether that's a good idea, considering
+ * that stats are optional to the offload itself
+ */
+ if (!ops->ndo_setup_tc)
+ return 0;
+
+ memset(stats, 0xff, sizeof(*stats));
+
+ err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload);
+ if (err == -EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+
+ xstats = nla_nest_start(skb, TCA_STATS_APP);
+ if (!xstats)
+ goto err;
+
+ if (taprio_put_stat(skb, stats->window_drops,
+ TCA_TAPRIO_OFFLOAD_STATS_WINDOW_DROPS) ||
+ taprio_put_stat(skb, stats->tx_overruns,
+ TCA_TAPRIO_OFFLOAD_STATS_TX_OVERRUNS))
+ goto err_cancel;
+
+ nla_nest_end(skb, xstats);
+
+ return 0;
+
+err_cancel:
+ nla_nest_cancel(skb, xstats);
+err:
+ return -EMSGSIZE;
+}
+
+static int taprio_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
+{
+ struct tc_taprio_qopt_offload offload = {
+ .cmd = TAPRIO_CMD_STATS,
+ };
+
+ return taprio_dump_xstats(sch, d, &offload, &offload.stats);
+}
+
static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct taprio_sched *q = qdisc_priv(sch);
@@ -2391,12 +2460,20 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
__acquires(d->lock)
{
struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+ struct tc_taprio_qopt_offload offload = {
+ .cmd = TAPRIO_CMD_QUEUE_STATS,
+ .queue_stats = {
+ .queue = cl - 1,
+ },
+ };
+ struct Qdisc *child;
- sch = rtnl_dereference(dev_queue->qdisc_sleeping);
- if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
- qdisc_qstats_copy(d, sch) < 0)
+ child = rtnl_dereference(dev_queue->qdisc_sleeping);
+ if (gnet_stats_copy_basic(d, NULL, &child->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, child) < 0)
return -1;
- return 0;
+
+ return taprio_dump_xstats(sch, d, &offload, &offload.queue_stats.stats);
}
static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
@@ -2443,6 +2520,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = {
.dequeue = taprio_dequeue,
.enqueue = taprio_enqueue,
.dump = taprio_dump,
+ .dump_stats = taprio_dump_stats,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 277ad11f4d61..17d2d00ddb18 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -13,6 +13,7 @@
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
+#include <net/gso.h>
#include <net/netlink.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
diff --git a/net/sctp/offload.c b/net/sctp/offload.c
index eb874e3c399a..502095173d88 100644
--- a/net/sctp/offload.c
+++ b/net/sctp/offload.c
@@ -22,6 +22,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/checksum.h>
#include <net/protocol.h>
+#include <net/gso.h>
static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
{
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index c365df24ad33..664d1f2e9121 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -500,9 +500,7 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
continue;
fl4->fl4_sport = laddr->a.v4.sin_port;
- flowi4_update_output(fl4,
- asoc->base.sk->sk_bound_dev_if,
- RT_CONN_FLAGS_TOS(asoc->base.sk, tos),
+ flowi4_update_output(fl4, asoc->base.sk->sk_bound_dev_if,
daddr->v4.sin_addr.s_addr,
laddr->a.v4.sin_addr.s_addr);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index cda8c2874691..6554a357fe33 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4895,7 +4895,7 @@ out:
}
/* The SCTP ioctl handler. */
-static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+static int sctp_ioctl(struct sock *sk, int cmd, int *karg)
{
int rc = -ENOTCONN;
@@ -4911,7 +4911,7 @@ static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
switch (cmd) {
case SIOCINQ: {
struct sk_buff *skb;
- unsigned int amount = 0;
+ *karg = 0;
skb = skb_peek(&sk->sk_receive_queue);
if (skb != NULL) {
@@ -4919,9 +4919,9 @@ static int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
* We will only return the amount of this packet since
* that is all that will be read.
*/
- amount = skb->len;
+ *karg = skb->len;
}
- rc = put_user(amount, (int __user *)arg);
+ rc = 0;
break;
}
default:
@@ -8281,6 +8281,22 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname,
return retval;
}
+static bool sctp_bpf_bypass_getsockopt(int level, int optname)
+{
+ if (level == SOL_SCTP) {
+ switch (optname) {
+ case SCTP_SOCKOPT_PEELOFF:
+ case SCTP_SOCKOPT_PEELOFF_FLAGS:
+ case SCTP_SOCKOPT_CONNECTX3:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
static int sctp_hash(struct sock *sk)
{
/* STUB */
@@ -9650,6 +9666,7 @@ struct proto sctp_prot = {
.shutdown = sctp_shutdown,
.setsockopt = sctp_setsockopt,
.getsockopt = sctp_getsockopt,
+ .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt,
.sendmsg = sctp_sendmsg,
.recvmsg = sctp_recvmsg,
.bind = sctp_bind,
@@ -9705,6 +9722,7 @@ struct proto sctpv6_prot = {
.shutdown = sctp_shutdown,
.setsockopt = sctp_setsockopt,
.getsockopt = sctp_getsockopt,
+ .bpf_bypass_getsockopt = sctp_bpf_bypass_getsockopt,
.sendmsg = sctp_sendmsg,
.recvmsg = sctp_recvmsg,
.bind = sctp_bind,
diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c
index e843760e9aaa..54afbe4fb087 100644
--- a/net/sctp/stream_sched.c
+++ b/net/sctp/stream_sched.c
@@ -148,18 +148,19 @@ static void sctp_sched_free_sched(struct sctp_stream *stream)
int sctp_sched_set_sched(struct sctp_association *asoc,
enum sctp_sched_type sched)
{
- struct sctp_sched_ops *n = sctp_sched_ops[sched];
struct sctp_sched_ops *old = asoc->outqueue.sched;
struct sctp_datamsg *msg = NULL;
+ struct sctp_sched_ops *n;
struct sctp_chunk *ch;
int i, ret = 0;
- if (old == n)
- return ret;
-
if (sched > SCTP_SS_MAX)
return -EINVAL;
+ n = sctp_sched_ops[sched];
+ if (old == n)
+ return ret;
+
if (old)
sctp_sched_free_sched(&asoc->stream);
diff --git a/net/socket.c b/net/socket.c
index b7e01d0fe082..b778fc03c6e0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -57,6 +57,7 @@
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/file.h>
+#include <linux/splice.h>
#include <linux/net.h>
#include <linux/interrupt.h>
#include <linux/thread_info.h>
@@ -126,11 +127,10 @@ static long compat_sock_ioctl(struct file *file,
unsigned int cmd, unsigned long arg);
#endif
static int sock_fasync(int fd, struct file *filp, int on);
-static ssize_t sock_sendpage(struct file *file, struct page *page,
- int offset, size_t size, loff_t *ppos, int more);
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
+static void sock_splice_eof(struct file *file);
#ifdef CONFIG_PROC_FS
static void sock_show_fdinfo(struct seq_file *m, struct file *f)
@@ -162,9 +162,9 @@ static const struct file_operations socket_file_ops = {
.mmap = sock_mmap,
.release = sock_close,
.fasync = sock_fasync,
- .sendpage = sock_sendpage,
- .splice_write = generic_splice_sendpage,
+ .splice_write = splice_to_socket,
.splice_read = sock_splice_read,
+ .splice_eof = sock_splice_eof,
.show_fdinfo = sock_show_fdinfo,
};
@@ -1066,26 +1066,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
}
EXPORT_SYMBOL(kernel_recvmsg);
-static ssize_t sock_sendpage(struct file *file, struct page *page,
- int offset, size_t size, loff_t *ppos, int more)
-{
- struct socket *sock;
- int flags;
- int ret;
-
- sock = file->private_data;
-
- flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
- /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
- flags |= more;
-
- ret = kernel_sendpage(sock, page, offset, size, flags);
-
- if (trace_sock_send_length_enabled())
- call_trace_sock_send_length(sock->sk, ret, 0);
- return ret;
-}
-
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags)
@@ -1098,6 +1078,14 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
return sock->ops->splice_read(sock, ppos, pipe, len, flags);
}
+static void sock_splice_eof(struct file *file)
+{
+ struct socket *sock = file->private_data;
+
+ if (sock->ops->splice_eof)
+ sock->ops->splice_eof(sock);
+}
+
static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
@@ -2138,6 +2126,7 @@ int __sys_sendto(int fd, void __user *buff, size_t len, unsigned int flags,
msg.msg_name = (struct sockaddr *)&address;
msg.msg_namelen = addr_len;
}
+ flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
@@ -2483,6 +2472,7 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
msg_sys->msg_control = ctl_buf;
msg_sys->msg_control_is_user = false;
}
+ flags &= ~MSG_INTERNAL_SENDMSG_FLAGS;
msg_sys->msg_flags = flags;
if (sock->file->f_flags & O_NONBLOCK)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f77cebe2c071..9d9f522e3ae1 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1203,13 +1203,14 @@ err_noclose:
static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
int flags)
{
- return kernel_sendpage(sock, virt_to_page(vec->iov_base),
- offset_in_page(vec->iov_base),
- vec->iov_len, flags);
+ struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES | flags, };
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, vec, 1, vec->iov_len);
+ return sock_sendmsg(sock, &msg);
}
/*
- * kernel_sendpage() is used exclusively to reduce the number of
+ * MSG_SPLICE_PAGES is used exclusively to reduce the number of
* copy operations in this path. Therefore the caller must ensure
* that the pages backing @xdr are unchanging.
*
@@ -1249,28 +1250,13 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
if (ret != head->iov_len)
goto out;
- if (xdr->page_len) {
- unsigned int offset, len, remaining;
- struct bio_vec *bvec;
-
- bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT);
- offset = offset_in_page(xdr->page_base);
- remaining = xdr->page_len;
- while (remaining > 0) {
- len = min(remaining, bvec->bv_len - offset);
- ret = kernel_sendpage(sock, bvec->bv_page,
- bvec->bv_offset + offset,
- len, 0);
- if (ret < 0)
- return ret;
- *sentp += ret;
- if (ret != len)
- goto out;
- remaining -= len;
- offset = 0;
- bvec++;
- }
- }
+ msg.msg_flags = MSG_SPLICE_PAGES;
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
+ xdr_buf_pagecount(xdr), xdr->page_len);
+ ret = sock_sendmsg(sock, &msg);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
if (tail->iov_len) {
ret = svc_tcp_send_kvec(sock, tail, 0);
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index cdcd2731860b..2cde375477e3 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -176,7 +176,7 @@ static int bearer_name_validate(const char *name,
*/
struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
struct tipc_bearer *b;
u32 i;
@@ -211,11 +211,10 @@ int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id)
void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
rcu_read_lock();
- b = rcu_dereference(tn->bearer_list[bearer_id]);
+ b = bearer_get(net, bearer_id);
if (b)
tipc_disc_add_dest(b->disc);
rcu_read_unlock();
@@ -223,11 +222,10 @@ void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest)
void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_bearer *b;
rcu_read_lock();
- b = rcu_dereference(tn->bearer_list[bearer_id]);
+ b = bearer_get(net, bearer_id);
if (b)
tipc_disc_remove_dest(b->disc);
rcu_read_unlock();
@@ -431,7 +429,7 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
dev = dev_get_by_name(net, dev_name);
if (!dev)
return -ENODEV;
- if (tipc_mtu_bad(dev, 0)) {
+ if (tipc_mtu_bad(dev)) {
dev_put(dev);
return -EINVAL;
}
@@ -534,7 +532,7 @@ int tipc_bearer_mtu(struct net *net, u32 bearer_id)
struct tipc_bearer *b;
rcu_read_lock();
- b = rcu_dereference(tipc_net(net)->bearer_list[bearer_id]);
+ b = bearer_get(net, bearer_id);
if (b)
mtu = b->mtu;
rcu_read_unlock();
@@ -708,7 +706,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
test_and_set_bit_lock(0, &b->up);
break;
case NETDEV_CHANGEMTU:
- if (tipc_mtu_bad(dev, 0)) {
+ if (tipc_mtu_bad(dev)) {
bearer_disable(net, b);
break;
}
@@ -745,7 +743,7 @@ void tipc_bearer_cleanup(void)
void tipc_bearer_stop(struct net *net)
{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
struct tipc_bearer *b;
u32 i;
@@ -881,7 +879,7 @@ int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct tipc_bearer *bearer;
struct tipc_nl_msg msg;
struct net *net = sock_net(skb->sk);
- struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_net *tn = tipc_net(net);
if (i == MAX_BEARERS)
return 0;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index bd0cc5c287ef..1ee60649bd17 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -257,9 +257,9 @@ static inline void tipc_loopback_trace(struct net *net,
}
/* check if device MTU is too low for tipc headers */
-static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve)
+static inline bool tipc_mtu_bad(struct net_device *dev)
{
- if (dev->mtu >= TIPC_MIN_BEARER_MTU + reserve)
+ if (dev->mtu >= TIPC_MIN_BEARER_MTU)
return false;
netdev_warn(dev, "MTU too low for tipc bearer\n");
return true;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 0a85244fd618..926232557e77 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -739,10 +739,6 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
udp_conf.use_udp_checksums = false;
ub->ifindex = dev->ifindex;
b->encap_hlen = sizeof(struct iphdr) + sizeof(struct udphdr);
- if (tipc_mtu_bad(dev, b->encap_hlen)) {
- err = -EINVAL;
- goto err;
- }
b->mtu = b->media->mtu;
#if IS_ENABLED(CONFIG_IPV6)
} else if (local.proto == htons(ETH_P_IPV6)) {
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 0672acab2773..d002c3af1966 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -97,6 +97,7 @@ void tls_update_rx_zc_capable(struct tls_context *tls_ctx);
void tls_sw_strparser_arm(struct sock *sk, struct tls_context *ctx);
void tls_sw_strparser_done(struct tls_context *tls_ctx);
int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+void tls_sw_splice_eof(struct socket *sock);
int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int tls_sw_sendpage(struct sock *sk, struct page *page,
@@ -115,6 +116,7 @@ ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos,
size_t len, unsigned int flags);
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
+void tls_device_splice_eof(struct socket *sock);
int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
int tls_tx_records(struct sock *sk, int flags);
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index bf69c9d6d06c..b82770f68807 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -268,9 +268,8 @@ static void tls_append_frag(struct tls_record_info *record,
skb_frag_size_add(frag, size);
} else {
++frag;
- __skb_frag_set_page(frag, pfrag->page);
- skb_frag_off_set(frag, pfrag->offset);
- skb_frag_size_set(frag, size);
+ skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset,
+ size);
++record->num_frags;
get_page(pfrag->page);
}
@@ -357,9 +356,8 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
return -ENOMEM;
frag = &record->frags[0];
- __skb_frag_set_page(frag, pfrag->page);
- skb_frag_off_set(frag, pfrag->offset);
- skb_frag_size_set(frag, prepend_size);
+ skb_frag_fill_page_desc(frag, pfrag->page, pfrag->offset,
+ prepend_size);
get_page(pfrag->page);
pfrag->offset += prepend_size;
@@ -424,16 +422,10 @@ static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i)
return 0;
}
-union tls_iter_offset {
- struct iov_iter *msg_iter;
- int offset;
-};
-
static int tls_push_data(struct sock *sk,
- union tls_iter_offset iter_offset,
+ struct iov_iter *iter,
size_t size, int flags,
- unsigned char record_type,
- struct page *zc_page)
+ unsigned char record_type)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_prot_info *prot = &tls_ctx->prot_info;
@@ -449,7 +441,8 @@ static int tls_push_data(struct sock *sk,
long timeo;
if (flags &
- ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST))
+ ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_SENDPAGE_NOTLAST |
+ MSG_SPLICE_PAGES))
return -EOPNOTSUPP;
if (unlikely(sk->sk_err))
@@ -501,21 +494,35 @@ handle_error:
record = ctx->open_record;
copy = min_t(size_t, size, max_open_record_len - record->len);
- if (copy && zc_page) {
+ if (copy && (flags & MSG_SPLICE_PAGES)) {
struct page_frag zc_pfrag;
+ struct page **pages = &zc_pfrag.page;
+ size_t off;
+
+ rc = iov_iter_extract_pages(iter, &pages,
+ copy, 1, 0, &off);
+ if (rc <= 0) {
+ if (rc == 0)
+ rc = -EIO;
+ goto handle_error;
+ }
+ copy = rc;
- zc_pfrag.page = zc_page;
- zc_pfrag.offset = iter_offset.offset;
+ if (WARN_ON_ONCE(!sendpage_ok(zc_pfrag.page))) {
+ iov_iter_revert(iter, copy);
+ rc = -EIO;
+ goto handle_error;
+ }
+
+ zc_pfrag.offset = off;
zc_pfrag.size = copy;
tls_append_frag(record, &zc_pfrag, copy);
-
- iter_offset.offset += copy;
} else if (copy) {
copy = min_t(size_t, copy, pfrag->size - pfrag->offset);
rc = tls_device_copy_data(page_address(pfrag->page) +
pfrag->offset, copy,
- iter_offset.msg_iter);
+ iter);
if (rc)
goto handle_error;
tls_append_frag(record, pfrag, copy);
@@ -570,9 +577,11 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
struct tls_context *tls_ctx = tls_get_ctx(sk);
- union tls_iter_offset iter;
int rc;
+ if (!tls_ctx->zerocopy_sendfile)
+ msg->msg_flags &= ~MSG_SPLICE_PAGES;
+
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
@@ -582,8 +591,8 @@ int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto out;
}
- iter.msg_iter = &msg->msg_iter;
- rc = tls_push_data(sk, iter, size, msg->msg_flags, record_type, NULL);
+ rc = tls_push_data(sk, &msg->msg_iter, size, msg->msg_flags,
+ record_type);
out:
release_sock(sk);
@@ -591,47 +600,42 @@ out:
return rc;
}
-int tls_device_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
+void tls_device_splice_eof(struct socket *sock)
{
+ struct sock *sk = sock->sk;
struct tls_context *tls_ctx = tls_get_ctx(sk);
- union tls_iter_offset iter_offset;
- struct iov_iter msg_iter;
- char *kaddr;
- struct kvec iov;
- int rc;
+ struct iov_iter iter = {};
- if (flags & MSG_SENDPAGE_NOTLAST)
- flags |= MSG_MORE;
+ if (!tls_is_partially_sent_record(tls_ctx))
+ return;
mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
- if (flags & MSG_OOB) {
- rc = -EOPNOTSUPP;
- goto out;
- }
-
- if (tls_ctx->zerocopy_sendfile) {
- iter_offset.offset = offset;
- rc = tls_push_data(sk, iter_offset, size,
- flags, TLS_RECORD_TYPE_DATA, page);
- goto out;
+ if (tls_is_partially_sent_record(tls_ctx)) {
+ iov_iter_bvec(&iter, ITER_SOURCE, NULL, 0, 0);
+ tls_push_data(sk, &iter, 0, 0, TLS_RECORD_TYPE_DATA);
}
- kaddr = kmap(page);
- iov.iov_base = kaddr + offset;
- iov.iov_len = size;
- iov_iter_kvec(&msg_iter, ITER_SOURCE, &iov, 1, size);
- iter_offset.msg_iter = &msg_iter;
- rc = tls_push_data(sk, iter_offset, size, flags, TLS_RECORD_TYPE_DATA,
- NULL);
- kunmap(page);
-
-out:
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
- return rc;
+}
+
+int tls_device_sendpage(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
+
+ if (flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return tls_device_sendmsg(sk, &msg, size);
}
struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context,
@@ -696,12 +700,10 @@ EXPORT_SYMBOL(tls_get_record);
static int tls_device_push_pending_record(struct sock *sk, int flags)
{
- union tls_iter_offset iter;
- struct iov_iter msg_iter;
+ struct iov_iter iter;
- iov_iter_kvec(&msg_iter, ITER_SOURCE, NULL, 0, 0);
- iter.msg_iter = &msg_iter;
- return tls_push_data(sk, iter, 0, flags, TLS_RECORD_TYPE_DATA, NULL);
+ iov_iter_kvec(&iter, ITER_SOURCE, NULL, 0, 0);
+ return tls_push_data(sk, &iter, 0, flags, TLS_RECORD_TYPE_DATA);
}
void tls_device_write_space(struct sock *sk, struct tls_context *ctx)
@@ -1217,7 +1219,7 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
tls_device_attach(ctx, sk, netdev);
up_read(&device_offload_lock);
- /* following this assignment tls_is_sk_tx_device_offloaded
+ /* following this assignment tls_is_skb_tx_device_offloaded
* will return true and the context might be accessed
* by the netdev's xmit function.
*/
@@ -1370,7 +1372,7 @@ static int tls_device_down(struct net_device *netdev)
list_for_each_entry_safe(ctx, tmp, &list, list) {
/* Stop offloaded TX and switch to the fallback.
- * tls_is_sk_tx_device_offloaded will return false.
+ * tls_is_skb_tx_device_offloaded will return false.
*/
WRITE_ONCE(ctx->sk->sk_validate_xmit_skb, tls_validate_xmit_skb_sw);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index f2e7302a4d96..7b9c83dd7de2 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -125,7 +125,10 @@ int tls_push_sg(struct sock *sk,
u16 first_offset,
int flags)
{
- int sendpage_flags = flags | MSG_SENDPAGE_NOTLAST;
+ struct bio_vec bvec;
+ struct msghdr msg = {
+ .msg_flags = MSG_SENDPAGE_NOTLAST | MSG_SPLICE_PAGES | flags,
+ };
int ret = 0;
struct page *p;
size_t size;
@@ -134,16 +137,19 @@ int tls_push_sg(struct sock *sk,
size = sg->length - offset;
offset += sg->offset;
- ctx->in_tcp_sendpages = true;
+ ctx->splicing_pages = true;
while (1) {
if (sg_is_last(sg))
- sendpage_flags = flags;
+ msg.msg_flags = flags;
/* is sending application-limited? */
tcp_rate_check_app_limited(sk);
p = sg_page(sg);
retry:
- ret = do_tcp_sendpages(sk, p, offset, size, sendpage_flags);
+ bvec_set_page(&bvec, p, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+
+ ret = tcp_sendmsg_locked(sk, &msg, size);
if (ret != size) {
if (ret > 0) {
@@ -155,7 +161,7 @@ retry:
offset -= sg->offset;
ctx->partially_sent_offset = offset;
ctx->partially_sent_record = (void *)sg;
- ctx->in_tcp_sendpages = false;
+ ctx->splicing_pages = false;
return ret;
}
@@ -169,7 +175,7 @@ retry:
size = sg->length;
}
- ctx->in_tcp_sendpages = false;
+ ctx->splicing_pages = false;
return 0;
}
@@ -247,11 +253,11 @@ static void tls_write_space(struct sock *sk)
{
struct tls_context *ctx = tls_get_ctx(sk);
- /* If in_tcp_sendpages call lower protocol write space handler
+ /* If splicing_pages call lower protocol write space handler
* to ensure we wake up any waiting operations there. For example
- * if do_tcp_sendpages where to call sk_wait_event.
+ * if splicing pages where to call sk_wait_event.
*/
- if (ctx->in_tcp_sendpages) {
+ if (ctx->splicing_pages) {
ctx->sk_write_space(sk);
return;
}
@@ -352,6 +358,39 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
tls_ctx_free(sk, ctx);
}
+static __poll_t tls_sk_poll(struct file *file, struct socket *sock,
+ struct poll_table_struct *wait)
+{
+ struct tls_sw_context_rx *ctx;
+ struct tls_context *tls_ctx;
+ struct sock *sk = sock->sk;
+ struct sk_psock *psock;
+ __poll_t mask = 0;
+ u8 shutdown;
+ int state;
+
+ mask = tcp_poll(file, sock, wait);
+
+ state = inet_sk_state_load(sk);
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (unlikely(state != TCP_ESTABLISHED || shutdown & RCV_SHUTDOWN))
+ return mask;
+
+ tls_ctx = tls_get_ctx(sk);
+ ctx = tls_sw_ctx_rx(tls_ctx);
+ psock = sk_psock_get(sk);
+
+ if (skb_queue_empty_lockless(&ctx->rx_list) &&
+ !tls_strp_msg_ready(ctx) &&
+ sk_psock_queue_empty(psock))
+ mask &= ~(EPOLLIN | EPOLLRDNORM);
+
+ if (psock)
+ sk_psock_put(sk, psock);
+
+ return mask;
+}
+
static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval,
int __user *optlen, int tx)
{
@@ -918,13 +957,16 @@ static void build_proto_ops(struct proto_ops ops[TLS_NUM_CONFIG][TLS_NUM_CONFIG]
ops[TLS_BASE][TLS_BASE] = *base;
ops[TLS_SW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
+ ops[TLS_SW ][TLS_BASE].splice_eof = tls_sw_splice_eof;
ops[TLS_SW ][TLS_BASE].sendpage_locked = tls_sw_sendpage_locked;
ops[TLS_BASE][TLS_SW ] = ops[TLS_BASE][TLS_BASE];
ops[TLS_BASE][TLS_SW ].splice_read = tls_sw_splice_read;
+ ops[TLS_BASE][TLS_SW ].poll = tls_sk_poll;
ops[TLS_SW ][TLS_SW ] = ops[TLS_SW ][TLS_BASE];
ops[TLS_SW ][TLS_SW ].splice_read = tls_sw_splice_read;
+ ops[TLS_SW ][TLS_SW ].poll = tls_sk_poll;
#ifdef CONFIG_TLS_DEVICE
ops[TLS_HW ][TLS_BASE] = ops[TLS_BASE][TLS_BASE];
@@ -986,6 +1028,7 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
prot[TLS_SW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
prot[TLS_SW][TLS_BASE].sendmsg = tls_sw_sendmsg;
+ prot[TLS_SW][TLS_BASE].splice_eof = tls_sw_splice_eof;
prot[TLS_SW][TLS_BASE].sendpage = tls_sw_sendpage;
prot[TLS_BASE][TLS_SW] = prot[TLS_BASE][TLS_BASE];
@@ -1001,10 +1044,12 @@ static void build_protos(struct proto prot[TLS_NUM_CONFIG][TLS_NUM_CONFIG],
#ifdef CONFIG_TLS_DEVICE
prot[TLS_HW][TLS_BASE] = prot[TLS_BASE][TLS_BASE];
prot[TLS_HW][TLS_BASE].sendmsg = tls_device_sendmsg;
+ prot[TLS_HW][TLS_BASE].splice_eof = tls_device_splice_eof;
prot[TLS_HW][TLS_BASE].sendpage = tls_device_sendpage;
prot[TLS_HW][TLS_SW] = prot[TLS_BASE][TLS_SW];
prot[TLS_HW][TLS_SW].sendmsg = tls_device_sendmsg;
+ prot[TLS_HW][TLS_SW].splice_eof = tls_device_splice_eof;
prot[TLS_HW][TLS_SW].sendpage = tls_device_sendpage;
prot[TLS_BASE][TLS_HW] = prot[TLS_BASE][TLS_SW];
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 1a53c8f481e9..319f61590d2c 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -931,7 +931,37 @@ static int tls_sw_push_pending_record(struct sock *sk, int flags)
&copied, flags);
}
-int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+static int tls_sw_sendmsg_splice(struct sock *sk, struct msghdr *msg,
+ struct sk_msg *msg_pl, size_t try_to_copy,
+ ssize_t *copied)
+{
+ struct page *page = NULL, **pages = &page;
+
+ do {
+ ssize_t part;
+ size_t off;
+
+ part = iov_iter_extract_pages(&msg->msg_iter, &pages,
+ try_to_copy, 1, 0, &off);
+ if (part <= 0)
+ return part ?: -EIO;
+
+ if (WARN_ON_ONCE(!sendpage_ok(page))) {
+ iov_iter_revert(&msg->msg_iter, part);
+ return -EIO;
+ }
+
+ sk_msg_page_add(msg_pl, page, part, off);
+ sk_mem_charge(sk, part);
+ *copied += part;
+ try_to_copy -= part;
+ } while (try_to_copy && !sk_msg_full(msg_pl));
+
+ return 0;
+}
+
+static int tls_sw_sendmsg_locked(struct sock *sk, struct msghdr *msg,
+ size_t size)
{
long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -954,15 +984,6 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
int ret = 0;
int pending;
- if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
- MSG_CMSG_COMPAT))
- return -EOPNOTSUPP;
-
- ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
- if (ret)
- return ret;
- lock_sock(sk);
-
if (unlikely(msg->msg_controllen)) {
ret = tls_process_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1020,6 +1041,17 @@ alloc_encrypted:
full_record = true;
}
+ if (try_to_copy && (msg->msg_flags & MSG_SPLICE_PAGES)) {
+ ret = tls_sw_sendmsg_splice(sk, msg, msg_pl,
+ try_to_copy, &copied);
+ if (ret < 0)
+ goto send_end;
+ tls_ctx->pending_open_record_frags = true;
+ if (full_record || eor || sk_msg_full(msg_pl))
+ goto copied;
+ continue;
+ }
+
if (!is_kvec && (full_record || eor) && !async_capable) {
u32 first = msg_pl->sg.end;
@@ -1084,6 +1116,7 @@ fallback_to_reg_send:
*/
tls_ctx->pending_open_record_frags = true;
copied += try_to_copy;
+copied:
if (full_record || eor) {
ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
record_type, &copied,
@@ -1151,157 +1184,136 @@ trim_sgl:
send_end:
ret = sk_stream_error(sk, msg->msg_flags, ret);
+ return copied > 0 ? copied : ret;
+}
+
+int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
+{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
+ int ret;
+
+ if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_CMSG_COMPAT | MSG_SPLICE_PAGES |
+ MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+ return -EOPNOTSUPP;
+ ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
+ if (ret)
+ return ret;
+ lock_sock(sk);
+ ret = tls_sw_sendmsg_locked(sk, msg, size);
release_sock(sk);
mutex_unlock(&tls_ctx->tx_lock);
- return copied > 0 ? copied : ret;
+ return ret;
}
-static int tls_sw_do_sendpage(struct sock *sk, struct page *page,
- int offset, size_t size, int flags)
+/*
+ * Handle unexpected EOF during splice without SPLICE_F_MORE set.
+ */
+void tls_sw_splice_eof(struct socket *sock)
{
- long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+ struct sock *sk = sock->sk;
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
- struct tls_prot_info *prot = &tls_ctx->prot_info;
- unsigned char record_type = TLS_RECORD_TYPE_DATA;
- struct sk_msg *msg_pl;
struct tls_rec *rec;
- int num_async = 0;
+ struct sk_msg *msg_pl;
ssize_t copied = 0;
- bool full_record;
- int record_room;
+ bool retrying = false;
int ret = 0;
- bool eor;
-
- eor = !(flags & MSG_SENDPAGE_NOTLAST);
- sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
- /* Call the sk_stream functions to manage the sndbuf mem. */
- while (size > 0) {
- size_t copy, required_size;
-
- if (sk->sk_err) {
- ret = -sk->sk_err;
- goto sendpage_end;
- }
+ int pending;
- if (ctx->open_rec)
- rec = ctx->open_rec;
- else
- rec = ctx->open_rec = tls_get_rec(sk);
- if (!rec) {
- ret = -ENOMEM;
- goto sendpage_end;
- }
+ if (!ctx->open_rec)
+ return;
- msg_pl = &rec->msg_plaintext;
+ mutex_lock(&tls_ctx->tx_lock);
+ lock_sock(sk);
- full_record = false;
- record_room = TLS_MAX_PAYLOAD_SIZE - msg_pl->sg.size;
- copy = size;
- if (copy >= record_room) {
- copy = record_room;
- full_record = true;
- }
+retry:
+ rec = ctx->open_rec;
+ if (!rec)
+ goto unlock;
- required_size = msg_pl->sg.size + copy + prot->overhead_size;
+ msg_pl = &rec->msg_plaintext;
- if (!sk_stream_memory_free(sk))
- goto wait_for_sndbuf;
-alloc_payload:
- ret = tls_alloc_encrypted_msg(sk, required_size);
- if (ret) {
- if (ret != -ENOSPC)
- goto wait_for_memory;
+ /* Check the BPF advisor and perform transmission. */
+ ret = bpf_exec_tx_verdict(msg_pl, sk, false, TLS_RECORD_TYPE_DATA,
+ &copied, 0);
+ switch (ret) {
+ case 0:
+ case -EAGAIN:
+ if (retrying)
+ goto unlock;
+ retrying = true;
+ goto retry;
+ case -EINPROGRESS:
+ break;
+ default:
+ goto unlock;
+ }
- /* Adjust copy according to the amount that was
- * actually allocated. The difference is due
- * to max sg elements limit
- */
- copy -= required_size - msg_pl->sg.size;
- full_record = true;
- }
+ /* Wait for pending encryptions to get completed */
+ spin_lock_bh(&ctx->encrypt_compl_lock);
+ ctx->async_notify = true;
- sk_msg_page_add(msg_pl, page, copy, offset);
- sk_mem_charge(sk, copy);
+ pending = atomic_read(&ctx->encrypt_pending);
+ spin_unlock_bh(&ctx->encrypt_compl_lock);
+ if (pending)
+ crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
+ else
+ reinit_completion(&ctx->async_wait.completion);
- offset += copy;
- size -= copy;
- copied += copy;
+ /* There can be no concurrent accesses, since we have no pending
+ * encrypt operations
+ */
+ WRITE_ONCE(ctx->async_notify, false);
- tls_ctx->pending_open_record_frags = true;
- if (full_record || eor || sk_msg_full(msg_pl)) {
- ret = bpf_exec_tx_verdict(msg_pl, sk, full_record,
- record_type, &copied, flags);
- if (ret) {
- if (ret == -EINPROGRESS)
- num_async++;
- else if (ret == -ENOMEM)
- goto wait_for_memory;
- else if (ret != -EAGAIN) {
- if (ret == -ENOSPC)
- ret = 0;
- goto sendpage_end;
- }
- }
- }
- continue;
-wait_for_sndbuf:
- set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-wait_for_memory:
- ret = sk_stream_wait_memory(sk, &timeo);
- if (ret) {
- if (ctx->open_rec)
- tls_trim_both_msgs(sk, msg_pl->sg.size);
- goto sendpage_end;
- }
+ if (ctx->async_wait.err)
+ goto unlock;
- if (ctx->open_rec)
- goto alloc_payload;
+ /* Transmit if any encryptions have completed */
+ if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
+ cancel_delayed_work(&ctx->tx_work.work);
+ tls_tx_records(sk, 0);
}
- if (num_async) {
- /* Transmit if any encryptions have completed */
- if (test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask)) {
- cancel_delayed_work(&ctx->tx_work.work);
- tls_tx_records(sk, flags);
- }
- }
-sendpage_end:
- ret = sk_stream_error(sk, flags, ret);
- return copied > 0 ? copied : ret;
+unlock:
+ release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
}
int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
+
if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY |
MSG_NO_SHARED_FRAGS))
return -EOPNOTSUPP;
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
- return tls_sw_do_sendpage(sk, page, offset, size, flags);
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return tls_sw_sendmsg_locked(sk, &msg, size);
}
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- int ret;
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES, };
if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
return -EOPNOTSUPP;
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
- ret = mutex_lock_interruptible(&tls_ctx->tx_lock);
- if (ret)
- return ret;
- lock_sock(sk);
- ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
- release_sock(sk);
- mutex_unlock(&tls_ctx->tx_lock);
- return ret;
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return tls_sw_sendmsg(sk, &msg, size);
}
static int
diff --git a/net/unix/Kconfig b/net/unix/Kconfig
index b7f811216820..28b232f281ab 100644
--- a/net/unix/Kconfig
+++ b/net/unix/Kconfig
@@ -4,7 +4,7 @@
#
config UNIX
- tristate "Unix domain sockets"
+ bool "Unix domain sockets"
help
If you say Y here, you will include support for Unix domain sockets;
sockets are the standard Unix mechanism for establishing and
@@ -14,10 +14,6 @@ config UNIX
an embedded system or something similar, you therefore definitely
want to say Y here.
- To compile this driver as a module, choose M here: the module will be
- called unix. Note that several important services won't work
- correctly if you say M here and then neglect to load the module.
-
Say Y unless you know what you are doing.
config UNIX_SCM
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e7728b57a8c7..73c61a010b01 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -921,11 +921,26 @@ static void unix_unhash(struct sock *sk)
*/
}
+static bool unix_bpf_bypass_getsockopt(int level, int optname)
+{
+ if (level == SOL_SOCKET) {
+ switch (optname) {
+ case SO_PEERPIDFD:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
struct proto unix_dgram_proto = {
.name = "UNIX",
.owner = THIS_MODULE,
.obj_size = sizeof(struct unix_sock),
.close = unix_close,
+ .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = unix_dgram_bpf_update_proto,
#endif
@@ -937,6 +952,7 @@ struct proto unix_stream_proto = {
.obj_size = sizeof(struct unix_sock),
.close = unix_close,
.unhash = unix_unhash,
+ .bpf_bypass_getsockopt = unix_bpf_bypass_getsockopt,
#ifdef CONFIG_BPF_SYSCALL
.psock_update_sk_prot = unix_stream_bpf_update_proto,
#endif
@@ -1361,7 +1377,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
- if (test_bit(SOCK_PASSCRED, &sock->flags) &&
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) &&
!unix_sk(sk)->addr) {
err = unix_autobind(sk);
if (err)
@@ -1469,7 +1486,8 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
if (err)
goto out;
- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -1670,6 +1688,8 @@ static void unix_sock_inherit_flags(const struct socket *old,
{
if (test_bit(SOCK_PASSCRED, &old->flags))
set_bit(SOCK_PASSCRED, &new->flags);
+ if (test_bit(SOCK_PASSPIDFD, &old->flags))
+ set_bit(SOCK_PASSPIDFD, &new->flags);
if (test_bit(SOCK_PASSSEC, &old->flags))
set_bit(SOCK_PASSSEC, &new->flags);
}
@@ -1819,8 +1839,10 @@ static bool unix_passcred_enabled(const struct socket *sock,
const struct sock *other)
{
return test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags) ||
!other->sk_socket ||
- test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+ test_bit(SOCK_PASSCRED, &other->sk_socket->flags) ||
+ test_bit(SOCK_PASSPIDFD, &other->sk_socket->flags);
}
/*
@@ -1839,24 +1861,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
}
}
-static int maybe_init_creds(struct scm_cookie *scm,
- struct socket *socket,
- const struct sock *other)
-{
- int err;
- struct msghdr msg = { .msg_controllen = 0 };
-
- err = scm_send(socket, &msg, scm, false);
- if (err)
- return err;
-
- if (unix_passcred_enabled(socket, other)) {
- scm->pid = get_pid(task_tgid(current));
- current_uid_gid(&scm->creds.uid, &scm->creds.gid);
- }
- return err;
-}
-
static bool unix_skb_scm_eq(struct sk_buff *skb,
struct scm_cookie *scm)
{
@@ -1922,7 +1926,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
- if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr) {
+ if ((test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) && !u->addr) {
err = unix_autobind(sk);
if (err)
goto out;
@@ -2200,19 +2205,25 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
while (sent < len) {
size = len - sent;
- /* Keep two messages in the pipe so it schedules better */
- size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
+ if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ skb = sock_alloc_send_pskb(sk, 0, 0,
+ msg->msg_flags & MSG_DONTWAIT,
+ &err, 0);
+ } else {
+ /* Keep two messages in the pipe so it schedules better */
+ size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
- /* allow fallback to order-0 allocations */
- size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
+ /* allow fallback to order-0 allocations */
+ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
- data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
+ data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
- data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
+ data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
- skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
- msg->msg_flags & MSG_DONTWAIT, &err,
- get_order(UNIX_SKB_FRAGS_SZ));
+ skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
+ msg->msg_flags & MSG_DONTWAIT, &err,
+ get_order(UNIX_SKB_FRAGS_SZ));
+ }
if (!skb)
goto out_err;
@@ -2224,13 +2235,24 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
}
fds_sent = true;
- skb_put(skb, size - data_len);
- skb->data_len = data_len;
- skb->len = size;
- err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
- if (err) {
- kfree_skb(skb);
- goto out_err;
+ if (unlikely(msg->msg_flags & MSG_SPLICE_PAGES)) {
+ err = skb_splice_from_iter(skb, &msg->msg_iter, size,
+ sk->sk_allocation);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto out_err;
+ }
+ size = err;
+ refcount_add(size, &sk->sk_wmem_alloc);
+ } else {
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+ err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+ if (err) {
+ kfree_skb(skb);
+ goto out_err;
+ }
}
unix_state_lock(other);
@@ -2275,117 +2297,15 @@ out_err:
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
int offset, size_t size, int flags)
{
- int err;
- bool send_sigpipe = false;
- bool init_scm = true;
- struct scm_cookie scm;
- struct sock *other, *sk = socket->sk;
- struct sk_buff *skb, *newskb = NULL, *tail = NULL;
+ struct bio_vec bvec;
+ struct msghdr msg = { .msg_flags = flags | MSG_SPLICE_PAGES };
- if (flags & MSG_OOB)
- return -EOPNOTSUPP;
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ msg.msg_flags |= MSG_MORE;
- other = unix_peer(sk);
- if (!other || sk->sk_state != TCP_ESTABLISHED)
- return -ENOTCONN;
-
- if (false) {
-alloc_skb:
- unix_state_unlock(other);
- mutex_unlock(&unix_sk(other)->iolock);
- newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
- &err, 0);
- if (!newskb)
- goto err;
- }
-
- /* we must acquire iolock as we modify already present
- * skbs in the sk_receive_queue and mess with skb->len
- */
- err = mutex_lock_interruptible(&unix_sk(other)->iolock);
- if (err) {
- err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
- goto err;
- }
-
- if (sk->sk_shutdown & SEND_SHUTDOWN) {
- err = -EPIPE;
- send_sigpipe = true;
- goto err_unlock;
- }
-
- unix_state_lock(other);
-
- if (sock_flag(other, SOCK_DEAD) ||
- other->sk_shutdown & RCV_SHUTDOWN) {
- err = -EPIPE;
- send_sigpipe = true;
- goto err_state_unlock;
- }
-
- if (init_scm) {
- err = maybe_init_creds(&scm, socket, other);
- if (err)
- goto err_state_unlock;
- init_scm = false;
- }
-
- skb = skb_peek_tail(&other->sk_receive_queue);
- if (tail && tail == skb) {
- skb = newskb;
- } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
- if (newskb) {
- skb = newskb;
- } else {
- tail = skb;
- goto alloc_skb;
- }
- } else if (newskb) {
- /* this is fast path, we don't necessarily need to
- * call to kfree_skb even though with newskb == NULL
- * this - does no harm
- */
- consume_skb(newskb);
- newskb = NULL;
- }
-
- if (skb_append_pagefrags(skb, page, offset, size)) {
- tail = skb;
- goto alloc_skb;
- }
-
- skb->len += size;
- skb->data_len += size;
- skb->truesize += size;
- refcount_add(size, &sk->sk_wmem_alloc);
-
- if (newskb) {
- err = unix_scm_to_skb(&scm, skb, false);
- if (err)
- goto err_state_unlock;
- spin_lock(&other->sk_receive_queue.lock);
- __skb_queue_tail(&other->sk_receive_queue, newskb);
- spin_unlock(&other->sk_receive_queue.lock);
- }
-
- unix_state_unlock(other);
- mutex_unlock(&unix_sk(other)->iolock);
-
- other->sk_data_ready(other);
- scm_destroy(&scm);
- return size;
-
-err_state_unlock:
- unix_state_unlock(other);
-err_unlock:
- mutex_unlock(&unix_sk(other)->iolock);
-err:
- kfree_skb(newskb);
- if (send_sigpipe && !(flags & MSG_NOSIGNAL))
- send_sig(SIGPIPE, current, 0);
- if (!init_scm)
- scm_destroy(&scm);
- return err;
+ bvec_set_page(&bvec, page, size, offset);
+ iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ return unix_stream_sendmsg(socket, &msg, size);
}
static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
@@ -2821,7 +2741,8 @@ unlock:
/* Never glue messages from different writers */
if (!unix_skb_scm_eq(skb, &scm))
break;
- } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
+ } else if (test_bit(SOCK_PASSCRED, &sock->flags) ||
+ test_bit(SOCK_PASSPIDFD, &sock->flags)) {
/* Copy credentials */
scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
unix_set_secdata(&scm, skb);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index b3ec9eaec36b..fc449bea39a1 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -129,6 +129,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev,
int result;
ASSERT_RTNL();
+ lockdep_assert_wiphy(&rdev->wiphy);
/* Ignore nop renames */
if (strcmp(newname, wiphy_name(&rdev->wiphy)) == 0)
@@ -195,6 +196,8 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
continue;
nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
}
+
+ wiphy_lock(&rdev->wiphy);
nl80211_notify_wiphy(rdev, NL80211_CMD_DEL_WIPHY);
wiphy_net_set(&rdev->wiphy, net);
@@ -203,6 +206,8 @@ int cfg80211_switch_netns(struct cfg80211_registered_device *rdev,
WARN_ON(err);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+ wiphy_unlock(&rdev->wiphy);
+
list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
if (!wdev->netdev)
continue;
@@ -360,7 +365,8 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work)
rtnl_unlock();
}
-static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
+static void cfg80211_sched_scan_stop_wk(struct wiphy *wiphy,
+ struct wiphy_work *work)
{
struct cfg80211_registered_device *rdev;
struct cfg80211_sched_scan_request *req, *tmp;
@@ -368,12 +374,10 @@ static void cfg80211_sched_scan_stop_wk(struct work_struct *work)
rdev = container_of(work, struct cfg80211_registered_device,
sched_scan_stop_wk);
- wiphy_lock(&rdev->wiphy);
list_for_each_entry_safe(req, tmp, &rdev->sched_scan_req_list, list) {
if (req->nl_owner_dead)
cfg80211_stop_sched_scan_req(rdev, req, false);
}
- wiphy_unlock(&rdev->wiphy);
}
static void cfg80211_propagate_radar_detect_wk(struct work_struct *work)
@@ -408,6 +412,34 @@ static void cfg80211_propagate_cac_done_wk(struct work_struct *work)
rtnl_unlock();
}
+static void cfg80211_wiphy_work(struct work_struct *work)
+{
+ struct cfg80211_registered_device *rdev;
+ struct wiphy_work *wk;
+
+ rdev = container_of(work, struct cfg80211_registered_device, wiphy_work);
+
+ wiphy_lock(&rdev->wiphy);
+ if (rdev->suspended)
+ goto out;
+
+ spin_lock_irq(&rdev->wiphy_work_lock);
+ wk = list_first_entry_or_null(&rdev->wiphy_work_list,
+ struct wiphy_work, entry);
+ if (wk) {
+ list_del_init(&wk->entry);
+ if (!list_empty(&rdev->wiphy_work_list))
+ schedule_work(work);
+ spin_unlock_irq(&rdev->wiphy_work_lock);
+
+ wk->func(&rdev->wiphy, wk);
+ } else {
+ spin_unlock_irq(&rdev->wiphy_work_lock);
+ }
+out:
+ wiphy_unlock(&rdev->wiphy);
+}
+
/* exported functions */
struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv,
@@ -495,7 +527,7 @@ use_default_name:
spin_lock_init(&rdev->bss_lock);
INIT_LIST_HEAD(&rdev->bss_list);
INIT_LIST_HEAD(&rdev->sched_scan_req_list);
- INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done);
+ wiphy_work_init(&rdev->scan_done_wk, __cfg80211_scan_done);
INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk,
cfg80211_dfs_channels_update_work);
#ifdef CONFIG_CFG80211_WEXT
@@ -508,7 +540,7 @@ use_default_name:
device_enable_async_suspend(&rdev->wiphy.dev);
INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk);
- INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk);
+ wiphy_work_init(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk);
INIT_WORK(&rdev->sched_scan_res_wk, cfg80211_sched_scan_results_wk);
INIT_WORK(&rdev->propagate_radar_detect_wk,
cfg80211_propagate_radar_detect_wk);
@@ -533,6 +565,9 @@ use_default_name:
return NULL;
}
+ INIT_WORK(&rdev->wiphy_work, cfg80211_wiphy_work);
+ INIT_LIST_HEAD(&rdev->wiphy_work_list);
+ spin_lock_init(&rdev->wiphy_work_lock);
INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work);
INIT_WORK(&rdev->conn_work, cfg80211_conn_work);
INIT_WORK(&rdev->event_work, cfg80211_event_work);
@@ -941,8 +976,10 @@ int wiphy_register(struct wiphy *wiphy)
rdev->wiphy.features |= NL80211_FEATURE_SCAN_FLUSH;
rtnl_lock();
+ wiphy_lock(&rdev->wiphy);
res = device_add(&rdev->wiphy.dev);
if (res) {
+ wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
return res;
}
@@ -956,6 +993,7 @@ int wiphy_register(struct wiphy *wiphy)
cfg80211_debugfs_rdev_add(rdev);
nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY);
+ wiphy_unlock(&rdev->wiphy);
/* set up regulatory info */
wiphy_regulatory_register(wiphy);
@@ -1027,6 +1065,31 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy)
}
EXPORT_SYMBOL(wiphy_rfkill_start_polling);
+void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev)
+{
+ unsigned int runaway_limit = 100;
+ unsigned long flags;
+
+ lockdep_assert_held(&rdev->wiphy.mtx);
+
+ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+ while (!list_empty(&rdev->wiphy_work_list)) {
+ struct wiphy_work *wk;
+
+ wk = list_first_entry(&rdev->wiphy_work_list,
+ struct wiphy_work, entry);
+ list_del_init(&wk->entry);
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+
+ wk->func(&rdev->wiphy, wk);
+
+ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+ if (WARN_ON(--runaway_limit == 0))
+ INIT_LIST_HEAD(&rdev->wiphy_work_list);
+ }
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+}
+
void wiphy_unregister(struct wiphy *wiphy)
{
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -1065,25 +1128,29 @@ void wiphy_unregister(struct wiphy *wiphy)
cfg80211_rdev_list_generation++;
device_del(&rdev->wiphy.dev);
+#ifdef CONFIG_PM
+ if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
+ rdev_set_wakeup(rdev, false);
+#endif
+
+ /* surely nothing is reachable now, clean up work */
+ cfg80211_process_wiphy_works(rdev);
wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
- flush_work(&rdev->scan_done_wk);
+ /* this has nothing to do now but make sure it's gone */
+ cancel_work_sync(&rdev->wiphy_work);
+
cancel_work_sync(&rdev->conn_work);
flush_work(&rdev->event_work);
cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
cancel_delayed_work_sync(&rdev->background_cac_done_wk);
flush_work(&rdev->destroy_work);
- flush_work(&rdev->sched_scan_stop_wk);
flush_work(&rdev->propagate_radar_detect_wk);
flush_work(&rdev->propagate_cac_done_wk);
flush_work(&rdev->mgmt_registrations_update_wk);
flush_work(&rdev->background_cac_abort_wk);
-#ifdef CONFIG_PM
- if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
- rdev_set_wakeup(rdev, false);
-#endif
cfg80211_rdev_free_wowlan(rdev);
cfg80211_rdev_free_coalesce(rdev);
}
@@ -1145,8 +1212,6 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
ASSERT_RTNL();
lockdep_assert_held(&rdev->wiphy.mtx);
- flush_work(&wdev->pmsr_free_wk);
-
nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE);
wdev->registered = false;
@@ -1178,10 +1243,6 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev,
kfree_sensitive(wdev->wext.keys);
wdev->wext.keys = NULL;
#endif
- /* only initialized if we have a netdev */
- if (wdev->netdev)
- flush_work(&wdev->disconnect_wk);
-
cfg80211_cqm_config_free(wdev);
/*
@@ -1455,6 +1516,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
cfg80211_leave(rdev, wdev);
cfg80211_remove_links(wdev);
wiphy_unlock(&rdev->wiphy);
+ /* since we just did cfg80211_leave() nothing to do there */
+ cancel_work_sync(&wdev->disconnect_wk);
+ cancel_work_sync(&wdev->pmsr_free_wk);
break;
case NETDEV_DOWN:
wiphy_lock(&rdev->wiphy);
@@ -1564,6 +1628,66 @@ static struct pernet_operations cfg80211_pernet_ops = {
.exit = cfg80211_pernet_exit,
};
+void wiphy_work_queue(struct wiphy *wiphy, struct wiphy_work *work)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+ if (list_empty(&work->entry))
+ list_add_tail(&work->entry, &rdev->wiphy_work_list);
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+
+ schedule_work(&rdev->wiphy_work);
+}
+EXPORT_SYMBOL_GPL(wiphy_work_queue);
+
+void wiphy_work_cancel(struct wiphy *wiphy, struct wiphy_work *work)
+{
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ unsigned long flags;
+
+ lockdep_assert_held(&wiphy->mtx);
+
+ spin_lock_irqsave(&rdev->wiphy_work_lock, flags);
+ if (!list_empty(&work->entry))
+ list_del_init(&work->entry);
+ spin_unlock_irqrestore(&rdev->wiphy_work_lock, flags);
+}
+EXPORT_SYMBOL_GPL(wiphy_work_cancel);
+
+void wiphy_delayed_work_timer(struct timer_list *t)
+{
+ struct wiphy_delayed_work *dwork = from_timer(dwork, t, timer);
+
+ wiphy_work_queue(dwork->wiphy, &dwork->work);
+}
+EXPORT_SYMBOL(wiphy_delayed_work_timer);
+
+void wiphy_delayed_work_queue(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork,
+ unsigned long delay)
+{
+ if (!delay) {
+ wiphy_work_queue(wiphy, &dwork->work);
+ return;
+ }
+
+ dwork->wiphy = wiphy;
+ mod_timer(&dwork->timer, jiffies + delay);
+}
+EXPORT_SYMBOL_GPL(wiphy_delayed_work_queue);
+
+void wiphy_delayed_work_cancel(struct wiphy *wiphy,
+ struct wiphy_delayed_work *dwork)
+{
+ lockdep_assert_held(&wiphy->mtx);
+
+ del_timer_sync(&dwork->timer);
+ wiphy_work_cancel(wiphy, &dwork->work);
+}
+EXPORT_SYMBOL_GPL(wiphy_delayed_work_cancel);
+
static int __init cfg80211_init(void)
{
int err;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 7c61752f6d83..291c6d83d56f 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -75,7 +75,7 @@ struct cfg80211_registered_device {
struct sk_buff *scan_msg;
struct list_head sched_scan_req_list;
time64_t suspend_at;
- struct work_struct scan_done_wk;
+ struct wiphy_work scan_done_wk;
struct genl_info *cur_cmd_info;
@@ -95,7 +95,7 @@ struct cfg80211_registered_device {
struct cfg80211_coalesce *coalesce;
struct work_struct destroy_work;
- struct work_struct sched_scan_stop_wk;
+ struct wiphy_work sched_scan_stop_wk;
struct work_struct sched_scan_res_wk;
struct cfg80211_chan_def radar_chandef;
@@ -108,6 +108,12 @@ struct cfg80211_registered_device {
/* lock for all wdev lists */
spinlock_t mgmt_registrations_lock;
+ struct work_struct wiphy_work;
+ struct list_head wiphy_work_list;
+ /* protects the list above */
+ spinlock_t wiphy_work_lock;
+ bool suspended;
+
/* must be last because of the way we do wiphy_priv(),
* and it should at least be aligned to NETDEV_ALIGN */
struct wiphy wiphy __aligned(NETDEV_ALIGN);
@@ -435,7 +441,7 @@ bool cfg80211_valid_key_idx(struct cfg80211_registered_device *rdev,
int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
struct key_params *params, int key_idx,
bool pairwise, const u8 *mac_addr);
-void __cfg80211_scan_done(struct work_struct *wk);
+void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk);
void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
bool send_message);
void cfg80211_add_sched_scan_req(struct cfg80211_registered_device *rdev,
@@ -453,6 +459,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
struct net_device *dev, enum nl80211_iftype ntype,
struct vif_params *params);
void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
+void cfg80211_process_wiphy_works(struct cfg80211_registered_device *rdev);
void cfg80211_process_wdev_events(struct wireless_dev *wdev);
bool cfg80211_does_bw_fit_range(const struct ieee80211_freq_range *freq_range,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 087d60c0f6e4..772671b9bc42 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3081,6 +3081,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
if (state->filter_wiphy != -1 &&
state->filter_wiphy != rdev->wiphy_idx)
continue;
+ wiphy_lock(&rdev->wiphy);
/* attempt to fit multiple wiphy data chunks into the skb */
do {
ret = nl80211_send_wiphy(rdev, NL80211_CMD_NEW_WIPHY,
@@ -3107,6 +3108,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
cb->min_dump_alloc < 4096) {
cb->min_dump_alloc = 4096;
state->split_start = 0;
+ wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
return 1;
}
@@ -3114,6 +3116,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
break;
}
} while (state->split_start > 0);
+ wiphy_unlock(&rdev->wiphy);
break;
}
rtnl_unlock();
@@ -19774,7 +19777,8 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
list) {
if (sched_scan_req->owner_nlportid == notify->portid) {
sched_scan_req->nl_owner_dead = true;
- schedule_work(&rdev->sched_scan_stop_wk);
+ wiphy_work_queue(&rdev->wiphy,
+ &rdev->sched_scan_stop_wk);
}
}
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 2bc647720cda..77000a264855 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Copyright (C) 2018 - 2021 Intel Corporation
+ * Copyright (C) 2018 - 2021, 2023 Intel Corporation
*/
#include <net/cfg80211.h>
#include "core.h"
@@ -623,9 +623,11 @@ void cfg80211_pmsr_free_wk(struct work_struct *work)
struct wireless_dev *wdev = container_of(work, struct wireless_dev,
pmsr_free_wk);
+ wiphy_lock(wdev->wiphy);
wdev_lock(wdev);
cfg80211_pmsr_process_abort(wdev);
wdev_unlock(wdev);
+ wiphy_unlock(wdev->wiphy);
}
void cfg80211_pmsr_wdev_down(struct wireless_dev *wdev)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index c501db7bbdb3..ce2104dc05c6 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1004,16 +1004,9 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev,
nl80211_send_scan_msg(rdev, msg);
}
-void __cfg80211_scan_done(struct work_struct *wk)
+void __cfg80211_scan_done(struct wiphy *wiphy, struct wiphy_work *wk)
{
- struct cfg80211_registered_device *rdev;
-
- rdev = container_of(wk, struct cfg80211_registered_device,
- scan_done_wk);
-
- wiphy_lock(&rdev->wiphy);
- ___cfg80211_scan_done(rdev, true);
- wiphy_unlock(&rdev->wiphy);
+ ___cfg80211_scan_done(wiphy_to_rdev(wiphy), true);
}
void cfg80211_scan_done(struct cfg80211_scan_request *request,
@@ -1039,7 +1032,8 @@ void cfg80211_scan_done(struct cfg80211_scan_request *request,
}
request->notified = true;
- queue_work(cfg80211_wq, &wiphy_to_rdev(request->wiphy)->scan_done_wk);
+ wiphy_work_queue(request->wiphy,
+ &wiphy_to_rdev(request->wiphy)->scan_done_wk);
}
EXPORT_SYMBOL(cfg80211_scan_done);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 7bdeb8eea92d..247369004aaa 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,7 +5,7 @@
* (for nl80211's connect() and wext)
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009, 2020, 2022 Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020, 2022-2023 Intel Corporation. All rights reserved.
* Copyright 2017 Intel Deutschland GmbH
*/
@@ -1569,6 +1569,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
container_of(work, struct wireless_dev, disconnect_wk);
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ wiphy_lock(wdev->wiphy);
wdev_lock(wdev);
if (wdev->conn_owner_nlportid) {
@@ -1607,4 +1608,5 @@ void cfg80211_autodisconnect_wk(struct work_struct *work)
}
wdev_unlock(wdev);
+ wiphy_unlock(wdev->wiphy);
}
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 268f670835e9..c629bac3f298 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -5,7 +5,7 @@
*
* Copyright 2005-2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2021, 2023 Intel Corporation
*/
#include <linux/device.h>
@@ -105,14 +105,18 @@ static int wiphy_suspend(struct device *dev)
cfg80211_leave_all(rdev);
cfg80211_process_rdev_events(rdev);
}
+ cfg80211_process_wiphy_works(rdev);
if (rdev->ops->suspend)
ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config);
if (ret == 1) {
/* Driver refuse to configure wowlan */
cfg80211_leave_all(rdev);
cfg80211_process_rdev_events(rdev);
+ cfg80211_process_wiphy_works(rdev);
ret = rdev_suspend(rdev, NULL);
}
+ if (ret == 0)
+ rdev->suspended = true;
}
wiphy_unlock(&rdev->wiphy);
rtnl_unlock();
@@ -132,6 +136,8 @@ static int wiphy_resume(struct device *dev)
wiphy_lock(&rdev->wiphy);
if (rdev->wiphy.registered && rdev->ops->resume)
ret = rdev_resume(rdev);
+ rdev->suspended = false;
+ schedule_work(&rdev->wiphy_work);
wiphy_unlock(&rdev->wiphy);
if (ret)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index f231207ca210..f3eaa3388694 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -3,7 +3,7 @@
* cfg80211 wext compat for managed mode.
*
* Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009, 2020-2022 Intel Corporation
+ * Copyright (C) 2009, 2020-2023 Intel Corporation
*/
#include <linux/export.h>
@@ -338,6 +338,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
if (!ie_len)
ie = NULL;
+ wiphy_lock(wdev->wiphy);
wdev_lock(wdev);
/* no change */
@@ -370,6 +371,7 @@ int cfg80211_wext_siwgenie(struct net_device *dev,
err = 0;
out:
wdev_unlock(wdev);
+ wiphy_unlock(wdev->wiphy);
return err;
}
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b2df1e0f8153..26f6d304451e 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -350,7 +350,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
{
struct xsk_dma_map *dma_map;
- if (pool->dma_pages_cnt == 0)
+ if (!pool->dma_pages)
return;
dma_map = xp_find_dma_map(pool);
@@ -364,6 +364,7 @@ void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
__xp_dma_unmap(dma_map, attrs);
kvfree(pool->dma_pages);
+ pool->dma_pages = NULL;
pool->dma_pages_cnt = 0;
pool->dev = NULL;
}
@@ -503,7 +504,7 @@ static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
if (pool->unaligned) {
xskb = pool->free_heads[--pool->free_heads_cnt];
xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages_cnt)
+ if (pool->dma_pages)
xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
} else {
xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
@@ -569,7 +570,7 @@ static u32 xp_alloc_new_from_fq(struct xsk_buff_pool *pool, struct xdp_buff **xd
if (pool->unaligned) {
xskb = pool->free_heads[--pool->free_heads_cnt];
xp_init_xskb_addr(xskb, pool, addr);
- if (pool->dma_pages_cnt)
+ if (pool->dma_pages)
xp_init_xskb_dma(xskb, pool, pool->dma_pages, addr);
} else {
xskb = &pool->heads[xp_aligned_extract_idx(pool, addr)];
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 872b80188e83..3504925babdb 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -205,14 +205,16 @@ static int espintcp_sendskb_locked(struct sock *sk, struct espintcp_msg *emsg,
static int espintcp_sendskmsg_locked(struct sock *sk,
struct espintcp_msg *emsg, int flags)
{
+ struct msghdr msghdr = { .msg_flags = flags | MSG_SPLICE_PAGES, };
struct sk_msg *skmsg = &emsg->skmsg;
struct scatterlist *sg;
int done = 0;
int ret;
- flags |= MSG_SENDPAGE_NOTLAST;
+ msghdr.msg_flags |= MSG_SENDPAGE_NOTLAST;
sg = &skmsg->sg.data[skmsg->sg.start];
do {
+ struct bio_vec bvec;
size_t size = sg->length - emsg->offset;
int offset = sg->offset + emsg->offset;
struct page *p;
@@ -220,11 +222,13 @@ static int espintcp_sendskmsg_locked(struct sock *sk,
emsg->offset = 0;
if (sg_is_last(sg))
- flags &= ~MSG_SENDPAGE_NOTLAST;
+ msghdr.msg_flags &= ~MSG_SENDPAGE_NOTLAST;
p = sg_page(sg);
retry:
- ret = do_tcp_sendpages(sk, p, offset, size, flags);
+ bvec_set_page(&bvec, p, size, offset);
+ iov_iter_bvec(&msghdr.msg_iter, ITER_SOURCE, &bvec, 1, size);
+ ret = tcp_sendmsg_locked(sk, &msghdr, size);
if (ret < 0) {
emsg->offset = offset - sg->offset;
skmsg->sg.start += done;
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 408f5e55744e..533697e2488f 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
+#include <net/gso.h>
#include <net/xfrm.h>
#include <linux/notifier.h>
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 35279c220bd7..a3319965470a 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -33,6 +33,7 @@
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include <net/gso.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/ipv6.h>
diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index 80143360bf09..9c0fa0e1786a 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -74,14 +74,11 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
if (!page)
return -ENOMEM;
- __skb_frag_set_page(frag, page);
-
len = PAGE_SIZE;
if (dlen < len)
len = dlen;
- skb_frag_off_set(frag, 0);
- skb_frag_size_set(frag, len);
+ skb_frag_fill_page_desc(frag, page, 0, len);
memcpy(skb_frag_address(frag), scratch, len);
skb->truesize += len;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 369e5de8558f..662c83beb345 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -13,6 +13,7 @@
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <net/dst.h>
+#include <net/gso.h>
#include <net/icmp.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>