summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2018-07-30 03:39:22 +0300
committerDave Airlie <airlied@redhat.com>2018-07-30 03:39:22 +0300
commit3fce4618279373efc59a91adb16c11da46cd69e5 (patch)
tree086fad6c9f260a0bcc9b6a3532c6cddc387dd907 /net
parentecd7963f7cf967009882fd56eaee1e87a229bea2 (diff)
parentacb1872577b346bd15ab3a3f8dff780d6cca4b70 (diff)
downloadlinux-3fce4618279373efc59a91adb16c11da46cd69e5.tar.xz
BackMerge v4.18-rc7 into drm-next
rmk requested this for armada and I think we've had a few conflicts build up. Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c2
-rw-r--r--net/9p/client.c3
-rw-r--r--net/Makefile4
-rw-r--r--net/batman-adv/bat_iv_ogm.c4
-rw-r--r--net/batman-adv/bat_v.c4
-rw-r--r--net/batman-adv/debugfs.c40
-rw-r--r--net/batman-adv/debugfs.h11
-rw-r--r--net/batman-adv/hard-interface.c37
-rw-r--r--net/batman-adv/translation-table.c7
-rw-r--r--net/bpf/test_run.c17
-rw-r--r--net/bpfilter/Kconfig2
-rw-r--r--net/bpfilter/Makefile17
-rw-r--r--net/bpfilter/bpfilter_kern.c11
-rw-r--r--net/bpfilter/bpfilter_umh_blob.S7
-rw-r--r--net/caif/caif_dev.c4
-rw-r--r--net/core/dev_ioctl.c11
-rw-r--r--net/core/fib_rules.c80
-rw-r--r--net/core/filter.c235
-rw-r--r--net/core/gen_stats.c16
-rw-r--r--net/core/page_pool.c2
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/skbuff.c14
-rw-r--r--net/core/sock.c13
-rw-r--r--net/dns_resolver/dns_key.c28
-rw-r--r--net/ieee802154/6lowpan/core.c6
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/igmp.c61
-rw-r--r--net/ipv4/inet_fragment.c2
-rw-r--r--net/ipv4/ip_output.c2
-rw-r--r--net/ipv4/ip_sockglue.c11
-rw-r--r--net/ipv4/netfilter/ip_tables.c1
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c18
-rw-r--r--net/ipv4/sysctl_net_ipv4.c23
-rw-r--r--net/ipv4/tcp.c16
-rw-r--r--net/ipv4/tcp_dctcp.c75
-rw-r--r--net/ipv4/tcp_input.c78
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_output.c36
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c12
-rw-r--r--net/ipv6/calipso.c9
-rw-r--r--net/ipv6/datagram.c7
-rw-r--r--net/ipv6/exthdrs.c111
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_fib.c156
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ip6_output.c2
-rw-r--r--net/ipv6/ipv6_sockglue.c32
-rw-r--r--net/ipv6/mcast.c67
-rw-r--r--net/ipv6/ndisc.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c1
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c8
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c18
-rw-r--r--net/ipv6/route.c51
-rw-r--r--net/ipv6/seg6_hmac.c2
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/mac80211/rx.c5
-rw-r--r--net/mac80211/tx.c2
-rw-r--r--net/mac80211/util.c3
-rw-r--r--net/netfilter/Kconfig25
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/nf_conncount.c52
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_helper.c5
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c8
-rw-r--r--net/netfilter/nf_log.c13
-rw-r--r--net/netfilter/nf_tables_api.c304
-rw-r--r--net/netfilter/nf_tables_set_core.c28
-rw-r--r--net/netfilter/nfnetlink_queue.c3
-rw-r--r--net/netfilter/nft_compat.c13
-rw-r--r--net/netfilter/nft_immediate.c3
-rw-r--r--net/netfilter/nft_lookup.c13
-rw-r--r--net/netfilter/nft_set_bitmap.c19
-rw-r--r--net/netfilter/nft_set_hash.c30
-rw-r--r--net/netfilter/nft_set_rbtree.c26
-rw-r--r--net/netfilter/xt_TPROXY.c8
-rw-r--r--net/nfc/llcp_commands.c9
-rw-r--r--net/nsh/nsh.c2
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/qrtr/qrtr.c13
-rw-r--r--net/rds/connection.c11
-rw-r--r--net/rds/loop.c56
-rw-r--r--net/rds/loop.h2
-rw-r--r--net/sched/act_csum.c6
-rw-r--r--net/sched/act_tunnel_key.c6
-rw-r--r--net/sched/cls_api.c4
-rw-r--r--net/sched/sch_fq_codel.c25
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/smc/af_smc.c134
-rw-r--r--net/smc/smc.h8
-rw-r--r--net/smc/smc_clc.c3
-rw-r--r--net/smc/smc_close.c2
-rw-r--r--net/smc/smc_tx.c12
-rw-r--r--net/strparser/strparser.c17
-rw-r--r--net/tipc/discover.c18
-rw-r--r--net/tipc/net.c17
-rw-r--r--net/tipc/node.c7
-rw-r--r--net/tls/tls_sw.c10
-rw-r--r--net/wireless/nl80211.c60
-rw-r--r--net/wireless/reg.c28
-rw-r--r--net/wireless/trace.h18
-rw-r--r--net/xdp/xsk.c30
-rw-r--r--net/xdp/xsk_queue.h9
107 files changed, 1574 insertions, 940 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 73a65789271b..8ccee3d01822 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -693,7 +693,7 @@ static struct sk_buff **vlan_gro_receive(struct sk_buff **head,
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_flush_final(skb, pp, flush);
return pp;
}
diff --git a/net/9p/client.c b/net/9p/client.c
index 18c5271910dc..5c1343195292 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -225,7 +225,8 @@ static int parse_opts(char *opts, struct p9_client *clnt)
}
free_and_return:
- v9fs_put_trans(clnt->trans_mod);
+ if (ret)
+ v9fs_put_trans(clnt->trans_mod);
kfree(tmp_options);
return ret;
}
diff --git a/net/Makefile b/net/Makefile
index 13ec0d5415c7..bdaf53925acd 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -20,11 +20,7 @@ obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX) += unix/
obj-$(CONFIG_NET) += ipv6/
-ifneq ($(CC_CAN_LINK),y)
-$(warning CC cannot link executables. Skipping bpfilter.)
-else
obj-$(CONFIG_BPFILTER) += bpfilter/
-endif
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index be09a9883825..73bf6a93a3cf 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -2732,7 +2732,7 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
- struct batadv_gw_node *curr_gw;
+ struct batadv_gw_node *curr_gw = NULL;
int ret = 0;
void *hdr;
@@ -2780,6 +2780,8 @@ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
ret = 0;
out:
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
if (router_ifinfo)
batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c
index ec93337ee259..6baec4e68898 100644
--- a/net/batman-adv/bat_v.c
+++ b/net/batman-adv/bat_v.c
@@ -927,7 +927,7 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
{
struct batadv_neigh_ifinfo *router_ifinfo = NULL;
struct batadv_neigh_node *router;
- struct batadv_gw_node *curr_gw;
+ struct batadv_gw_node *curr_gw = NULL;
int ret = 0;
void *hdr;
@@ -995,6 +995,8 @@ static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq,
ret = 0;
out:
+ if (curr_gw)
+ batadv_gw_node_put(curr_gw);
if (router_ifinfo)
batadv_neigh_ifinfo_put(router_ifinfo);
if (router)
diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c
index 4229b01ac7b5..87479c60670e 100644
--- a/net/batman-adv/debugfs.c
+++ b/net/batman-adv/debugfs.c
@@ -19,6 +19,7 @@
#include "debugfs.h"
#include "main.h"
+#include <linux/dcache.h>
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/errno.h>
@@ -344,6 +345,25 @@ out:
}
/**
+ * batadv_debugfs_rename_hardif() - Fix debugfs path for renamed hardif
+ * @hard_iface: hard interface which was renamed
+ */
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+ const char *name = hard_iface->net_dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = hard_iface->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
+/**
* batadv_debugfs_del_hardif() - delete the base directory for a hard interface
* in debugfs.
* @hard_iface: hard interface which is deleted.
@@ -414,6 +434,26 @@ out:
}
/**
+ * batadv_debugfs_rename_meshif() - Fix debugfs path for renamed softif
+ * @dev: net_device which was renamed
+ */
+void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+ const char *name = dev->name;
+ struct dentry *dir;
+ struct dentry *d;
+
+ dir = bat_priv->debug_dir;
+ if (!dir)
+ return;
+
+ d = debugfs_rename(dir->d_parent, dir, dir->d_parent, name);
+ if (!d)
+ pr_err("Can't rename debugfs dir to %s\n", name);
+}
+
+/**
* batadv_debugfs_del_meshif() - Remove interface dependent debugfs entries
* @dev: netdev struct of the soft interface
*/
diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h
index 37b069698b04..08a592ffbee5 100644
--- a/net/batman-adv/debugfs.h
+++ b/net/batman-adv/debugfs.h
@@ -30,8 +30,10 @@ struct net_device;
void batadv_debugfs_init(void);
void batadv_debugfs_destroy(void);
int batadv_debugfs_add_meshif(struct net_device *dev);
+void batadv_debugfs_rename_meshif(struct net_device *dev);
void batadv_debugfs_del_meshif(struct net_device *dev);
int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface);
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface);
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface);
#else
@@ -49,6 +51,10 @@ static inline int batadv_debugfs_add_meshif(struct net_device *dev)
return 0;
}
+static inline void batadv_debugfs_rename_meshif(struct net_device *dev)
+{
+}
+
static inline void batadv_debugfs_del_meshif(struct net_device *dev)
{
}
@@ -60,6 +66,11 @@ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface)
}
static inline
+void batadv_debugfs_rename_hardif(struct batadv_hard_iface *hard_iface)
+{
+}
+
+static inline
void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface)
{
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c405d15befd6..2f0d42f2f913 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -989,6 +989,32 @@ void batadv_hardif_remove_interfaces(void)
rtnl_unlock();
}
+/**
+ * batadv_hard_if_event_softif() - Handle events for soft interfaces
+ * @event: NETDEV_* event to handle
+ * @net_dev: net_device which generated an event
+ *
+ * Return: NOTIFY_* result
+ */
+static int batadv_hard_if_event_softif(unsigned long event,
+ struct net_device *net_dev)
+{
+ struct batadv_priv *bat_priv;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ batadv_sysfs_add_meshif(net_dev);
+ bat_priv = netdev_priv(net_dev);
+ batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
+ break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_meshif(net_dev);
+ break;
+ }
+
+ return NOTIFY_DONE;
+}
+
static int batadv_hard_if_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
@@ -997,12 +1023,8 @@ static int batadv_hard_if_event(struct notifier_block *this,
struct batadv_hard_iface *primary_if = NULL;
struct batadv_priv *bat_priv;
- if (batadv_softif_is_valid(net_dev) && event == NETDEV_REGISTER) {
- batadv_sysfs_add_meshif(net_dev);
- bat_priv = netdev_priv(net_dev);
- batadv_softif_create_vlan(bat_priv, BATADV_NO_FLAGS);
- return NOTIFY_DONE;
- }
+ if (batadv_softif_is_valid(net_dev))
+ return batadv_hard_if_event_softif(event, net_dev);
hard_iface = batadv_hardif_get_by_netdev(net_dev);
if (!hard_iface && (event == NETDEV_REGISTER ||
@@ -1051,6 +1073,9 @@ static int batadv_hard_if_event(struct notifier_block *this,
if (batadv_is_wifi_hardif(hard_iface))
hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
break;
+ case NETDEV_CHANGENAME:
+ batadv_debugfs_rename_hardif(hard_iface);
+ break;
default:
break;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 3986551397ca..12a2b7d21376 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1705,7 +1705,9 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
ether_addr_copy(common->addr, tt_addr);
common->vid = vid;
- common->flags = flags;
+ if (!is_multicast_ether_addr(common->addr))
+ common->flags = flags & (~BATADV_TT_SYNC_MASK);
+
tt_global_entry->roam_at = 0;
/* node must store current time in case of roaming. This is
* needed to purge this entry out on timeout (if nobody claims
@@ -1768,7 +1770,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
* TT_CLIENT_TEMP, therefore they have to be copied in the
* client entry
*/
- common->flags |= flags & (~BATADV_TT_SYNC_MASK);
+ if (!is_multicast_ether_addr(common->addr))
+ common->flags |= flags & (~BATADV_TT_SYNC_MASK);
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 68c3578343b4..22a78eedf4b1 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -96,6 +96,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
u32 size = kattr->test.data_size_in;
u32 repeat = kattr->test.repeat;
u32 retval, duration;
+ int hh_len = ETH_HLEN;
struct sk_buff *skb;
void *data;
int ret;
@@ -131,12 +132,22 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
skb_reset_network_header(skb);
if (is_l2)
- __skb_push(skb, ETH_HLEN);
+ __skb_push(skb, hh_len);
if (is_direct_pkt_access)
bpf_compute_data_pointers(skb);
retval = bpf_test_run(prog, skb, repeat, &duration);
- if (!is_l2)
- __skb_push(skb, ETH_HLEN);
+ if (!is_l2) {
+ if (skb_headroom(skb) < hh_len) {
+ int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
+
+ if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+ }
+ memset(__skb_push(skb, hh_len), 0, hh_len);
+ }
+
size = skb->len;
/* bpf program can never convert linear skb to non-linear */
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
index a948b072c28f..76deb6615883 100644
--- a/net/bpfilter/Kconfig
+++ b/net/bpfilter/Kconfig
@@ -1,6 +1,5 @@
menuconfig BPFILTER
bool "BPF based packet filtering framework (BPFILTER)"
- default n
depends on NET && BPF && INET
help
This builds experimental bpfilter framework that is aiming to
@@ -9,6 +8,7 @@ menuconfig BPFILTER
if BPFILTER
config BPFILTER_UMH
tristate "bpfilter kernel module with user mode helper"
+ depends on $(success,$(srctree)/scripts/cc-can-link.sh $(CC))
default m
help
This builds bpfilter kernel module with embedded user mode helper
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 051dc18b8ccb..39c6980b5d99 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -15,20 +15,7 @@ ifeq ($(CONFIG_BPFILTER_UMH), y)
HOSTLDFLAGS += -static
endif
-# a bit of elf magic to convert bpfilter_umh binary into a binary blob
-# inside bpfilter_umh.o elf file referenced by
-# _binary_net_bpfilter_bpfilter_umh_start symbol
-# which bpfilter_kern.c passes further into umh blob loader at run-time
-quiet_cmd_copy_umh = GEN $@
- cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
- $(OBJCOPY) -I binary \
- `LC_ALL=C $(OBJDUMP) -f net/bpfilter/bpfilter_umh \
- |awk -F' |,' '/file format/{print "-O",$$NF} \
- /^architecture:/{print "-B",$$2}'` \
- --rename-section .data=.init.rodata $< $@
-
-$(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
- $(call cmd,copy_umh)
+$(obj)/bpfilter_umh_blob.o: $(obj)/bpfilter_umh
obj-$(CONFIG_BPFILTER_UMH) += bpfilter.o
-bpfilter-objs += bpfilter_kern.o bpfilter_umh.o
+bpfilter-objs += bpfilter_kern.o bpfilter_umh_blob.o
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 09522573f611..f0fc182d3db7 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -10,11 +10,8 @@
#include <linux/file.h>
#include "msgfmt.h"
-#define UMH_start _binary_net_bpfilter_bpfilter_umh_start
-#define UMH_end _binary_net_bpfilter_bpfilter_umh_end
-
-extern char UMH_start;
-extern char UMH_end;
+extern char bpfilter_umh_start;
+extern char bpfilter_umh_end;
static struct umh_info info;
/* since ip_getsockopt() can run in parallel, serialize access to umh */
@@ -93,7 +90,9 @@ static int __init load_umh(void)
int err;
/* fork usermode process */
- err = fork_usermode_blob(&UMH_start, &UMH_end - &UMH_start, &info);
+ err = fork_usermode_blob(&bpfilter_umh_start,
+ &bpfilter_umh_end - &bpfilter_umh_start,
+ &info);
if (err)
return err;
pr_info("Loaded bpfilter_umh pid %d\n", info.pid);
diff --git a/net/bpfilter/bpfilter_umh_blob.S b/net/bpfilter/bpfilter_umh_blob.S
new file mode 100644
index 000000000000..40311d10d2f2
--- /dev/null
+++ b/net/bpfilter/bpfilter_umh_blob.S
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+ .section .init.rodata, "a"
+ .global bpfilter_umh_start
+bpfilter_umh_start:
+ .incbin "net/bpfilter/bpfilter_umh"
+ .global bpfilter_umh_end
+bpfilter_umh_end:
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e0adcd123f48..711d7156efd8 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -131,8 +131,10 @@ static void caif_flow_cb(struct sk_buff *skb)
caifd = caif_get(skb->dev);
WARN_ON(caifd == NULL);
- if (caifd == NULL)
+ if (!caifd) {
+ rcu_read_unlock();
return;
+ }
caifd_hold(caifd);
rcu_read_unlock();
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index a04e1e88bf3a..50537ff961a7 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -285,16 +285,9 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
if (ifr->ifr_qlen < 0)
return -EINVAL;
if (dev->tx_queue_len ^ ifr->ifr_qlen) {
- unsigned int orig_len = dev->tx_queue_len;
-
- dev->tx_queue_len = ifr->ifr_qlen;
- err = call_netdevice_notifiers(
- NETDEV_CHANGE_TX_QUEUE_LEN, dev);
- err = notifier_to_errno(err);
- if (err) {
- dev->tx_queue_len = orig_len;
+ err = dev_change_tx_queue_len(dev, ifr->ifr_qlen);
+ if (err)
return err;
- }
}
return 0;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 126ffc5bc630..f64aa13811ea 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -416,6 +416,14 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
if (rule->mark && r->mark != rule->mark)
continue;
+ if (rule->suppress_ifgroup != -1 &&
+ r->suppress_ifgroup != rule->suppress_ifgroup)
+ continue;
+
+ if (rule->suppress_prefixlen != -1 &&
+ r->suppress_prefixlen != rule->suppress_prefixlen)
+ continue;
+
if (rule->mark_mask && r->mark_mask != rule->mark_mask)
continue;
@@ -436,6 +444,9 @@ static struct fib_rule *rule_find(struct fib_rules_ops *ops,
if (rule->ip_proto && r->ip_proto != rule->ip_proto)
continue;
+ if (rule->proto && r->proto != rule->proto)
+ continue;
+
if (fib_rule_port_range_set(&rule->sport_range) &&
!fib_rule_port_range_compare(&r->sport_range,
&rule->sport_range))
@@ -645,6 +656,73 @@ errout:
return err;
}
+static int rule_exists(struct fib_rules_ops *ops, struct fib_rule_hdr *frh,
+ struct nlattr **tb, struct fib_rule *rule)
+{
+ struct fib_rule *r;
+
+ list_for_each_entry(r, &ops->rules_list, list) {
+ if (r->action != rule->action)
+ continue;
+
+ if (r->table != rule->table)
+ continue;
+
+ if (r->pref != rule->pref)
+ continue;
+
+ if (memcmp(r->iifname, rule->iifname, IFNAMSIZ))
+ continue;
+
+ if (memcmp(r->oifname, rule->oifname, IFNAMSIZ))
+ continue;
+
+ if (r->mark != rule->mark)
+ continue;
+
+ if (r->suppress_ifgroup != rule->suppress_ifgroup)
+ continue;
+
+ if (r->suppress_prefixlen != rule->suppress_prefixlen)
+ continue;
+
+ if (r->mark_mask != rule->mark_mask)
+ continue;
+
+ if (r->tun_id != rule->tun_id)
+ continue;
+
+ if (r->fr_net != rule->fr_net)
+ continue;
+
+ if (r->l3mdev != rule->l3mdev)
+ continue;
+
+ if (!uid_eq(r->uid_range.start, rule->uid_range.start) ||
+ !uid_eq(r->uid_range.end, rule->uid_range.end))
+ continue;
+
+ if (r->ip_proto != rule->ip_proto)
+ continue;
+
+ if (r->proto != rule->proto)
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->sport_range,
+ &rule->sport_range))
+ continue;
+
+ if (!fib_rule_port_range_compare(&r->dport_range,
+ &rule->dport_range))
+ continue;
+
+ if (!ops->compare(r, frh, tb))
+ continue;
+ return 1;
+ }
+ return 0;
+}
+
int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -679,7 +757,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
goto errout;
if ((nlh->nlmsg_flags & NLM_F_EXCL) &&
- rule_find(ops, frh, tb, rule, user_priority)) {
+ rule_exists(ops, frh, tb, rule)) {
err = -EEXIST;
goto errout_free;
}
diff --git a/net/core/filter.c b/net/core/filter.c
index e7f12e9f598c..06da770f543f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -459,11 +459,21 @@ static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
(!unaligned_ok && offset >= 0 &&
offset + ip_align >= 0 &&
offset + ip_align % size == 0))) {
+ bool ldx_off_ok = offset <= S16_MAX;
+
*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
- *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP, size, 2 + endian);
- *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A, BPF_REG_D,
- offset);
+ *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
+ size, 2 + endian + (!ldx_off_ok * 2));
+ if (ldx_off_ok) {
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+ BPF_REG_D, offset);
+ } else {
+ *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
+ *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
+ BPF_REG_TMP, 0);
+ }
if (endian)
*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
*insn++ = BPF_JMP_A(8);
@@ -1762,6 +1772,37 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
.arg2_type = ARG_ANYTHING,
};
+static inline int sk_skb_try_make_writable(struct sk_buff *skb,
+ unsigned int write_len)
+{
+ int err = __bpf_try_make_writable(skb, write_len);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return err;
+}
+
+BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
+{
+ /* Idea is the following: should the needed direct read/write
+ * test fail during runtime, we can pull in more data and redo
+ * again, since implicitly, we invalidate previous checks here.
+ *
+ * Or, since we know how much we need to make read/writeable,
+ * this can be done once at the program beginning for direct
+ * access case. By this we overcome limitations of only current
+ * headroom being accessible.
+ */
+ return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
+}
+
+static const struct bpf_func_proto sk_skb_pull_data_proto = {
+ .func = sk_skb_pull_data,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+};
+
BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
u64, from, u64, to, u64, flags)
{
@@ -2779,7 +2820,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
static u32 __bpf_skb_max_len(const struct sk_buff *skb)
{
- return skb->dev->mtu + skb->dev->hard_header_len;
+ return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
+ SKB_MAX_ALLOC;
}
static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
@@ -2863,8 +2905,8 @@ static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
return __skb_trim_rcsum(skb, new_len);
}
-BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
- u64, flags)
+static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
+ u64 flags)
{
u32 max_len = __bpf_skb_max_len(skb);
u32 min_len = __bpf_skb_min_len(skb);
@@ -2900,6 +2942,13 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
if (!ret && skb_is_gso(skb))
skb_gso_reset(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
bpf_compute_data_pointers(skb);
return ret;
@@ -2914,9 +2963,27 @@ static const struct bpf_func_proto bpf_skb_change_tail_proto = {
.arg3_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
u64, flags)
{
+ int ret = __bpf_skb_change_tail(skb, new_len, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_tail_proto = {
+ .func = sk_skb_change_tail,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
+ u64 flags)
+{
u32 max_len = __bpf_skb_max_len(skb);
u32 new_len = skb->len + head_room;
int ret;
@@ -2941,8 +3008,16 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
skb_reset_mac_header(skb);
}
+ return ret;
+}
+
+BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
bpf_compute_data_pointers(skb);
- return 0;
+ return ret;
}
static const struct bpf_func_proto bpf_skb_change_head_proto = {
@@ -2954,6 +3029,23 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
.arg3_type = ARG_ANYTHING,
};
+BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
+ u64, flags)
+{
+ int ret = __bpf_skb_change_head(skb, head_room, flags);
+
+ bpf_compute_data_end_sk_skb(skb);
+ return ret;
+}
+
+static const struct bpf_func_proto sk_skb_change_head_proto = {
+ .func = sk_skb_change_head,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+ .arg2_type = ARG_ANYTHING,
+ .arg3_type = ARG_ANYTHING,
+};
static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
{
return xdp_data_meta_unsupported(xdp) ? 0 :
@@ -3046,12 +3138,16 @@ static int __bpf_tx_xdp(struct net_device *dev,
u32 index)
{
struct xdp_frame *xdpf;
- int sent;
+ int err, sent;
if (!dev->netdev_ops->ndo_xdp_xmit) {
return -EOPNOTSUPP;
}
+ err = xdp_ok_fwd_dev(dev, xdp->data_end - xdp->data);
+ if (unlikely(err))
+ return err;
+
xdpf = convert_to_xdp_frame(xdp);
if (unlikely(!xdpf))
return -EOVERFLOW;
@@ -3285,7 +3381,8 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
goto err;
}
- if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+ err = xdp_ok_fwd_dev(fwd, skb->len);
+ if (unlikely(err))
goto err;
skb->dev = fwd;
@@ -4073,8 +4170,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
+ params->ifindex = dev->ifindex;
- return dev->ifindex;
+ return 0;
}
#endif
@@ -4098,7 +4196,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* verify forwarding is enabled on this interface */
in_dev = __in_dev_get_rcu(dev);
if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
- return 0;
+ return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl4.flowi4_iif = 1;
@@ -4123,7 +4221,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = fib_get_table(net, tbid);
if (unlikely(!tb))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
} else {
@@ -4135,8 +4233,20 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
}
- if (err || res.type != RTN_UNICAST)
- return 0;
+ if (err) {
+ /* map fib lookup errors to RTN_ type */
+ if (err == -EINVAL)
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ if (err == -EHOSTUNREACH)
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ if (err == -EACCES)
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
+
+ if (res.type != RTN_UNICAST)
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
if (res.fi->fib_nhs > 1)
fib_select_path(net, &res, &fl4, NULL);
@@ -4144,19 +4254,16 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
if (params->tot_len > mtu)
- return 0;
+ return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
nh = &res.fi->fib_nh[res.nh_sel];
/* do not handle lwt encaps right now */
if (nh->nh_lwtstate)
- return 0;
+ return BPF_FIB_LKUP_RET_UNSUPP_LWT;
dev = nh->nh_dev;
- if (unlikely(!dev))
- return 0;
-
if (nh->nh_gw)
params->ipv4_dst = nh->nh_gw;
@@ -4166,10 +4273,10 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
* rcu_read_lock_bh is not needed here
*/
neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst);
- if (neigh)
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ if (!neigh)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
- return 0;
+ return bpf_fib_set_fwd_params(params, neigh, dev);
}
#endif
@@ -4190,7 +4297,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
/* link local addresses are never forwarded */
if (rt6_need_strict(dst) || rt6_need_strict(src))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
dev = dev_get_by_index_rcu(net, params->ifindex);
if (unlikely(!dev))
@@ -4198,7 +4305,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
idev = __in6_dev_get_safely(dev);
if (unlikely(!idev || !net->ipv6.devconf_all->forwarding))
- return 0;
+ return BPF_FIB_LKUP_RET_FWD_DISABLED;
if (flags & BPF_FIB_LOOKUP_OUTPUT) {
fl6.flowi6_iif = 1;
@@ -4225,7 +4332,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
tb = ipv6_stub->fib6_get_table(net, tbid);
if (unlikely(!tb))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
f6i = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, strict);
} else {
@@ -4238,11 +4345,23 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
}
if (unlikely(IS_ERR_OR_NULL(f6i) || f6i == net->ipv6.fib6_null_entry))
- return 0;
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+
+ if (unlikely(f6i->fib6_flags & RTF_REJECT)) {
+ switch (f6i->fib6_type) {
+ case RTN_BLACKHOLE:
+ return BPF_FIB_LKUP_RET_BLACKHOLE;
+ case RTN_UNREACHABLE:
+ return BPF_FIB_LKUP_RET_UNREACHABLE;
+ case RTN_PROHIBIT:
+ return BPF_FIB_LKUP_RET_PROHIBIT;
+ default:
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
+ }
+ }
- if (unlikely(f6i->fib6_flags & RTF_REJECT ||
- f6i->fib6_type != RTN_UNICAST))
- return 0;
+ if (f6i->fib6_type != RTN_UNICAST)
+ return BPF_FIB_LKUP_RET_NOT_FWDED;
if (f6i->fib6_nsiblings && fl6.flowi6_oif == 0)
f6i = ipv6_stub->fib6_multipath_select(net, f6i, &fl6,
@@ -4252,11 +4371,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (check_mtu) {
mtu = ipv6_stub->ip6_mtu_from_fib6(f6i, dst, src);
if (params->tot_len > mtu)
- return 0;
+ return BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
if (f6i->fib6_nh.nh_lwtstate)
- return 0;
+ return BPF_FIB_LKUP_RET_UNSUPP_LWT;
if (f6i->fib6_flags & RTF_GATEWAY)
*dst = f6i->fib6_nh.nh_gw;
@@ -4270,10 +4389,10 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
*/
neigh = ___neigh_lookup_noref(ipv6_stub->nd_tbl, neigh_key_eq128,
ndisc_hashfn, dst, dev);
- if (neigh)
- return bpf_fib_set_fwd_params(params, neigh, dev);
+ if (!neigh)
+ return BPF_FIB_LKUP_RET_NO_NEIGH;
- return 0;
+ return bpf_fib_set_fwd_params(params, neigh, dev);
}
#endif
@@ -4315,7 +4434,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
struct net *net = dev_net(skb->dev);
- int index = -EAFNOSUPPORT;
+ int rc = -EAFNOSUPPORT;
if (plen < sizeof(*params))
return -EINVAL;
@@ -4326,25 +4445,25 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
switch (params->family) {
#if IS_ENABLED(CONFIG_INET)
case AF_INET:
- index = bpf_ipv4_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv4_fib_lookup(net, params, flags, false);
break;
#endif
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- index = bpf_ipv6_fib_lookup(net, params, flags, false);
+ rc = bpf_ipv6_fib_lookup(net, params, flags, false);
break;
#endif
}
- if (index > 0) {
+ if (!rc) {
struct net_device *dev;
- dev = dev_get_by_index_rcu(net, index);
+ dev = dev_get_by_index_rcu(net, params->ifindex);
if (!is_skb_forwardable(dev, skb))
- index = 0;
+ rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
}
- return index;
+ return rc;
}
static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
@@ -4417,10 +4536,10 @@ static const struct bpf_func_proto bpf_lwt_push_encap_proto = {
.arg4_type = ARG_CONST_SIZE
};
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
const void *, from, u32, len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_tlvs, *srh_end, *ptr;
@@ -4446,9 +4565,6 @@ BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
memcpy(skb->data + offset, from, len);
return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
@@ -4464,7 +4580,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
u32, action, void *, param, u32, param_len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
struct ipv6_sr_hdr *srh;
@@ -4512,9 +4627,6 @@ BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
default:
return -EINVAL;
}
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
@@ -4530,7 +4642,6 @@ static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
s32, len)
{
-#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
struct seg6_bpf_srh_state *srh_state =
this_cpu_ptr(&seg6_bpf_srh_states);
void *srh_end, *srh_tlvs, *ptr;
@@ -4574,9 +4685,6 @@ BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
srh_state->hdrlen += len;
srh_state->valid = 0;
return 0;
-#else /* CONFIG_IPV6_SEG6_BPF */
- return -EOPNOTSUPP;
-#endif
}
static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
@@ -4587,6 +4695,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_ANYTHING,
};
+#endif /* CONFIG_IPV6_SEG6_BPF */
bool bpf_helper_changes_pkt_data(void *func)
{
@@ -4595,9 +4704,12 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_skb_store_bytes ||
func == bpf_skb_change_proto ||
func == bpf_skb_change_head ||
+ func == sk_skb_change_head ||
func == bpf_skb_change_tail ||
+ func == sk_skb_change_tail ||
func == bpf_skb_adjust_room ||
func == bpf_skb_pull_data ||
+ func == sk_skb_pull_data ||
func == bpf_clone_redirect ||
func == bpf_l3_csum_replace ||
func == bpf_l4_csum_replace ||
@@ -4605,11 +4717,12 @@ bool bpf_helper_changes_pkt_data(void *func)
func == bpf_xdp_adjust_meta ||
func == bpf_msg_pull_data ||
func == bpf_xdp_adjust_tail ||
- func == bpf_lwt_push_encap ||
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
func == bpf_lwt_seg6_store_bytes ||
func == bpf_lwt_seg6_adjust_srh ||
- func == bpf_lwt_seg6_action
- )
+ func == bpf_lwt_seg6_action ||
+#endif
+ func == bpf_lwt_push_encap)
return true;
return false;
@@ -4849,11 +4962,11 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
case BPF_FUNC_skb_load_bytes:
return &bpf_skb_load_bytes_proto;
case BPF_FUNC_skb_pull_data:
- return &bpf_skb_pull_data_proto;
+ return &sk_skb_pull_data_proto;
case BPF_FUNC_skb_change_tail:
- return &bpf_skb_change_tail_proto;
+ return &sk_skb_change_tail_proto;
case BPF_FUNC_skb_change_head:
- return &bpf_skb_change_head_proto;
+ return &sk_skb_change_head_proto;
case BPF_FUNC_get_socket_cookie:
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
@@ -4944,12 +5057,14 @@ static const struct bpf_func_proto *
lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
switch (func_id) {
+#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
case BPF_FUNC_lwt_seg6_store_bytes:
return &bpf_lwt_seg6_store_bytes_proto;
case BPF_FUNC_lwt_seg6_action:
return &bpf_lwt_seg6_action_proto;
case BPF_FUNC_lwt_seg6_adjust_srh:
return &bpf_lwt_seg6_adjust_srh_proto;
+#endif
default:
return lwt_out_func_proto(func_id, prog);
}
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index b2b2323bdc84..188d693cb251 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -77,8 +77,20 @@ gnet_stats_start_copy_compat(struct sk_buff *skb, int type, int tc_stats_type,
d->lock = lock;
spin_lock_bh(lock);
}
- if (d->tail)
- return gnet_stats_copy(d, type, NULL, 0, padattr);
+ if (d->tail) {
+ int ret = gnet_stats_copy(d, type, NULL, 0, padattr);
+
+ /* The initial attribute added in gnet_stats_copy() may be
+ * preceded by a padding attribute, in which case d->tail will
+ * end up pointing at the padding instead of the real attribute.
+ * Fix this so gnet_stats_finish_copy() adjusts the length of
+ * the right attribute.
+ */
+ if (ret == 0 && d->tail->nla_type == padattr)
+ d->tail = (struct nlattr *)((char *)d->tail +
+ NLA_ALIGN(d->tail->nla_len));
+ return ret;
+ }
return 0;
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 68bf07206744..43a932cb609b 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -269,7 +269,7 @@ static void __page_pool_empty_ring(struct page_pool *pool)
struct page *page;
/* Empty recycle ring */
- while ((page = ptr_ring_consume(&pool->ring))) {
+ while ((page = ptr_ring_consume_bh(&pool->ring))) {
/* Verify the refcnt invariant of cached pages */
if (!(page_ref_count(page) == 1))
pr_crit("%s() page_pool refcnt %d violation\n",
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 5ef61222fdef..e3f743c141b3 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2759,9 +2759,12 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
return err;
}
- dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
-
- __dev_notify_flags(dev, old_flags, ~0U);
+ if (dev->rtnl_link_state == RTNL_LINK_INITIALIZED) {
+ __dev_notify_flags(dev, old_flags, 0U);
+ } else {
+ dev->rtnl_link_state = RTNL_LINK_INITIALIZED;
+ __dev_notify_flags(dev, old_flags, ~0U);
+ }
return 0;
}
EXPORT_SYMBOL(rtnl_configure_link);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c642304f178c..fb35b62af272 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -858,6 +858,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
n->cloned = 1;
n->nohdr = 0;
n->peeked = 0;
+ C(pfmemalloc);
n->destructor = NULL;
C(tail);
C(end);
@@ -3719,6 +3720,7 @@ normal:
net_warn_ratelimited(
"skb_segment: too many frags: %u %u\n",
pos, mss);
+ err = -EINVAL;
goto err;
}
@@ -3752,11 +3754,10 @@ skip_fraglist:
perform_csum_check:
if (!csum) {
- if (skb_has_shared_frag(nskb)) {
- err = __skb_linearize(nskb);
- if (err)
- goto err;
- }
+ if (skb_has_shared_frag(nskb) &&
+ __skb_linearize(nskb))
+ goto err;
+
if (!nskb->remcsum_offload)
nskb->ip_summed = CHECKSUM_NONE;
SKB_GSO_CB(nskb)->csum =
@@ -5276,8 +5277,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
if (npages >= 1 << order) {
page = alloc_pages((gfp_mask & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP |
- __GFP_NOWARN |
- __GFP_NORETRY,
+ __GFP_NOWARN,
order);
if (page)
goto fill_page;
diff --git a/net/core/sock.c b/net/core/sock.c
index bcc41829a16d..bc2d7a37297f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2277,9 +2277,9 @@ int sk_alloc_sg(struct sock *sk, int len, struct scatterlist *sg,
pfrag->offset += use;
sge = sg + sg_curr - 1;
- if (sg_curr > first_coalesce && sg_page(sg) == pfrag->page &&
- sg->offset + sg->length == orig_offset) {
- sg->length += use;
+ if (sg_curr > first_coalesce && sg_page(sge) == pfrag->page &&
+ sge->offset + sge->length == orig_offset) {
+ sge->length += use;
} else {
sge = sg + sg_curr;
sg_unmark_end(sge);
@@ -3243,7 +3243,8 @@ static int req_prot_init(const struct proto *prot)
rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
rsk_prot->obj_size, 0,
- prot->slab_flags, NULL);
+ SLAB_ACCOUNT | prot->slab_flags,
+ NULL);
if (!rsk_prot->slab) {
pr_crit("%s: Can't create request sock SLAB cache!\n",
@@ -3258,7 +3259,8 @@ int proto_register(struct proto *prot, int alloc_slab)
if (alloc_slab) {
prot->slab = kmem_cache_create_usercopy(prot->name,
prot->obj_size, 0,
- SLAB_HWCACHE_ALIGN | prot->slab_flags,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
+ prot->slab_flags,
prot->useroffset, prot->usersize,
NULL);
@@ -3281,6 +3283,7 @@ int proto_register(struct proto *prot, int alloc_slab)
kmem_cache_create(prot->twsk_prot->twsk_slab_name,
prot->twsk_prot->twsk_obj_size,
0,
+ SLAB_ACCOUNT |
prot->slab_flags,
NULL);
if (prot->twsk_prot->twsk_slab == NULL)
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 40c851693f77..0c9478b91fa5 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -86,35 +86,39 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
opt++;
kdebug("options: '%s'", opt);
do {
+ int opt_len, opt_nlen;
const char *eq;
- int opt_len, opt_nlen, opt_vlen, tmp;
+ char optval[128];
next_opt = memchr(opt, '#', end - opt) ?: end;
opt_len = next_opt - opt;
- if (opt_len <= 0 || opt_len > 128) {
+ if (opt_len <= 0 || opt_len > sizeof(optval)) {
pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n",
opt_len);
return -EINVAL;
}
- eq = memchr(opt, '=', opt_len) ?: end;
- opt_nlen = eq - opt;
- eq++;
- opt_vlen = next_opt - eq; /* will be -1 if no value */
+ eq = memchr(opt, '=', opt_len);
+ if (eq) {
+ opt_nlen = eq - opt;
+ eq++;
+ memcpy(optval, eq, next_opt - eq);
+ optval[next_opt - eq] = '\0';
+ } else {
+ opt_nlen = opt_len;
+ optval[0] = '\0';
+ }
- tmp = opt_vlen >= 0 ? opt_vlen : 0;
- kdebug("option '%*.*s' val '%*.*s'",
- opt_nlen, opt_nlen, opt, tmp, tmp, eq);
+ kdebug("option '%*.*s' val '%s'",
+ opt_nlen, opt_nlen, opt, optval);
/* see if it's an error number representing a DNS error
* that's to be recorded as the result in this key */
if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 &&
memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) {
kdebug("dns error number option");
- if (opt_vlen <= 0)
- goto bad_option_value;
- ret = kstrtoul(eq, 10, &derrno);
+ ret = kstrtoul(optval, 10, &derrno);
if (ret < 0)
goto bad_option_value;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 275449b0d633..3297e7fa9945 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -90,12 +90,18 @@ static int lowpan_neigh_construct(struct net_device *dev, struct neighbour *n)
return 0;
}
+static int lowpan_get_iflink(const struct net_device *dev)
+{
+ return lowpan_802154_dev(dev)->wdev->ifindex;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_open = lowpan_open,
.ndo_stop = lowpan_stop,
.ndo_neigh_construct = lowpan_neigh_construct,
+ .ndo_get_iflink = lowpan_get_iflink,
};
static void lowpan_setup(struct net_device *ldev)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b21833651394..e46cdd310e5f 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -300,6 +300,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) {
struct flowi4 fl4 = {
.flowi4_iif = LOOPBACK_IFINDEX,
+ .flowi4_oif = l3mdev_master_ifindex_rcu(dev),
.daddr = ip_hdr(skb)->saddr,
.flowi4_tos = RT_TOS(ip_hdr(skb)->tos),
.flowi4_scope = scope,
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 1540db65241a..c9ec1603666b 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -448,9 +448,7 @@ next_proto:
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
- skb_gro_remcsum_cleanup(skb, &grc);
- skb->remcsum_offload = 0;
+ skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
return pp;
}
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 1859c473b21a..6a7d980105f6 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -223,7 +223,7 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head,
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_flush_final(skb, pp, flush);
return pp;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 85b617b655bc..28fef7d15959 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1200,13 +1200,13 @@ static void igmpv3_del_delrec(struct in_device *in_dev, struct ip_mc_list *im)
spin_lock_bh(&im->lock);
if (pmc) {
im->interface = pmc->interface;
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
- im->sfmode = pmc->sfmode;
- if (pmc->sfmode == MCAST_INCLUDE) {
+ if (im->sfmode == MCAST_INCLUDE) {
im->tomb = pmc->tomb;
im->sources = pmc->sources;
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = im->crcount;
+ psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ } else {
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
}
in_dev_put(pmc->interface);
kfree(pmc);
@@ -1288,7 +1288,7 @@ static void igmp_group_dropped(struct ip_mc_list *im)
#endif
}
-static void igmp_group_added(struct ip_mc_list *im)
+static void igmp_group_added(struct ip_mc_list *im, unsigned int mode)
{
struct in_device *in_dev = im->interface;
#ifdef CONFIG_IP_MULTICAST
@@ -1316,7 +1316,13 @@ static void igmp_group_added(struct ip_mc_list *im)
}
/* else, v3 */
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ /* Based on RFC3376 5.1, for newly added INCLUDE SSM, we should
+ * not send filter-mode change record as the mode should be from
+ * IN() to IN(A).
+ */
+ if (mode == MCAST_EXCLUDE)
+ im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+
igmp_ifc_event(in_dev);
#endif
}
@@ -1381,8 +1387,7 @@ static void ip_mc_hash_remove(struct in_device *in_dev,
/*
* A socket has joined a multicast group on device dev.
*/
-
-void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr, unsigned int mode)
{
struct ip_mc_list *im;
#ifdef CONFIG_IP_MULTICAST
@@ -1394,7 +1399,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
for_each_pmc_rtnl(in_dev, im) {
if (im->multiaddr == addr) {
im->users++;
- ip_mc_add_src(in_dev, &addr, MCAST_EXCLUDE, 0, NULL, 0);
+ ip_mc_add_src(in_dev, &addr, mode, 0, NULL, 0);
goto out;
}
}
@@ -1408,8 +1413,8 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
in_dev_hold(in_dev);
im->multiaddr = addr;
/* initial mode is (EX, empty) */
- im->sfmode = MCAST_EXCLUDE;
- im->sfcount[MCAST_EXCLUDE] = 1;
+ im->sfmode = mode;
+ im->sfcount[mode] = 1;
refcount_set(&im->refcnt, 1);
spin_lock_init(&im->lock);
#ifdef CONFIG_IP_MULTICAST
@@ -1426,12 +1431,17 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, im);
#endif
- igmp_group_added(im);
+ igmp_group_added(im, mode);
if (!in_dev->dead)
ip_rt_multicast_event(in_dev);
out:
return;
}
+
+void ip_mc_inc_group(struct in_device *in_dev, __be32 addr)
+{
+ __ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ip_mc_inc_group);
static int ip_mc_check_iphdr(struct sk_buff *skb)
@@ -1688,7 +1698,7 @@ void ip_mc_remap(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, pmc);
#endif
- igmp_group_added(pmc);
+ igmp_group_added(pmc, pmc->sfmode);
}
}
@@ -1751,7 +1761,7 @@ void ip_mc_up(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
igmpv3_del_delrec(in_dev, pmc);
#endif
- igmp_group_added(pmc);
+ igmp_group_added(pmc, pmc->sfmode);
}
}
@@ -2130,8 +2140,8 @@ static void ip_mc_clear_src(struct ip_mc_list *pmc)
/* Join a multicast group
*/
-
-int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr,
+ unsigned int mode)
{
__be32 addr = imr->imr_multiaddr.s_addr;
struct ip_mc_socklist *iml, *i;
@@ -2172,15 +2182,30 @@ int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
memcpy(&iml->multi, imr, sizeof(*imr));
iml->next_rcu = inet->mc_list;
iml->sflist = NULL;
- iml->sfmode = MCAST_EXCLUDE;
+ iml->sfmode = mode;
rcu_assign_pointer(inet->mc_list, iml);
- ip_mc_inc_group(in_dev, addr);
+ __ip_mc_inc_group(in_dev, addr, mode);
err = 0;
done:
return err;
}
+
+/* Join ASM (Any-Source Multicast) group
+ */
+int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr)
+{
+ return __ip_mc_join_group(sk, imr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ip_mc_join_group);
+/* Join SSM (Source-Specific Multicast) group
+ */
+int ip_mc_join_group_ssm(struct sock *sk, struct ip_mreqn *imr,
+ unsigned int mode)
+{
+ return __ip_mc_join_group(sk, imr, mode);
+}
+
static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml,
struct in_device *in_dev)
{
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index c9e35b81d093..1e4cf3ab560f 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -90,7 +90,7 @@ static void inet_frags_free_cb(void *ptr, void *arg)
void inet_frags_exit_net(struct netns_frags *nf)
{
- nf->low_thresh = 0; /* prevent creation of new frags */
+ nf->high_thresh = 0; /* prevent creation of new frags */
rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b3308e9d9762..0e3edd25f881 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -523,6 +523,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->dev = from->dev;
to->mark = from->mark;
+ skb_copy_hash(to, from);
+
/* Copy the flags to each fragment. */
IPCB(to)->flags = IPCB(from)->flags;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc32fdbeefa6..c0fe5ad996f2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -150,15 +150,18 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
{
struct sockaddr_in sin;
const struct iphdr *iph = ip_hdr(skb);
- __be16 *ports = (__be16 *)skb_transport_header(skb);
+ __be16 *ports;
+ int end;
- if (skb_transport_offset(skb) + 4 > (int)skb->len)
+ end = skb_transport_offset(skb) + 4;
+ if (end > 0 && !pskb_may_pull(skb, end))
return;
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
*/
+ ports = (__be16 *)skb_transport_header(skb);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = iph->daddr;
@@ -984,7 +987,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
mreq.imr_address.s_addr = mreqs.imr_interface;
mreq.imr_ifindex = 0;
- err = ip_mc_join_group(sk, &mreq);
+ err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
if (err && err != -EADDRINUSE)
break;
omode = MCAST_INCLUDE;
@@ -1061,7 +1064,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
mreq.imr_multiaddr = psin->sin_addr;
mreq.imr_address.s_addr = 0;
mreq.imr_ifindex = greqs.gsr_interface;
- err = ip_mc_join_group(sk, &mreq);
+ err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
if (err && err != -EADDRINUSE)
break;
greqs.gsr_interface = mreq.imr_ifindex;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ca0dad90803a..e77872c93c20 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1898,6 +1898,7 @@ static struct xt_match ipt_builtin_mt[] __read_mostly = {
.checkentry = icmp_checkentry,
.proto = IPPROTO_ICMP,
.family = NFPROTO_IPV4,
+ .me = THIS_MODULE,
},
};
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
index 805e83ec3ad9..164714104965 100644
--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -37,7 +37,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -71,7 +71,7 @@ __be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb,
const u8 protocol,
const __be32 saddr, const __be32 daddr,
const __be16 sport, const __be16 dport,
@@ -79,16 +79,21 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
- struct tcphdr *tcph;
switch (protocol) {
- case IPPROTO_TCP:
+ case IPPROTO_TCP: {
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(struct tcphdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- tcph = hp;
sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
ip_hdrlen(skb) +
- __tcp_hdrlen(tcph),
+ __tcp_hdrlen(hp),
saddr, sport,
daddr, dport,
in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
BUG();
}
break;
+ }
case IPPROTO_UDP:
sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d06247ba08b2..5fa335fd3852 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -189,8 +189,9 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
if (write && ret == 0) {
low = make_kgid(user_ns, urange[0]);
high = make_kgid(user_ns, urange[1]);
- if (!gid_valid(low) || !gid_valid(high) ||
- (urange[1] < urange[0]) || gid_lt(high, low)) {
+ if (!gid_valid(low) || !gid_valid(high))
+ return -EINVAL;
+ if (urange[1] < urange[0] || gid_lt(high, low)) {
low = make_kgid(&init_user_ns, 1);
high = make_kgid(&init_user_ns, 0);
}
@@ -265,8 +266,9 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
ipv4.sysctl_tcp_fastopen);
struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) };
struct tcp_fastopen_context *ctxt;
- int ret;
u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */
+ __le32 key[4];
+ int ret, i;
tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL);
if (!tbl.data)
@@ -275,11 +277,14 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
rcu_read_lock();
ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx);
if (ctxt)
- memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
+ memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH);
else
- memset(user_key, 0, sizeof(user_key));
+ memset(key, 0, sizeof(key));
rcu_read_unlock();
+ for (i = 0; i < ARRAY_SIZE(key); i++)
+ user_key[i] = le32_to_cpu(key[i]);
+
snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x",
user_key[0], user_key[1], user_key[2], user_key[3]);
ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
@@ -290,13 +295,17 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
ret = -EINVAL;
goto bad_key;
}
- tcp_fastopen_reset_cipher(net, NULL, user_key,
+
+ for (i = 0; i < ARRAY_SIZE(user_key); i++)
+ key[i] = cpu_to_le32(user_key[i]);
+
+ tcp_fastopen_reset_cipher(net, NULL, key,
TCP_FASTOPEN_KEY_LENGTH);
}
bad_key:
pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n",
- user_key[0], user_key[1], user_key[2], user_key[3],
+ user_key[0], user_key[1], user_key[2], user_key[3],
(char *)tbl.data, ret);
kfree(tbl.data);
return ret;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e7b53d2a971f..4491faf83f4f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1998,7 +1998,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
* shouldn't happen.
*/
if (WARN(before(*seq, TCP_SKB_CB(skb)->seq),
- "recvmsg bug: copied %X seq %X rcvnxt %X fl %X\n",
+ "TCP recvmsg seq # bug: copied %X, seq %X, rcvnxt %X, fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt,
flags))
break;
@@ -2013,7 +2013,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto found_fin_ok;
WARN(!(flags & MSG_PEEK),
- "recvmsg bug 2: copied %X seq %X rcvnxt %X fl %X\n",
+ "TCP recvmsg seq # bug 2: copied %X, seq %X, rcvnxt %X, fl %X\n",
*seq, TCP_SKB_CB(skb)->seq, tp->rcv_nxt, flags);
}
@@ -2562,6 +2562,8 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_clear_xmit_timers(sk);
__skb_queue_purge(&sk->sk_receive_queue);
+ tp->copied_seq = tp->rcv_nxt;
+ tp->urg_data = 0;
tcp_write_queue_purge(sk);
tcp_fastopen_active_disable_ofo_check(sk);
skb_rbtree_purge(&tp->out_of_order_queue);
@@ -2821,14 +2823,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_REPAIR:
if (!tcp_can_repair_sock(sk))
err = -EPERM;
- else if (val == 1) {
+ else if (val == TCP_REPAIR_ON) {
tp->repair = 1;
sk->sk_reuse = SK_FORCE_REUSE;
tp->repair_queue = TCP_NO_QUEUE;
- } else if (val == 0) {
+ } else if (val == TCP_REPAIR_OFF) {
tp->repair = 0;
sk->sk_reuse = SK_NO_REUSE;
tcp_send_window_probe(sk);
+ } else if (val == TCP_REPAIR_OFF_NO_WP) {
+ tp->repair = 0;
+ sk->sk_reuse = SK_NO_REUSE;
} else
err = -EINVAL;
@@ -3720,8 +3725,7 @@ int tcp_abort(struct sock *sk, int err)
struct request_sock *req = inet_reqsk(sk);
local_bh_disable();
- inet_csk_reqsk_queue_drop_and_put(req->rsk_listener,
- req);
+ inet_csk_reqsk_queue_drop(req->rsk_listener, req);
local_bh_enable();
return 0;
}
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 5f5e5936760e..8b637f9f23a2 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -55,7 +55,6 @@ struct dctcp {
u32 dctcp_alpha;
u32 next_seq;
u32 ce_state;
- u32 delayed_ack_reserved;
u32 loss_cwnd;
};
@@ -96,7 +95,6 @@ static void dctcp_init(struct sock *sk)
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
- ca->delayed_ack_reserved = 0;
ca->loss_cwnd = 0;
ca->ce_state = 0;
@@ -131,23 +129,14 @@ static void dctcp_ce_state_0_to_1(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- /* State has changed from CE=0 to CE=1 and delayed
- * ACK has not sent yet.
- */
- if (!ca->ce_state && ca->delayed_ack_reserved) {
- u32 tmp_rcv_nxt;
-
- /* Save current rcv_nxt. */
- tmp_rcv_nxt = tp->rcv_nxt;
-
- /* Generate previous ack with CE=0. */
- tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
- tp->rcv_nxt = ca->prior_rcv_nxt;
-
- tcp_send_ack(sk);
-
- /* Recover current rcv_nxt. */
- tp->rcv_nxt = tmp_rcv_nxt;
+ if (!ca->ce_state) {
+ /* State has changed from CE=0 to CE=1, force an immediate
+ * ACK to reflect the new CE state. If an ACK was delayed,
+ * send that first to reflect the prior CE state.
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
+ tcp_enter_quickack_mode(sk, 1);
}
ca->prior_rcv_nxt = tp->rcv_nxt;
@@ -161,23 +150,14 @@ static void dctcp_ce_state_1_to_0(struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
- /* State has changed from CE=1 to CE=0 and delayed
- * ACK has not sent yet.
- */
- if (ca->ce_state && ca->delayed_ack_reserved) {
- u32 tmp_rcv_nxt;
-
- /* Save current rcv_nxt. */
- tmp_rcv_nxt = tp->rcv_nxt;
-
- /* Generate previous ack with CE=1. */
- tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
- tp->rcv_nxt = ca->prior_rcv_nxt;
-
- tcp_send_ack(sk);
-
- /* Recover current rcv_nxt. */
- tp->rcv_nxt = tmp_rcv_nxt;
+ if (ca->ce_state) {
+ /* State has changed from CE=1 to CE=0, force an immediate
+ * ACK to reflect the new CE state. If an ACK was delayed,
+ * send that first to reflect the prior CE state.
+ */
+ if (inet_csk(sk)->icsk_ack.pending & ICSK_ACK_TIMER)
+ __tcp_send_ack(sk, ca->prior_rcv_nxt);
+ tcp_enter_quickack_mode(sk, 1);
}
ca->prior_rcv_nxt = tp->rcv_nxt;
@@ -248,25 +228,6 @@ static void dctcp_state(struct sock *sk, u8 new_state)
}
}
-static void dctcp_update_ack_reserved(struct sock *sk, enum tcp_ca_event ev)
-{
- struct dctcp *ca = inet_csk_ca(sk);
-
- switch (ev) {
- case CA_EVENT_DELAYED_ACK:
- if (!ca->delayed_ack_reserved)
- ca->delayed_ack_reserved = 1;
- break;
- case CA_EVENT_NON_DELAYED_ACK:
- if (ca->delayed_ack_reserved)
- ca->delayed_ack_reserved = 0;
- break;
- default:
- /* Don't care for the rest. */
- break;
- }
-}
-
static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
switch (ev) {
@@ -276,10 +237,6 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
case CA_EVENT_ECN_NO_CE:
dctcp_ce_state_1_to_0(sk);
break;
- case CA_EVENT_DELAYED_ACK:
- case CA_EVENT_NON_DELAYED_ACK:
- dctcp_update_ack_reserved(sk, ev);
- break;
default:
/* Don't care for the rest. */
break;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 355d3dffd021..3bcd30a2ba06 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -215,7 +215,7 @@ static void tcp_incr_quickack(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.quick = quickacks;
}
-static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
+void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -223,6 +223,7 @@ static void tcp_enter_quickack_mode(struct sock *sk, unsigned int max_quickacks)
icsk->icsk_ack.pingpong = 0;
icsk->icsk_ack.ato = TCP_ATO_MIN;
}
+EXPORT_SYMBOL(tcp_enter_quickack_mode);
/* Send ACKs quickly, if "quick" count is not exhausted
* and the session is not interactive.
@@ -265,7 +266,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
* it is probably a retransmit.
*/
if (tp->ecn_flags & TCP_ECN_SEEN)
- tcp_enter_quickack_mode(sk, 1);
+ tcp_enter_quickack_mode(sk, 2);
break;
case INET_ECN_CE:
if (tcp_ca_needs_ecn(sk))
@@ -273,7 +274,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
/* Better not delay acks, sender can have a very low cwnd */
- tcp_enter_quickack_mode(sk, 1);
+ tcp_enter_quickack_mode(sk, 2);
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
tp->ecn_flags |= TCP_ECN_SEEN;
@@ -3181,6 +3182,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack,
if (tcp_is_reno(tp)) {
tcp_remove_reno_sacks(sk, pkts_acked);
+
+ /* If any of the cumulatively ACKed segments was
+ * retransmitted, non-SACK case cannot confirm that
+ * progress was due to original transmission due to
+ * lack of TCPCB_SACKED_ACKED bits even if some of
+ * the packets may have been never retransmitted.
+ */
+ if (flag & FLAG_RETRANS_DATA_ACKED)
+ flag &= ~FLAG_ORIG_SACK_ACKED;
} else {
int delta;
@@ -4348,6 +4358,23 @@ static bool tcp_try_coalesce(struct sock *sk,
return true;
}
+static bool tcp_ooo_try_coalesce(struct sock *sk,
+ struct sk_buff *to,
+ struct sk_buff *from,
+ bool *fragstolen)
+{
+ bool res = tcp_try_coalesce(sk, to, from, fragstolen);
+
+ /* In case tcp_drop() is called later, update to->gso_segs */
+ if (res) {
+ u32 gso_segs = max_t(u16, 1, skb_shinfo(to)->gso_segs) +
+ max_t(u16, 1, skb_shinfo(from)->gso_segs);
+
+ skb_shinfo(to)->gso_segs = min_t(u32, gso_segs, 0xFFFF);
+ }
+ return res;
+}
+
static void tcp_drop(struct sock *sk, struct sk_buff *skb)
{
sk_drops_add(sk, skb);
@@ -4471,8 +4498,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, tp->ooo_last_skb,
- skb, &fragstolen)) {
+ if (tcp_ooo_try_coalesce(sk, tp->ooo_last_skb,
+ skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
@@ -4500,7 +4527,7 @@ coalesce_done:
/* All the bits are present. Drop. */
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- __kfree_skb(skb);
+ tcp_drop(sk, skb);
skb = NULL;
tcp_dsack_set(sk, seq, end_seq);
goto add_sack;
@@ -4519,11 +4546,11 @@ coalesce_done:
TCP_SKB_CB(skb1)->end_seq);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPOFOMERGE);
- __kfree_skb(skb1);
+ tcp_drop(sk, skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, skb1,
- skb, &fragstolen)) {
+ } else if (tcp_ooo_try_coalesce(sk, skb1,
+ skb, &fragstolen)) {
goto coalesce_done;
}
p = &parent->rb_right;
@@ -4892,6 +4919,7 @@ end:
static void tcp_collapse_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
+ u32 range_truesize, sum_tiny = 0;
struct sk_buff *skb, *head;
u32 start, end;
@@ -4903,6 +4931,7 @@ new_range:
}
start = TCP_SKB_CB(skb)->seq;
end = TCP_SKB_CB(skb)->end_seq;
+ range_truesize = skb->truesize;
for (head = skb;;) {
skb = skb_rb_next(skb);
@@ -4913,11 +4942,20 @@ new_range:
if (!skb ||
after(TCP_SKB_CB(skb)->seq, end) ||
before(TCP_SKB_CB(skb)->end_seq, start)) {
- tcp_collapse(sk, NULL, &tp->out_of_order_queue,
- head, skb, start, end);
+ /* Do not attempt collapsing tiny skbs */
+ if (range_truesize != head->truesize ||
+ end - start >= SKB_WITH_OVERHEAD(SK_MEM_QUANTUM)) {
+ tcp_collapse(sk, NULL, &tp->out_of_order_queue,
+ head, skb, start, end);
+ } else {
+ sum_tiny += range_truesize;
+ if (sum_tiny > sk->sk_rcvbuf >> 3)
+ return;
+ }
goto new_range;
}
+ range_truesize += skb->truesize;
if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
start = TCP_SKB_CB(skb)->seq;
if (after(TCP_SKB_CB(skb)->end_seq, end))
@@ -4932,6 +4970,7 @@ new_range:
* 2) not add too big latencies if thousands of packets sit there.
* (But if application shrinks SO_RCVBUF, we could still end up
* freeing whole queue here)
+ * 3) Drop at least 12.5 % of sk_rcvbuf to avoid malicious attacks.
*
* Return true if queue has shrunk.
*/
@@ -4939,20 +4978,26 @@ static bool tcp_prune_ofo_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct rb_node *node, *prev;
+ int goal;
if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
return false;
NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ goal = sk->sk_rcvbuf >> 3;
node = &tp->ooo_last_skb->rbnode;
do {
prev = rb_prev(node);
rb_erase(node, &tp->out_of_order_queue);
+ goal -= rb_to_skb(node)->truesize;
tcp_drop(sk, rb_to_skb(node));
- sk_mem_reclaim(sk);
- if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
- !tcp_under_memory_pressure(sk))
- break;
+ if (!prev || goal <= 0) {
+ sk_mem_reclaim(sk);
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ !tcp_under_memory_pressure(sk))
+ break;
+ goal = sk->sk_rcvbuf >> 3;
+ }
node = prev;
} while (node);
tp->ooo_last_skb = rb_to_skb(prev);
@@ -4987,6 +5032,9 @@ static int tcp_prune_queue(struct sock *sk)
else if (tcp_under_memory_pressure(sk))
tp->rcv_ssthresh = min(tp->rcv_ssthresh, 4U * tp->advmss);
+ if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
+ return 0;
+
tcp_collapse_ofo_queue(sk);
if (!skb_queue_empty(&sk->sk_receive_queue))
tcp_collapse(sk, &sk->sk_receive_queue, NULL,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index bea17f1e8302..3b2711e33e4c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -156,11 +156,24 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
*/
if (tcptw->tw_ts_recent_stamp &&
(!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
- tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
- if (tp->write_seq == 0)
- tp->write_seq = 1;
- tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
- tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ /* In case of repair and re-using TIME-WAIT sockets we still
+ * want to be sure that it is safe as above but honor the
+ * sequence numbers and time stamps set as part of the repair
+ * process.
+ *
+ * Without this check re-using a TIME-WAIT socket with TCP
+ * repair would accumulate a -1 on the repair assigned
+ * sequence number. The first time it is reused the sequence
+ * is -1, the second time -2, etc. This fixes that issue
+ * without appearing to create any others.
+ */
+ if (likely(!tp->repair)) {
+ tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+ if (tp->write_seq == 0)
+ tp->write_seq = 1;
+ tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
+ tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+ }
sock_hold(sktw);
return 1;
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 8e08b409c71e..c4172c1fb198 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -160,7 +160,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
+static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
+ u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -171,6 +172,9 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
+
+ if (unlikely(rcv_nxt != tp->rcv_nxt))
+ return; /* Special ACK sent by DCTCP to reflect ECN */
tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -1023,8 +1027,8 @@ static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb)
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
*/
-static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
- gfp_t gfp_mask)
+static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
+ int clone_it, gfp_t gfp_mask, u32 rcv_nxt)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet;
@@ -1100,7 +1104,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
th->source = inet->inet_sport;
th->dest = inet->inet_dport;
th->seq = htonl(tcb->seq);
- th->ack_seq = htonl(tp->rcv_nxt);
+ th->ack_seq = htonl(rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
tcb->tcp_flags);
@@ -1141,7 +1145,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
+ tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@@ -1178,6 +1182,13 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
return err;
}
+static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
+ gfp_t gfp_mask)
+{
+ return __tcp_transmit_skb(sk, skb, clone_it, gfp_mask,
+ tcp_sk(sk)->rcv_nxt);
+}
+
/* This routine just queues the buffer for sending.
*
* NOTE: probe0 timer is not checked, do not forget tcp_push_pending_frames,
@@ -3523,8 +3534,6 @@ void tcp_send_delayed_ack(struct sock *sk)
int ato = icsk->icsk_ack.ato;
unsigned long timeout;
- tcp_ca_event(sk, CA_EVENT_DELAYED_ACK);
-
if (ato > TCP_DELACK_MIN) {
const struct tcp_sock *tp = tcp_sk(sk);
int max_ato = HZ / 2;
@@ -3573,7 +3582,7 @@ void tcp_send_delayed_ack(struct sock *sk)
}
/* This routine sends an ack and also updates the window. */
-void tcp_send_ack(struct sock *sk)
+void __tcp_send_ack(struct sock *sk, u32 rcv_nxt)
{
struct sk_buff *buff;
@@ -3581,8 +3590,6 @@ void tcp_send_ack(struct sock *sk)
if (sk->sk_state == TCP_CLOSE)
return;
- tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
-
/* We are not putting this on the write queue, so
* tcp_transmit_skb() will set the ownership to this
* sock.
@@ -3608,9 +3615,14 @@ void tcp_send_ack(struct sock *sk)
skb_set_tcp_pure_ack(buff);
/* Send it off, this clears delayed acks for us. */
- tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
+ __tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0, rcv_nxt);
+}
+EXPORT_SYMBOL_GPL(__tcp_send_ack);
+
+void tcp_send_ack(struct sock *sk)
+{
+ __tcp_send_ack(sk, tcp_sk(sk)->rcv_nxt);
}
-EXPORT_SYMBOL_GPL(tcp_send_ack);
/* This routine sends a packet with an out of date sequence
* number. It assumes the other end will try to ack it.
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 92dc9e5a7ff3..69c54540d5b4 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -394,7 +394,7 @@ unflush:
out_unlock:
rcu_read_unlock();
out:
- NAPI_GRO_CB(skb)->flush |= flush;
+ skb_gro_flush_final(skb, pp, flush);
return pp;
}
EXPORT_SYMBOL(udp_gro_receive);
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 0eff75525da1..b3885ca22d6f 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -108,6 +108,7 @@ config IPV6_MIP6
config IPV6_ILA
tristate "IPv6: Identifier Locator Addressing (ILA)"
depends on NETFILTER
+ select DST_CACHE
select LWTUNNEL
---help---
Support for IPv6 Identifier Locator Addressing (ILA).
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c134286d6a41..f66a1cae3366 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2374,7 +2374,8 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
continue;
if ((rt->fib6_flags & noflags) != 0)
continue;
- fib6_info_hold(rt);
+ if (!fib6_info_hold_safe(rt))
+ continue;
break;
}
out:
@@ -4528,6 +4529,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
unsigned long expires, u32 flags)
{
struct fib6_info *f6i;
+ u32 prio;
f6i = addrconf_get_prefix_route(&ifp->addr,
ifp->prefix_len,
@@ -4536,13 +4538,15 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
if (!f6i)
return -ENOENT;
- if (f6i->fib6_metric != ifp->rt_priority) {
+ prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF;
+ if (f6i->fib6_metric != prio) {
+ /* delete old one */
+ ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+
/* add new one */
addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
ifp->rt_priority, ifp->idev->dev,
expires, flags, GFP_KERNEL);
- /* delete old one */
- ip6_del_rt(dev_net(ifp->idev->dev), f6i);
} else {
if (!expires)
fib6_clean_expires(f6i);
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 1323b9679cf7..1c0bb9fb76e6 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -799,8 +799,7 @@ static int calipso_opt_update(struct sock *sk, struct ipv6_opt_hdr *hop)
{
struct ipv6_txoptions *old = txopt_get(inet6_sk(sk)), *txopts;
- txopts = ipv6_renew_options_kern(sk, old, IPV6_HOPOPTS,
- hop, hop ? ipv6_optlen(hop) : 0);
+ txopts = ipv6_renew_options(sk, old, IPV6_HOPOPTS, hop);
txopt_put(old);
if (IS_ERR(txopts))
return PTR_ERR(txopts);
@@ -1222,8 +1221,7 @@ static int calipso_req_setattr(struct request_sock *req,
if (IS_ERR(new))
return PTR_ERR(new);
- txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
- new, new ? ipv6_optlen(new) : 0);
+ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
kfree(new);
@@ -1260,8 +1258,7 @@ static void calipso_req_delattr(struct request_sock *req)
if (calipso_opt_del(req_inet->ipv6_opt->hopopt, &new))
return; /* Nothing to do */
- txopts = ipv6_renew_options_kern(sk, req_inet->ipv6_opt, IPV6_HOPOPTS,
- new, new ? ipv6_optlen(new) : 0);
+ txopts = ipv6_renew_options(sk, req_inet->ipv6_opt, IPV6_HOPOPTS, new);
if (!IS_ERR(txopts)) {
txopts = xchg(&req_inet->ipv6_opt, txopts);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 2ee08b6a86a4..1a1f876f8e28 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -700,13 +700,16 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
}
if (np->rxopt.bits.rxorigdstaddr) {
struct sockaddr_in6 sin6;
- __be16 *ports = (__be16 *) skb_transport_header(skb);
+ __be16 *ports;
+ int end;
- if (skb_transport_offset(skb) + 4 <= (int)skb->len) {
+ end = skb_transport_offset(skb) + 4;
+ if (end <= 0 || pskb_may_pull(skb, end)) {
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
*/
+ ports = (__be16 *)skb_transport_header(skb);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ipv6_hdr(skb)->daddr;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 5bc2bf3733ab..20291c2036fc 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -1015,29 +1015,21 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
}
EXPORT_SYMBOL_GPL(ipv6_dup_options);
-static int ipv6_renew_option(void *ohdr,
- struct ipv6_opt_hdr __user *newopt, int newoptlen,
- int inherit,
- struct ipv6_opt_hdr **hdr,
- char **p)
+static void ipv6_renew_option(int renewtype,
+ struct ipv6_opt_hdr **dest,
+ struct ipv6_opt_hdr *old,
+ struct ipv6_opt_hdr *new,
+ int newtype, char **p)
{
- if (inherit) {
- if (ohdr) {
- memcpy(*p, ohdr, ipv6_optlen((struct ipv6_opt_hdr *)ohdr));
- *hdr = (struct ipv6_opt_hdr *)*p;
- *p += CMSG_ALIGN(ipv6_optlen(*hdr));
- }
- } else {
- if (newopt) {
- if (copy_from_user(*p, newopt, newoptlen))
- return -EFAULT;
- *hdr = (struct ipv6_opt_hdr *)*p;
- if (ipv6_optlen(*hdr) > newoptlen)
- return -EINVAL;
- *p += CMSG_ALIGN(newoptlen);
- }
- }
- return 0;
+ struct ipv6_opt_hdr *src;
+
+ src = (renewtype == newtype ? new : old);
+ if (!src)
+ return;
+
+ memcpy(*p, src, ipv6_optlen(src));
+ *dest = (struct ipv6_opt_hdr *)*p;
+ *p += CMSG_ALIGN(ipv6_optlen(*dest));
}
/**
@@ -1063,13 +1055,11 @@ static int ipv6_renew_option(void *ohdr,
*/
struct ipv6_txoptions *
ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
- int newtype,
- struct ipv6_opt_hdr __user *newopt, int newoptlen)
+ int newtype, struct ipv6_opt_hdr *newopt)
{
int tot_len = 0;
char *p;
struct ipv6_txoptions *opt2;
- int err;
if (opt) {
if (newtype != IPV6_HOPOPTS && opt->hopopt)
@@ -1082,8 +1072,8 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
tot_len += CMSG_ALIGN(ipv6_optlen(opt->dst1opt));
}
- if (newopt && newoptlen)
- tot_len += CMSG_ALIGN(newoptlen);
+ if (newopt)
+ tot_len += CMSG_ALIGN(ipv6_optlen(newopt));
if (!tot_len)
return NULL;
@@ -1098,29 +1088,19 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
- err = ipv6_renew_option(opt ? opt->hopopt : NULL, newopt, newoptlen,
- newtype != IPV6_HOPOPTS,
- &opt2->hopopt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->dst0opt : NULL, newopt, newoptlen,
- newtype != IPV6_RTHDRDSTOPTS,
- &opt2->dst0opt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->srcrt : NULL, newopt, newoptlen,
- newtype != IPV6_RTHDR,
- (struct ipv6_opt_hdr **)&opt2->srcrt, &p);
- if (err)
- goto out;
-
- err = ipv6_renew_option(opt ? opt->dst1opt : NULL, newopt, newoptlen,
- newtype != IPV6_DSTOPTS,
- &opt2->dst1opt, &p);
- if (err)
- goto out;
+ ipv6_renew_option(IPV6_HOPOPTS, &opt2->hopopt,
+ (opt ? opt->hopopt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_RTHDRDSTOPTS, &opt2->dst0opt,
+ (opt ? opt->dst0opt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_RTHDR,
+ (struct ipv6_opt_hdr **)&opt2->srcrt,
+ (opt ? (struct ipv6_opt_hdr *)opt->srcrt : NULL),
+ newopt, newtype, &p);
+ ipv6_renew_option(IPV6_DSTOPTS, &opt2->dst1opt,
+ (opt ? opt->dst1opt : NULL),
+ newopt, newtype, &p);
opt2->opt_nflen = (opt2->hopopt ? ipv6_optlen(opt2->hopopt) : 0) +
(opt2->dst0opt ? ipv6_optlen(opt2->dst0opt) : 0) +
@@ -1128,37 +1108,6 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
opt2->opt_flen = (opt2->dst1opt ? ipv6_optlen(opt2->dst1opt) : 0);
return opt2;
-out:
- sock_kfree_s(sk, opt2, opt2->tot_len);
- return ERR_PTR(err);
-}
-
-/**
- * ipv6_renew_options_kern - replace a specific ext hdr with a new one.
- *
- * @sk: sock from which to allocate memory
- * @opt: original options
- * @newtype: option type to replace in @opt
- * @newopt: new option of type @newtype to replace (kernel-mem)
- * @newoptlen: length of @newopt
- *
- * See ipv6_renew_options(). The difference is that @newopt is
- * kernel memory, rather than user memory.
- */
-struct ipv6_txoptions *
-ipv6_renew_options_kern(struct sock *sk, struct ipv6_txoptions *opt,
- int newtype, struct ipv6_opt_hdr *newopt,
- int newoptlen)
-{
- struct ipv6_txoptions *ret_val;
- const mm_segment_t old_fs = get_fs();
-
- set_fs(KERNEL_DS);
- ret_val = ipv6_renew_options(sk, opt, newtype,
- (struct ipv6_opt_hdr __user *)newopt,
- newoptlen);
- set_fs(old_fs);
- return ret_val;
}
struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index be491bf6ab6e..ef2505aefc15 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -402,9 +402,10 @@ static int icmp6_iif(const struct sk_buff *skb)
/* for local traffic to local address, skb dev is the loopback
* device. Check if there is a dst attached to the skb and if so
- * get the real device index.
+ * get the real device index. Same is needed for replies to a link
+ * local address on a device enslaved to an L3 master device
*/
- if (unlikely(iif == LOOPBACK_IFINDEX)) {
+ if (unlikely(iif == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
const struct rt6_info *rt6 = skb_rt6_info(skb);
if (rt6)
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1fb2f3118d60..d212738e9d10 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
{
struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
lockdep_is_held(&rt->fib6_table->tb6_lock));
- enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
- struct fib6_info *iter = NULL, *match = NULL;
+ struct fib6_info *iter = NULL;
struct fib6_info __rcu **ins;
+ struct fib6_info __rcu **fallback_ins = NULL;
int replace = (info->nlh &&
(info->nlh->nlmsg_flags & NLM_F_REPLACE));
- int append = (info->nlh &&
- (info->nlh->nlmsg_flags & NLM_F_APPEND));
int add = (!info->nlh ||
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
u16 nlflags = NLM_F_EXCL;
int err;
- if (append)
+ if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
nlflags |= NLM_F_APPEND;
ins = &fn->leaf;
@@ -970,8 +969,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
nlflags &= ~NLM_F_EXCL;
if (replace) {
- found++;
- break;
+ if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+ found++;
+ break;
+ }
+ if (rt_can_ecmp)
+ fallback_ins = fallback_ins ?: ins;
+ goto next_iter;
}
if (rt6_duplicate_nexthop(iter, rt)) {
@@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
return -EEXIST;
}
-
- /* first route that matches */
- if (!match)
- match = iter;
+ /* If we have the same destination and the same metric,
+ * but not the same gateway, then the route we try to
+ * add is sibling to this route, increment our counter
+ * of siblings, and later we will add our route to the
+ * list.
+ * Only static routes (which don't have flag
+ * RTF_EXPIRES) are used for ECMPv6.
+ *
+ * To avoid long list, we only had siblings if the
+ * route have a gateway.
+ */
+ if (rt_can_ecmp &&
+ rt6_qualify_for_ecmp(iter))
+ rt->fib6_nsiblings++;
}
if (iter->fib6_metric > rt->fib6_metric)
break;
+next_iter:
ins = &iter->fib6_next;
}
+ if (fallback_ins && !found) {
+ /* No ECMP-able route found, replace first non-ECMP one */
+ ins = fallback_ins;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ found++;
+ }
+
/* Reset round-robin state, if necessary */
if (ins == &fn->leaf)
fn->rr_ptr = NULL;
/* Link this route to others same route. */
- if (append && match) {
+ if (rt->fib6_nsiblings) {
+ unsigned int fib6_nsiblings;
struct fib6_info *sibling, *temp_sibling;
- if (rt->fib6_flags & RTF_REJECT) {
- NL_SET_ERR_MSG(extack,
- "Can not append a REJECT route");
- return -EINVAL;
- } else if (match->fib6_flags & RTF_REJECT) {
- NL_SET_ERR_MSG(extack,
- "Can not append to a REJECT route");
- return -EINVAL;
+ /* Find the first route that have the same metric */
+ sibling = leaf;
+ while (sibling) {
+ if (sibling->fib6_metric == rt->fib6_metric &&
+ rt6_qualify_for_ecmp(sibling)) {
+ list_add_tail(&rt->fib6_siblings,
+ &sibling->fib6_siblings);
+ break;
+ }
+ sibling = rcu_dereference_protected(sibling->fib6_next,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
- event = FIB_EVENT_ENTRY_APPEND;
- rt->fib6_nsiblings = match->fib6_nsiblings;
- list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
- match->fib6_nsiblings++;
-
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
* is broken!
*/
+ fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
- &match->fib6_siblings, fib6_siblings) {
+ &rt->fib6_siblings, fib6_siblings) {
sibling->fib6_nsiblings++;
- BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
+ BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+ fib6_nsiblings++;
}
-
- rt6_multipath_rebalance(match);
+ BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+ rt6_multipath_rebalance(temp_sibling);
}
/*
@@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
add:
nlflags |= NLM_F_CREATE;
- err = call_fib6_entry_notifiers(info->nl_net, event, rt,
- extack);
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
if (err)
return err;
@@ -1062,7 +1087,7 @@ add:
}
} else {
- struct fib6_info *tmp;
+ int nsiblings;
if (!found) {
if (add)
@@ -1077,57 +1102,48 @@ add:
if (err)
return err;
- /* if route being replaced has siblings, set tmp to
- * last one, otherwise tmp is current route. this is
- * used to set fib6_next for new route
- */
- if (iter->fib6_nsiblings)
- tmp = list_last_entry(&iter->fib6_siblings,
- struct fib6_info,
- fib6_siblings);
- else
- tmp = iter;
-
- /* insert new route */
atomic_inc(&rt->fib6_ref);
rcu_assign_pointer(rt->fib6_node, fn);
- rt->fib6_next = tmp->fib6_next;
+ rt->fib6_next = iter->fib6_next;
rcu_assign_pointer(*ins, rt);
-
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
fn->fn_flags |= RTN_RTINFO;
}
+ nsiblings = iter->fib6_nsiblings;
+ iter->fib6_node = NULL;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
+ fib6_info_release(iter);
- /* delete old route */
- rt = iter;
-
- if (rt->fib6_nsiblings) {
- struct fib6_info *tmp;
-
+ if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
- list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
- fib6_siblings) {
- iter->fib6_node = NULL;
- fib6_purge_rt(iter, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == iter)
- fn->rr_ptr = NULL;
- fib6_info_release(iter);
-
- rt->fib6_nsiblings--;
- info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+ ins = &rt->fib6_next;
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
+ while (iter) {
+ if (iter->fib6_metric > rt->fib6_metric)
+ break;
+ if (rt6_qualify_for_ecmp(iter)) {
+ *ins = iter->fib6_next;
+ iter->fib6_node = NULL;
+ fib6_purge_rt(iter, fn, info->nl_net);
+ if (rcu_access_pointer(fn->rr_ptr) == iter)
+ fn->rr_ptr = NULL;
+ fib6_info_release(iter);
+ nsiblings--;
+ info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+ } else {
+ ins = &iter->fib6_next;
+ }
+ iter = rcu_dereference_protected(*ins,
+ lockdep_is_held(&rt->fib6_table->tb6_lock));
}
+ WARN_ON(nsiblings != 0);
}
-
- WARN_ON(rt->fib6_nsiblings != 0);
-
- rt->fib6_node = NULL;
- fib6_purge_rt(rt, fn, info->nl_net);
- if (rcu_access_pointer(fn->rr_ptr) == rt)
- fn->rr_ptr = NULL;
- fib6_info_release(rt);
}
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index c8cf2fdbb13b..cd2cfb04e5d8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -927,7 +927,6 @@ tx_err:
static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
struct net_device *dev)
{
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct ip6_tnl *t = netdev_priv(dev);
struct dst_entry *dst = skb_dst(skb);
struct net_device_stats *stats;
@@ -1010,6 +1009,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
}
} else {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a14fb4fcdf18..3168847c30d1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -570,6 +570,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->dev = from->dev;
to->mark = from->mark;
+ skb_copy_hash(to, from);
+
#ifdef CONFIG_NET_SCHED
to->tc_index = from->tc_index;
#endif
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 4d780c7f0130..568ca4187cd1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -398,6 +398,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
case IPV6_DSTOPTS:
{
struct ipv6_txoptions *opt;
+ struct ipv6_opt_hdr *new = NULL;
+
+ /* hop-by-hop / destination options are privileged option */
+ retv = -EPERM;
+ if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
+ break;
/* remove any sticky options header with a zero option
* length, per RFC3542.
@@ -409,17 +415,22 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
else if (optlen < sizeof(struct ipv6_opt_hdr) ||
optlen & 0x7 || optlen > 8 * 255)
goto e_inval;
-
- /* hop-by-hop / destination options are privileged option */
- retv = -EPERM;
- if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
- break;
+ else {
+ new = memdup_user(optval, optlen);
+ if (IS_ERR(new)) {
+ retv = PTR_ERR(new);
+ break;
+ }
+ if (unlikely(ipv6_optlen(new) > optlen)) {
+ kfree(new);
+ goto e_inval;
+ }
+ }
opt = rcu_dereference_protected(np->opt,
lockdep_sock_is_held(sk));
- opt = ipv6_renew_options(sk, opt, optname,
- (struct ipv6_opt_hdr __user *)optval,
- optlen);
+ opt = ipv6_renew_options(sk, opt, optname, new);
+ kfree(new);
if (IS_ERR(opt)) {
retv = PTR_ERR(opt);
break;
@@ -718,8 +729,9 @@ done:
struct sockaddr_in6 *psin6;
psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
- retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
- &psin6->sin6_addr);
+ retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
+ &psin6->sin6_addr,
+ MCAST_INCLUDE);
/* prior join w/ different source is ok */
if (retv && retv != -EADDRINUSE)
break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index c0c74088f2af..f60f310785fd 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -95,6 +95,8 @@ static int ip6_mc_add_src(struct inet6_dev *idev, const struct in6_addr *pmca,
int delta);
static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
struct inet6_dev *idev);
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+ const struct in6_addr *addr, unsigned int mode);
#define MLD_QRV_DEFAULT 2
/* RFC3810, 9.2. Query Interval */
@@ -132,7 +134,8 @@ static int unsolicited_report_interval(struct inet6_dev *idev)
return iv > 0 ? iv : 1;
}
-int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+static int __ipv6_sock_mc_join(struct sock *sk, int ifindex,
+ const struct in6_addr *addr, unsigned int mode)
{
struct net_device *dev = NULL;
struct ipv6_mc_socklist *mc_lst;
@@ -179,7 +182,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
}
mc_lst->ifindex = dev->ifindex;
- mc_lst->sfmode = MCAST_EXCLUDE;
+ mc_lst->sfmode = mode;
rwlock_init(&mc_lst->sflock);
mc_lst->sflist = NULL;
@@ -187,7 +190,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
* now add/increase the group membership on the device
*/
- err = ipv6_dev_mc_inc(dev, addr);
+ err = __ipv6_dev_mc_inc(dev, addr, mode);
if (err) {
sock_kfree_s(sk, mc_lst, sizeof(*mc_lst));
@@ -199,8 +202,19 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return 0;
}
+
+int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
+{
+ return __ipv6_sock_mc_join(sk, ifindex, addr, MCAST_EXCLUDE);
+}
EXPORT_SYMBOL(ipv6_sock_mc_join);
+int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
+ const struct in6_addr *addr, unsigned int mode)
+{
+ return __ipv6_sock_mc_join(sk, ifindex, addr, mode);
+}
+
/*
* socket leave on multicast group
*/
@@ -646,7 +660,7 @@ bool inet6_mc_check(struct sock *sk, const struct in6_addr *mc_addr,
return rv;
}
-static void igmp6_group_added(struct ifmcaddr6 *mc)
+static void igmp6_group_added(struct ifmcaddr6 *mc, unsigned int mode)
{
struct net_device *dev = mc->idev->dev;
char buf[MAX_ADDR_LEN];
@@ -672,7 +686,13 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
}
/* else v2 */
- mc->mca_crcount = mc->idev->mc_qrv;
+ /* Based on RFC3810 6.1, for newly added INCLUDE SSM, we
+ * should not send filter-mode change record as the mode
+ * should be from IN() to IN(A).
+ */
+ if (mode == MCAST_EXCLUDE)
+ mc->mca_crcount = mc->idev->mc_qrv;
+
mld_ifc_event(mc->idev);
}
@@ -770,13 +790,13 @@ static void mld_del_delrec(struct inet6_dev *idev, struct ifmcaddr6 *im)
spin_lock_bh(&im->mca_lock);
if (pmc) {
im->idev = pmc->idev;
- im->mca_crcount = idev->mc_qrv;
- im->mca_sfmode = pmc->mca_sfmode;
- if (pmc->mca_sfmode == MCAST_INCLUDE) {
+ if (im->mca_sfmode == MCAST_INCLUDE) {
im->mca_tomb = pmc->mca_tomb;
im->mca_sources = pmc->mca_sources;
for (psf = im->mca_sources; psf; psf = psf->sf_next)
- psf->sf_crcount = im->mca_crcount;
+ psf->sf_crcount = idev->mc_qrv;
+ } else {
+ im->mca_crcount = idev->mc_qrv;
}
in6_dev_put(pmc->idev);
kfree(pmc);
@@ -831,7 +851,8 @@ static void ma_put(struct ifmcaddr6 *mc)
}
static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
- const struct in6_addr *addr)
+ const struct in6_addr *addr,
+ unsigned int mode)
{
struct ifmcaddr6 *mc;
@@ -849,9 +870,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
refcount_set(&mc->mca_refcnt, 1);
spin_lock_init(&mc->mca_lock);
- /* initial mode is (EX, empty) */
- mc->mca_sfmode = MCAST_EXCLUDE;
- mc->mca_sfcount[MCAST_EXCLUDE] = 1;
+ mc->mca_sfmode = mode;
+ mc->mca_sfcount[mode] = 1;
if (ipv6_addr_is_ll_all_nodes(&mc->mca_addr) ||
IPV6_ADDR_MC_SCOPE(&mc->mca_addr) < IPV6_ADDR_SCOPE_LINKLOCAL)
@@ -863,7 +883,8 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev,
/*
* device multicast group inc (add if not found)
*/
-int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+static int __ipv6_dev_mc_inc(struct net_device *dev,
+ const struct in6_addr *addr, unsigned int mode)
{
struct ifmcaddr6 *mc;
struct inet6_dev *idev;
@@ -887,14 +908,13 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
if (ipv6_addr_equal(&mc->mca_addr, addr)) {
mc->mca_users++;
write_unlock_bh(&idev->lock);
- ip6_mc_add_src(idev, &mc->mca_addr, MCAST_EXCLUDE, 0,
- NULL, 0);
+ ip6_mc_add_src(idev, &mc->mca_addr, mode, 0, NULL, 0);
in6_dev_put(idev);
return 0;
}
}
- mc = mca_alloc(idev, addr);
+ mc = mca_alloc(idev, addr, mode);
if (!mc) {
write_unlock_bh(&idev->lock);
in6_dev_put(idev);
@@ -911,11 +931,16 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
write_unlock_bh(&idev->lock);
mld_del_delrec(idev, mc);
- igmp6_group_added(mc);
+ igmp6_group_added(mc, mode);
ma_put(mc);
return 0;
}
+int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
+{
+ return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE);
+}
+
/*
* device multicast group del
*/
@@ -1751,7 +1776,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
psf_next = psf->sf_next;
- if (!is_in(pmc, psf, type, gdeleted, sdeleted)) {
+ if (!is_in(pmc, psf, type, gdeleted, sdeleted) && !crsend) {
psf_prev = psf;
continue;
}
@@ -2066,7 +2091,7 @@ static void mld_send_initial_cr(struct inet6_dev *idev)
if (pmc->mca_sfcount[MCAST_EXCLUDE])
type = MLD2_CHANGE_TO_EXCLUDE;
else
- type = MLD2_CHANGE_TO_INCLUDE;
+ type = MLD2_ALLOW_NEW_SOURCES;
skb = add_grec(skb, pmc, type, 0, 0, 1);
spin_unlock_bh(&pmc->mca_lock);
}
@@ -2546,7 +2571,7 @@ void ipv6_mc_up(struct inet6_dev *idev)
ipv6_mc_reset(idev);
for (i = idev->mc_list; i; i = i->next) {
mld_del_delrec(idev, i);
- igmp6_group_added(i);
+ igmp6_group_added(i, i->mca_sfmode);
}
read_unlock_bh(&idev->lock);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index e640d2f3c55c..0ec273997d1d 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -811,7 +811,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
return;
}
}
- if (ndopts.nd_opts_nonce)
+ if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6);
inc = ipv6_addr_is_multicast(daddr);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7eab959734bc..daf2e9e9193d 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1909,6 +1909,7 @@ static struct xt_match ip6t_builtin_mt[] __read_mostly = {
.checkentry = icmp6_checkentry,
.proto = IPPROTO_ICMPV6,
.family = NFPROTO_IPV6,
+ .me = THIS_MODULE,
},
};
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 5e0332014c17..e4d9e6976d3c 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -107,7 +107,7 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
if (hdr == NULL)
goto err_reg;
- net->nf_frag.sysctl.frags_hdr = hdr;
+ net->nf_frag_frags_hdr = hdr;
return 0;
err_reg:
@@ -121,8 +121,8 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
{
struct ctl_table *table;
- table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg;
- unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr);
+ table = net->nf_frag_frags_hdr->ctl_table_arg;
+ unregister_net_sysctl_table(net->nf_frag_frags_hdr);
if (!net_eq(net, &init_net))
kfree(table);
}
@@ -585,6 +585,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fq->q.meat == fq->q.len &&
nf_ct_frag6_reasm(fq, skb, dev))
ret = 0;
+ else
+ skb_dst_drop(skb);
out_unlock:
spin_unlock_bh(&fq->q.lock);
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
index bf1d6c421e3b..5dfd33af6451 100644
--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -55,7 +55,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto,
+ sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, tproto,
&iph->saddr,
nf_tproxy_laddr6(skb, laddr, &iph->daddr),
hp->source,
@@ -72,7 +72,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6);
struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff,
const u8 protocol,
const struct in6_addr *saddr, const struct in6_addr *daddr,
const __be16 sport, const __be16 dport,
@@ -80,15 +80,20 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
const enum nf_tproxy_lookup_t lookup_type)
{
struct sock *sk;
- struct tcphdr *tcph;
switch (protocol) {
- case IPPROTO_TCP:
+ case IPPROTO_TCP: {
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff,
+ sizeof(struct tcphdr), &_hdr);
+ if (hp == NULL)
+ return NULL;
+
switch (lookup_type) {
case NF_TPROXY_LOOKUP_LISTENER:
- tcph = hp;
sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
- thoff + __tcp_hdrlen(tcph),
+ thoff + __tcp_hdrlen(hp),
saddr, sport,
daddr, ntohs(dport),
in->ifindex, 0);
@@ -110,6 +115,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
BUG();
}
break;
+ }
case IPPROTO_UDP:
sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
in->ifindex);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 86a0e4333d42..ec18b3ce8b6d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -972,10 +972,10 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
rt->dst.lastuse = jiffies;
}
+/* Caller must already hold reference to @from */
static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
{
rt->rt6i_flags &= ~RTF_EXPIRES;
- fib6_info_hold(from);
rcu_assign_pointer(rt->from, from);
dst_init_metrics(&rt->dst, from->fib6_metrics->metrics, true);
if (from->fib6_metrics != &dst_default_metrics) {
@@ -984,6 +984,7 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
}
}
+/* Caller must already hold reference to @ort */
static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
{
struct net_device *dev = fib6_info_nh_dev(ort);
@@ -1044,9 +1045,14 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
struct net_device *dev = rt->fib6_nh.nh_dev;
struct rt6_info *nrt;
+ if (!fib6_info_hold_safe(rt))
+ return NULL;
+
nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
if (nrt)
ip6_rt_copy_init(nrt, rt);
+ else
+ fib6_info_release(rt);
return nrt;
}
@@ -1178,10 +1184,15 @@ static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
* Clone the route.
*/
+ if (!fib6_info_hold_safe(ort))
+ return NULL;
+
dev = ip6_rt_get_dev_rcu(ort);
rt = ip6_dst_alloc(dev_net(dev), dev, 0);
- if (!rt)
+ if (!rt) {
+ fib6_info_release(ort);
return NULL;
+ }
ip6_rt_copy_init(rt, ort);
rt->rt6i_flags |= RTF_CACHE;
@@ -1210,12 +1221,17 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
struct net_device *dev;
struct rt6_info *pcpu_rt;
+ if (!fib6_info_hold_safe(rt))
+ return NULL;
+
rcu_read_lock();
dev = ip6_rt_get_dev_rcu(rt);
pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
rcu_read_unlock();
- if (!pcpu_rt)
+ if (!pcpu_rt) {
+ fib6_info_release(rt);
return NULL;
+ }
ip6_rt_copy_init(pcpu_rt, rt);
pcpu_rt->rt6i_flags |= RTF_PCPU;
return pcpu_rt;
@@ -2486,7 +2502,7 @@ restart:
out:
if (ret)
- dst_hold(&ret->dst);
+ ip6_hold_safe(net, &ret, true);
else
ret = ip6_create_rt_rcu(rt);
@@ -3303,7 +3319,8 @@ static int ip6_route_del(struct fib6_config *cfg,
continue;
if (cfg->fc_protocol && cfg->fc_protocol != rt->fib6_protocol)
continue;
- fib6_info_hold(rt);
+ if (!fib6_info_hold_safe(rt))
+ continue;
rcu_read_unlock();
/* if gateway was specified only delete the one hop */
@@ -3409,6 +3426,9 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
rcu_read_lock();
from = rcu_dereference(rt->from);
+ /* This fib6_info_hold() is safe here because we hold reference to rt
+ * and rt already holds reference to fib6_info.
+ */
fib6_info_hold(from);
rcu_read_unlock();
@@ -3470,7 +3490,8 @@ static struct fib6_info *rt6_get_route_info(struct net *net,
continue;
if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
continue;
- fib6_info_hold(rt);
+ if (!fib6_info_hold_safe(rt))
+ continue;
break;
}
out:
@@ -3530,8 +3551,8 @@ struct fib6_info *rt6_get_dflt_router(struct net *net,
ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
break;
}
- if (rt)
- fib6_info_hold(rt);
+ if (rt && !fib6_info_hold_safe(rt))
+ rt = NULL;
rcu_read_unlock();
return rt;
}
@@ -3579,8 +3600,8 @@ restart:
struct inet6_dev *idev = dev ? __in6_dev_get(dev) : NULL;
if (rt->fib6_flags & (RTF_DEFAULT | RTF_ADDRCONF) &&
- (!idev || idev->cnf.accept_ra != 2)) {
- fib6_info_hold(rt);
+ (!idev || idev->cnf.accept_ra != 2) &&
+ fib6_info_hold_safe(rt)) {
rcu_read_unlock();
ip6_del_rt(net, rt);
goto restart;
@@ -3842,7 +3863,7 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt)
lockdep_is_held(&rt->fib6_table->tb6_lock));
while (iter) {
if (iter->fib6_metric == rt->fib6_metric &&
- iter->fib6_nsiblings)
+ rt6_qualify_for_ecmp(iter))
return iter;
iter = rcu_dereference_protected(iter->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
@@ -4388,6 +4409,13 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
rt = NULL;
goto cleanup;
}
+ if (!rt6_qualify_for_ecmp(rt)) {
+ err = -EINVAL;
+ NL_SET_ERR_MSG(extack,
+ "Device only routes can not be added for IPv6 using the multipath API.");
+ fib6_info_release(rt);
+ goto cleanup;
+ }
rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
@@ -4439,7 +4467,6 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
*/
cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
NLM_F_REPLACE);
- cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_APPEND;
nhn++;
}
diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c
index 33fb35cbfac1..558fe8cc6d43 100644
--- a/net/ipv6/seg6_hmac.c
+++ b/net/ipv6/seg6_hmac.c
@@ -373,7 +373,7 @@ static int seg6_hmac_init_algo(void)
return -ENOMEM;
for_each_possible_cpu(cpu) {
- tfm = crypto_alloc_shash(algo->name, 0, GFP_KERNEL);
+ tfm = crypto_alloc_shash(algo->name, 0, 0);
if (IS_ERR(tfm))
return PTR_ERR(tfm);
p_tfm = per_cpu_ptr(algo->tfms, cpu);
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 19ccf0dc996c..a8854dd3e9c5 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -101,7 +101,7 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
if (do_flowlabel > 0) {
hash = skb_get_hash(skb);
- rol32(hash, 16);
+ hash = rol32(hash, 16);
flowlabel = (__force __be32)hash & IPV6_FLOWLABEL_MASK;
} else if (!do_flowlabel && skb->protocol == htons(ETH_P_IPV6)) {
flowlabel = ip6_flowlabel(inner_hdr);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7efa9fd7e109..03e6b7a2bc53 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -938,7 +938,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
&tcp_hashinfo, NULL, 0,
&ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source), tcp_v6_iif(skb),
+ ntohs(th->source),
+ tcp_v6_iif_l3_slave(skb),
tcp_v6_sdif(skb));
if (!sk1)
goto out;
@@ -1609,7 +1610,8 @@ do_time_wait:
skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
- ntohs(th->dest), tcp_v6_iif(skb),
+ ntohs(th->dest),
+ tcp_v6_iif_l3_slave(skb),
sdif);
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 0a38cc1cbebc..932985ca4e66 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2254,11 +2254,8 @@ static void ieee80211_deliver_skb_to_local_stack(struct sk_buff *skb,
sdata->control_port_over_nl80211)) {
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
bool noencrypt = status->flag & RX_FLAG_DECRYPTED;
- struct ethhdr *ehdr = eth_hdr(skb);
- cfg80211_rx_control_port(dev, skb->data, skb->len,
- ehdr->h_source,
- be16_to_cpu(skb->protocol), noencrypt);
+ cfg80211_rx_control_port(dev, skb, noencrypt);
dev_kfree_skb(skb);
} else {
/* deliver to local stack */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 44b5dfe8727d..fa1f1e63a264 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4845,7 +4845,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
skb_reset_network_header(skb);
skb_reset_mac_header(skb);
+ local_bh_disable();
__ieee80211_subif_start_xmit(skb, skb->dev, flags);
+ local_bh_enable();
return 0;
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5e2e511c4a6f..d02fbfec3783 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2111,7 +2111,8 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (!sta->uploaded)
continue;
- if (sta->sdata->vif.type != NL80211_IFTYPE_AP)
+ if (sta->sdata->vif.type != NL80211_IFTYPE_AP &&
+ sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN)
continue;
for (state = IEEE80211_STA_NOTEXIST;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index dbd7d1fad277..f0a1c536ef15 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -460,6 +460,13 @@ config NF_TABLES
if NF_TABLES
+config NF_TABLES_SET
+ tristate "Netfilter nf_tables set infrastructure"
+ help
+ This option enables the nf_tables set infrastructure that allows to
+ look up for elements in a set and to build one-way mappings between
+ matchings and actions.
+
config NF_TABLES_INET
depends on IPV6
select NF_TABLES_IPV4
@@ -493,24 +500,6 @@ config NFT_FLOW_OFFLOAD
This option adds the "flow_offload" expression that you can use to
choose what flows are placed into the hardware.
-config NFT_SET_RBTREE
- tristate "Netfilter nf_tables rbtree set module"
- help
- This option adds the "rbtree" set type (Red Black tree) that is used
- to build interval-based sets.
-
-config NFT_SET_HASH
- tristate "Netfilter nf_tables hash set module"
- help
- This option adds the "hash" set type that is used to build one-way
- mappings between matchings and actions.
-
-config NFT_SET_BITMAP
- tristate "Netfilter nf_tables bitmap set module"
- help
- This option adds the "bitmap" set type that is used to build sets
- whose keys are smaller or equal to 16 bits.
-
config NFT_COUNTER
tristate "Netfilter nf_tables counter module"
help
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 44449389e527..8a76dced974d 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -78,7 +78,11 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
nft_bitwise.o nft_byteorder.o nft_payload.o nft_lookup.o \
nft_dynset.o nft_meta.o nft_rt.o nft_exthdr.o
+nf_tables_set-objs := nf_tables_set_core.o \
+ nft_set_hash.o nft_set_bitmap.o nft_set_rbtree.o
+
obj-$(CONFIG_NF_TABLES) += nf_tables.o
+obj-$(CONFIG_NF_TABLES_SET) += nf_tables_set.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
@@ -91,9 +95,6 @@ obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_QUOTA) += nft_quota.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
-obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o
-obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o
-obj-$(CONFIG_NFT_SET_BITMAP) += nft_set_bitmap.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
obj-$(CONFIG_NFT_LOG) += nft_log.o
obj-$(CONFIG_NFT_MASQ) += nft_masq.o
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index d8383609fe28..510039862aa9 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -47,6 +47,8 @@ struct nf_conncount_tuple {
struct hlist_node node;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_zone zone;
+ int cpu;
+ u32 jiffies32;
};
struct nf_conncount_rb {
@@ -91,11 +93,42 @@ bool nf_conncount_add(struct hlist_head *head,
return false;
conn->tuple = *tuple;
conn->zone = *zone;
+ conn->cpu = raw_smp_processor_id();
+ conn->jiffies32 = (u32)jiffies;
hlist_add_head(&conn->node, head);
return true;
}
EXPORT_SYMBOL_GPL(nf_conncount_add);
+static const struct nf_conntrack_tuple_hash *
+find_or_evict(struct net *net, struct nf_conncount_tuple *conn)
+{
+ const struct nf_conntrack_tuple_hash *found;
+ unsigned long a, b;
+ int cpu = raw_smp_processor_id();
+ __s32 age;
+
+ found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
+ if (found)
+ return found;
+ b = conn->jiffies32;
+ a = (u32)jiffies;
+
+ /* conn might have been added just before by another cpu and
+ * might still be unconfirmed. In this case, nf_conntrack_find()
+ * returns no result. Thus only evict if this cpu added the
+ * stale entry or if the entry is older than two jiffies.
+ */
+ age = a - b;
+ if (conn->cpu == cpu || age >= 2) {
+ hlist_del(&conn->node);
+ kmem_cache_free(conncount_conn_cachep, conn);
+ return ERR_PTR(-ENOENT);
+ }
+
+ return ERR_PTR(-EAGAIN);
+}
+
unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone,
@@ -103,18 +136,27 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn;
- struct hlist_node *n;
struct nf_conn *found_ct;
+ struct hlist_node *n;
unsigned int length = 0;
*addit = tuple ? true : false;
/* check the saved connections */
hlist_for_each_entry_safe(conn, n, head, node) {
- found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple);
- if (found == NULL) {
- hlist_del(&conn->node);
- kmem_cache_free(conncount_conn_cachep, conn);
+ found = find_or_evict(net, conn);
+ if (IS_ERR(found)) {
+ /* Not found, but might be about to be confirmed */
+ if (PTR_ERR(found) == -EAGAIN) {
+ length++;
+ if (!tuple)
+ continue;
+
+ if (nf_ct_tuple_equal(&conn->tuple, tuple) &&
+ nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
+ nf_ct_zone_id(zone, zone->dir))
+ *addit = false;
+ }
continue;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3465da2a98bd..3d5280425027 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2043,7 +2043,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
return -EOPNOTSUPP;
/* On boot, we can set this without any fancy locking. */
- if (!nf_conntrack_htable_size)
+ if (!nf_conntrack_hash)
return param_set_uint(val, kp);
rc = kstrtouint(val, 0, &hashsize);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 551a1eddf0fa..a75b11c39312 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -465,6 +465,11 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
nf_ct_expect_iterate_destroy(expect_iter_me, NULL);
nf_ct_iterate_destroy(unhelp, me);
+
+ /* Maybe someone has gotten the helper already when unhelp above.
+ * So need to wait it.
+ */
+ synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index abe647d5b8c6..9ce6336d1e55 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -243,14 +243,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
* We currently ignore Sync packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
[DCCP_PKT_SYNCACK] = {
/*
* We currently ignore SyncAck packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
},
[CT_DCCP_ROLE_SERVER] = {
@@ -371,14 +371,14 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] =
* We currently ignore Sync packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
[DCCP_PKT_SYNCACK] = {
/*
* We currently ignore SyncAck packets
*
* sNO, sRQ, sRS, sPO, sOP, sCR, sCG, sTW */
- sIG, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
+ sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG,
},
},
};
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 426457047578..a61d6df6e5f6 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -424,6 +424,10 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
if (write) {
struct ctl_table tmp = *table;
+ /* proc_dostring() can append to existing strings, so we need to
+ * initialize it as an empty string.
+ */
+ buf[0] = '\0';
tmp.data = buf;
r = proc_dostring(&tmp, write, buffer, lenp, ppos);
if (r)
@@ -442,14 +446,17 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
rcu_assign_pointer(net->nf.nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex);
} else {
+ struct ctl_table tmp = *table;
+
+ tmp.data = buf;
mutex_lock(&nf_log_mutex);
logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
if (!logger)
- table->data = "NONE";
+ strlcpy(buf, "NONE", sizeof(buf));
else
- table->data = logger->name;
- r = proc_dostring(table, write, buffer, lenp, ppos);
+ strlcpy(buf, logger->name, sizeof(buf));
mutex_unlock(&nf_log_mutex);
+ r = proc_dostring(&tmp, write, buffer, lenp, ppos);
}
return r;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 896d4a36081d..f5745e4c6513 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -75,6 +75,7 @@ static void nft_ctx_init(struct nft_ctx *ctx,
{
ctx->net = net;
ctx->family = family;
+ ctx->level = 0;
ctx->table = table;
ctx->chain = chain;
ctx->nla = nla;
@@ -1597,7 +1598,6 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
struct nft_base_chain *basechain;
struct nft_stats *stats = NULL;
struct nft_chain_hook hook;
- const struct nlattr *name;
struct nf_hook_ops *ops;
struct nft_trans *trans;
int err;
@@ -1645,12 +1645,11 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
return PTR_ERR(stats);
}
+ err = -ENOMEM;
trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN,
sizeof(struct nft_trans_chain));
- if (trans == NULL) {
- free_percpu(stats);
- return -ENOMEM;
- }
+ if (trans == NULL)
+ goto err;
nft_trans_chain_stats(trans) = stats;
nft_trans_chain_update(trans) = true;
@@ -1660,19 +1659,37 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
else
nft_trans_chain_policy(trans) = -1;
- name = nla[NFTA_CHAIN_NAME];
- if (nla[NFTA_CHAIN_HANDLE] && name) {
- nft_trans_chain_name(trans) =
- nla_strdup(name, GFP_KERNEL);
- if (!nft_trans_chain_name(trans)) {
- kfree(trans);
- free_percpu(stats);
- return -ENOMEM;
+ if (nla[NFTA_CHAIN_HANDLE] &&
+ nla[NFTA_CHAIN_NAME]) {
+ struct nft_trans *tmp;
+ char *name;
+
+ err = -ENOMEM;
+ name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL);
+ if (!name)
+ goto err;
+
+ err = -EEXIST;
+ list_for_each_entry(tmp, &ctx->net->nft.commit_list, list) {
+ if (tmp->msg_type == NFT_MSG_NEWCHAIN &&
+ tmp->ctx.table == table &&
+ nft_trans_chain_update(tmp) &&
+ nft_trans_chain_name(tmp) &&
+ strcmp(name, nft_trans_chain_name(tmp)) == 0) {
+ kfree(name);
+ goto err;
+ }
}
+
+ nft_trans_chain_name(trans) = name;
}
list_add_tail(&trans->list, &ctx->net->nft.commit_list);
return 0;
+err:
+ free_percpu(stats);
+ kfree(trans);
+ return err;
}
static int nf_tables_newchain(struct net *net, struct sock *nlsk,
@@ -2254,6 +2271,39 @@ done:
return skb->len;
}
+static int nf_tables_dump_rules_start(struct netlink_callback *cb)
+{
+ const struct nlattr * const *nla = cb->data;
+ struct nft_rule_dump_ctx *ctx = NULL;
+
+ if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (nla[NFTA_RULE_TABLE]) {
+ ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
+ GFP_ATOMIC);
+ if (!ctx->table) {
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ if (nla[NFTA_RULE_CHAIN]) {
+ ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
+ GFP_ATOMIC);
+ if (!ctx->chain) {
+ kfree(ctx->table);
+ kfree(ctx);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ cb->data = ctx;
+ return 0;
+}
+
static int nf_tables_dump_rules_done(struct netlink_callback *cb)
{
struct nft_rule_dump_ctx *ctx = cb->data;
@@ -2283,38 +2333,13 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start= nf_tables_dump_rules_start,
.dump = nf_tables_dump_rules,
.done = nf_tables_dump_rules_done,
.module = THIS_MODULE,
+ .data = (void *)nla,
};
- if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
- struct nft_rule_dump_ctx *ctx;
-
- ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
- if (!ctx)
- return -ENOMEM;
-
- if (nla[NFTA_RULE_TABLE]) {
- ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
- GFP_ATOMIC);
- if (!ctx->table) {
- kfree(ctx);
- return -ENOMEM;
- }
- }
- if (nla[NFTA_RULE_CHAIN]) {
- ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
- GFP_ATOMIC);
- if (!ctx->chain) {
- kfree(ctx->table);
- kfree(ctx);
- return -ENOMEM;
- }
- }
- c.data = ctx;
- }
-
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -2384,6 +2409,9 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
struct nft_rule *rule;
int err;
+ if (ctx->level == NFT_JUMP_STACK_SIZE)
+ return -EMLINK;
+
list_for_each_entry(rule, &chain->rules, list) {
if (!nft_is_active_next(ctx->net, rule))
continue;
@@ -3161,6 +3189,18 @@ done:
return skb->len;
}
+static int nf_tables_dump_sets_start(struct netlink_callback *cb)
+{
+ struct nft_ctx *ctx_dump = NULL;
+
+ ctx_dump = kmemdup(cb->data, sizeof(*ctx_dump), GFP_ATOMIC);
+ if (ctx_dump == NULL)
+ return -ENOMEM;
+
+ cb->data = ctx_dump;
+ return 0;
+}
+
static int nf_tables_dump_sets_done(struct netlink_callback *cb)
{
kfree(cb->data);
@@ -3188,18 +3228,12 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_sets_start,
.dump = nf_tables_dump_sets,
.done = nf_tables_dump_sets_done,
+ .data = &ctx,
.module = THIS_MODULE,
};
- struct nft_ctx *ctx_dump;
-
- ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_ATOMIC);
- if (ctx_dump == NULL)
- return -ENOMEM;
-
- *ctx_dump = ctx;
- c.data = ctx_dump;
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -3849,6 +3883,15 @@ nla_put_failure:
return -ENOSPC;
}
+static int nf_tables_dump_set_start(struct netlink_callback *cb)
+{
+ struct nft_set_dump_ctx *dump_ctx = cb->data;
+
+ cb->data = kmemdup(dump_ctx, sizeof(*dump_ctx), GFP_ATOMIC);
+
+ return cb->data ? 0 : -ENOMEM;
+}
+
static int nf_tables_dump_set_done(struct netlink_callback *cb)
{
kfree(cb->data);
@@ -4002,20 +4045,17 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_set_start,
.dump = nf_tables_dump_set,
.done = nf_tables_dump_set_done,
.module = THIS_MODULE,
};
- struct nft_set_dump_ctx *dump_ctx;
-
- dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_ATOMIC);
- if (!dump_ctx)
- return -ENOMEM;
-
- dump_ctx->set = set;
- dump_ctx->ctx = ctx;
+ struct nft_set_dump_ctx dump_ctx = {
+ .set = set,
+ .ctx = ctx,
+ };
- c.data = dump_ctx;
+ c.data = &dump_ctx;
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -4975,38 +5015,42 @@ done:
return skb->len;
}
-static int nf_tables_dump_obj_done(struct netlink_callback *cb)
+static int nf_tables_dump_obj_start(struct netlink_callback *cb)
{
- struct nft_obj_filter *filter = cb->data;
+ const struct nlattr * const *nla = cb->data;
+ struct nft_obj_filter *filter = NULL;
- if (filter) {
- kfree(filter->table);
- kfree(filter);
+ if (nla[NFTA_OBJ_TABLE] || nla[NFTA_OBJ_TYPE]) {
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+ if (!filter)
+ return -ENOMEM;
+
+ if (nla[NFTA_OBJ_TABLE]) {
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
+ if (!filter->table) {
+ kfree(filter);
+ return -ENOMEM;
+ }
+ }
+
+ if (nla[NFTA_OBJ_TYPE])
+ filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
}
+ cb->data = filter;
return 0;
}
-static struct nft_obj_filter *
-nft_obj_filter_alloc(const struct nlattr * const nla[])
+static int nf_tables_dump_obj_done(struct netlink_callback *cb)
{
- struct nft_obj_filter *filter;
-
- filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
- if (!filter)
- return ERR_PTR(-ENOMEM);
+ struct nft_obj_filter *filter = cb->data;
- if (nla[NFTA_OBJ_TABLE]) {
- filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
- if (!filter->table) {
- kfree(filter);
- return ERR_PTR(-ENOMEM);
- }
+ if (filter) {
+ kfree(filter->table);
+ kfree(filter);
}
- if (nla[NFTA_OBJ_TYPE])
- filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- return filter;
+ return 0;
}
/* called with rcu_read_lock held */
@@ -5027,21 +5071,13 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_obj_start,
.dump = nf_tables_dump_obj,
.done = nf_tables_dump_obj_done,
.module = THIS_MODULE,
+ .data = (void *)nla,
};
- if (nla[NFTA_OBJ_TABLE] ||
- nla[NFTA_OBJ_TYPE]) {
- struct nft_obj_filter *filter;
-
- filter = nft_obj_filter_alloc(nla);
- if (IS_ERR(filter))
- return -ENOMEM;
-
- c.data = filter;
- }
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -5320,8 +5356,6 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
flowtable->ops[i].priv = &flowtable->data;
flowtable->ops[i].hook = flowtable->data.type->hook;
flowtable->ops[i].dev = dev_array[i];
- flowtable->dev_name[i] = kstrdup(dev_array[i]->name,
- GFP_KERNEL);
}
return err;
@@ -5479,10 +5513,8 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
err6:
i = flowtable->ops_len;
err5:
- for (k = i - 1; k >= 0; k--) {
- kfree(flowtable->dev_name[k]);
+ for (k = i - 1; k >= 0; k--)
nf_unregister_net_hook(net, &flowtable->ops[k]);
- }
kfree(flowtable->ops);
err4:
@@ -5581,9 +5613,10 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
goto nla_put_failure;
for (i = 0; i < flowtable->ops_len; i++) {
- if (flowtable->dev_name[i][0] &&
- nla_put_string(skb, NFTA_DEVICE_NAME,
- flowtable->dev_name[i]))
+ const struct net_device *dev = READ_ONCE(flowtable->ops[i].dev);
+
+ if (dev &&
+ nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest_devs);
@@ -5650,37 +5683,39 @@ done:
return skb->len;
}
-static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
+static int nf_tables_dump_flowtable_start(struct netlink_callback *cb)
{
- struct nft_flowtable_filter *filter = cb->data;
+ const struct nlattr * const *nla = cb->data;
+ struct nft_flowtable_filter *filter = NULL;
- if (!filter)
- return 0;
+ if (nla[NFTA_FLOWTABLE_TABLE]) {
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
+ if (!filter)
+ return -ENOMEM;
- kfree(filter->table);
- kfree(filter);
+ filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
+ GFP_ATOMIC);
+ if (!filter->table) {
+ kfree(filter);
+ return -ENOMEM;
+ }
+ }
+ cb->data = filter;
return 0;
}
-static struct nft_flowtable_filter *
-nft_flowtable_filter_alloc(const struct nlattr * const nla[])
+static int nf_tables_dump_flowtable_done(struct netlink_callback *cb)
{
- struct nft_flowtable_filter *filter;
+ struct nft_flowtable_filter *filter = cb->data;
- filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
if (!filter)
- return ERR_PTR(-ENOMEM);
+ return 0;
- if (nla[NFTA_FLOWTABLE_TABLE]) {
- filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
- GFP_ATOMIC);
- if (!filter->table) {
- kfree(filter);
- return ERR_PTR(-ENOMEM);
- }
- }
- return filter;
+ kfree(filter->table);
+ kfree(filter);
+
+ return 0;
}
/* called with rcu_read_lock held */
@@ -5700,20 +5735,13 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
+ .start = nf_tables_dump_flowtable_start,
.dump = nf_tables_dump_flowtable,
.done = nf_tables_dump_flowtable_done,
.module = THIS_MODULE,
+ .data = (void *)nla,
};
- if (nla[NFTA_FLOWTABLE_TABLE]) {
- struct nft_flowtable_filter *filter;
-
- filter = nft_flowtable_filter_alloc(nla);
- if (IS_ERR(filter))
- return -ENOMEM;
-
- c.data = filter;
- }
return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
@@ -5783,6 +5811,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
kfree(flowtable->name);
flowtable->data.type->free(&flowtable->data);
module_put(flowtable->data.type->owner);
+ kfree(flowtable);
}
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
@@ -5825,7 +5854,6 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
continue;
nf_unregister_net_hook(dev_net(dev), &flowtable->ops[i]);
- flowtable->dev_name[i][0] = '\0';
flowtable->ops[i].dev = NULL;
break;
}
@@ -6086,6 +6114,9 @@ static void nft_commit_release(struct nft_trans *trans)
case NFT_MSG_DELTABLE:
nf_tables_table_destroy(&trans->ctx);
break;
+ case NFT_MSG_NEWCHAIN:
+ kfree(nft_trans_chain_name(trans));
+ break;
case NFT_MSG_DELCHAIN:
nf_tables_chain_destroy(&trans->ctx);
break;
@@ -6315,13 +6346,15 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
break;
case NFT_MSG_NEWCHAIN:
- if (nft_trans_chain_update(trans))
+ if (nft_trans_chain_update(trans)) {
nft_chain_commit_update(trans);
- else
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ /* trans destroyed after rcu grace period */
+ } else {
nft_clear(net, trans->ctx.chain);
-
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
- nft_trans_destroy(trans);
+ nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN);
+ nft_trans_destroy(trans);
+ }
break;
case NFT_MSG_DELCHAIN:
nft_chain_del(trans->ctx.chain);
@@ -6471,7 +6504,7 @@ static int __nf_tables_abort(struct net *net)
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
free_percpu(nft_trans_chain_stats(trans));
-
+ kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
@@ -6837,13 +6870,6 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
err = nf_tables_check_loops(ctx, data->verdict.chain);
if (err < 0)
return err;
-
- if (ctx->chain->level + 1 >
- data->verdict.chain->level) {
- if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE)
- return -EMLINK;
- data->verdict.chain->level = ctx->chain->level + 1;
- }
}
return 0;
diff --git a/net/netfilter/nf_tables_set_core.c b/net/netfilter/nf_tables_set_core.c
new file mode 100644
index 000000000000..814789644bd3
--- /dev/null
+++ b/net/netfilter/nf_tables_set_core.c
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <net/netfilter/nf_tables_core.h>
+
+static int __init nf_tables_set_module_init(void)
+{
+ nft_register_set(&nft_set_hash_fast_type);
+ nft_register_set(&nft_set_hash_type);
+ nft_register_set(&nft_set_rhash_type);
+ nft_register_set(&nft_set_bitmap_type);
+ nft_register_set(&nft_set_rbtree_type);
+
+ return 0;
+}
+
+static void __exit nf_tables_set_module_exit(void)
+{
+ nft_unregister_set(&nft_set_rbtree_type);
+ nft_unregister_set(&nft_set_bitmap_type);
+ nft_unregister_set(&nft_set_rhash_type);
+ nft_unregister_set(&nft_set_hash_type);
+ nft_unregister_set(&nft_set_hash_fast_type);
+}
+
+module_init(nf_tables_set_module_init);
+module_exit(nf_tables_set_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 4ccd2988f9db..ea4ba551abb2 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1243,6 +1243,9 @@ static int nfqnl_recv_unsupp(struct net *net, struct sock *ctnl,
static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
[NFQA_CFG_CMD] = { .len = sizeof(struct nfqnl_msg_config_cmd) },
[NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) },
+ [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 },
+ [NFQA_CFG_MASK] = { .type = NLA_U32 },
+ [NFQA_CFG_FLAGS] = { .type = NLA_U32 },
};
static const struct nf_queue_handler nfqh = {
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 8d1ff654e5af..32535eea51b2 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -832,10 +832,18 @@ nft_target_select_ops(const struct nft_ctx *ctx,
rev = ntohl(nla_get_be32(tb[NFTA_TARGET_REV]));
family = ctx->family;
+ if (strcmp(tg_name, XT_ERROR_TARGET) == 0 ||
+ strcmp(tg_name, XT_STANDARD_TARGET) == 0 ||
+ strcmp(tg_name, "standard") == 0)
+ return ERR_PTR(-EINVAL);
+
/* Re-use the existing target if it's already loaded. */
list_for_each_entry(nft_target, &nft_target_list, head) {
struct xt_target *target = nft_target->ops.data;
+ if (!target->target)
+ continue;
+
if (nft_target_cmp(target, tg_name, rev, family))
return &nft_target->ops;
}
@@ -844,6 +852,11 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
+ if (!target->target) {
+ err = -EINVAL;
+ goto err;
+ }
+
if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) {
err = -EINVAL;
goto err;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 15adf8ca82c3..0777a93211e2 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -98,6 +98,7 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
const struct nft_data **d)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
int err;
@@ -109,9 +110,11 @@ static int nft_immediate_validate(const struct nft_ctx *ctx,
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
+ pctx->level++;
err = nft_chain_validate(ctx, data->verdict.chain);
if (err < 0)
return err;
+ pctx->level--;
break;
default:
break;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 42e6fadf1417..c2a1d84cdfc4 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -155,7 +155,9 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ struct nft_ctx *pctx = (struct nft_ctx *)ctx;
const struct nft_data *data;
+ int err;
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
*nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
@@ -165,10 +167,17 @@ static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- return nft_chain_validate(ctx, data->verdict.chain);
+ pctx->level++;
+ err = nft_chain_validate(ctx, data->verdict.chain);
+ if (err < 0)
+ return err;
+ pctx->level--;
+ break;
default:
- return 0;
+ break;
}
+
+ return 0;
}
static int nft_lookup_validate(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index d6626e01c7ee..128bc16f52dd 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -296,7 +296,7 @@ static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
-static struct nft_set_type nft_bitmap_type __read_mostly = {
+struct nft_set_type nft_set_bitmap_type __read_mostly = {
.owner = THIS_MODULE,
.ops = {
.privsize = nft_bitmap_privsize,
@@ -314,20 +314,3 @@ static struct nft_set_type nft_bitmap_type __read_mostly = {
.get = nft_bitmap_get,
},
};
-
-static int __init nft_bitmap_module_init(void)
-{
- return nft_register_set(&nft_bitmap_type);
-}
-
-static void __exit nft_bitmap_module_exit(void)
-{
- nft_unregister_set(&nft_bitmap_type);
-}
-
-module_init(nft_bitmap_module_init);
-module_exit(nft_bitmap_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6f9a1365a09f..90c3e7e6cacb 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -387,6 +387,7 @@ static void nft_rhash_destroy(const struct nft_set *set)
struct nft_rhash *priv = nft_set_priv(set);
cancel_delayed_work_sync(&priv->gc_work);
+ rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)set);
}
@@ -654,7 +655,7 @@ static bool nft_hash_fast_estimate(const struct nft_set_desc *desc, u32 features
return true;
}
-static struct nft_set_type nft_rhash_type __read_mostly = {
+struct nft_set_type nft_set_rhash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT |
NFT_SET_TIMEOUT | NFT_SET_EVAL,
@@ -677,7 +678,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = {
},
};
-static struct nft_set_type nft_hash_type __read_mostly = {
+struct nft_set_type nft_set_hash_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
@@ -697,7 +698,7 @@ static struct nft_set_type nft_hash_type __read_mostly = {
},
};
-static struct nft_set_type nft_hash_fast_type __read_mostly = {
+struct nft_set_type nft_set_hash_fast_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_MAP | NFT_SET_OBJECT,
.ops = {
@@ -716,26 +717,3 @@ static struct nft_set_type nft_hash_fast_type __read_mostly = {
.get = nft_hash_get,
},
};
-
-static int __init nft_hash_module_init(void)
-{
- if (nft_register_set(&nft_hash_fast_type) ||
- nft_register_set(&nft_hash_type) ||
- nft_register_set(&nft_rhash_type))
- return 1;
- return 0;
-}
-
-static void __exit nft_hash_module_exit(void)
-{
- nft_unregister_set(&nft_rhash_type);
- nft_unregister_set(&nft_hash_type);
- nft_unregister_set(&nft_hash_fast_type);
-}
-
-module_init(nft_hash_module_init);
-module_exit(nft_hash_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7f3a9a211034..9873d734b494 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -381,7 +381,7 @@ static void nft_rbtree_gc(struct work_struct *work)
gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
if (!gcb)
- goto out;
+ break;
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, rbe);
@@ -390,10 +390,12 @@ static void nft_rbtree_gc(struct work_struct *work)
rbe = rb_entry(prev, struct nft_rbtree_elem, node);
atomic_dec(&set->nelems);
nft_set_gc_batch_add(gcb, rbe);
+ prev = NULL;
}
node = rb_next(node);
+ if (!node)
+ break;
}
-out:
if (gcb) {
for (i = 0; i < gcb->head.cnt; i++) {
rbe = gcb->elems[i];
@@ -440,6 +442,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
struct rb_node *node;
cancel_delayed_work_sync(&priv->gc_work);
+ rcu_barrier();
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
@@ -462,7 +465,7 @@ static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
return true;
}
-static struct nft_set_type nft_rbtree_type __read_mostly = {
+struct nft_set_type nft_set_rbtree_type __read_mostly = {
.owner = THIS_MODULE,
.features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
.ops = {
@@ -481,20 +484,3 @@ static struct nft_set_type nft_rbtree_type __read_mostly = {
.get = nft_rbtree_get,
},
};
-
-static int __init nft_rbtree_module_init(void)
-{
- return nft_register_set(&nft_rbtree_type);
-}
-
-static void __exit nft_rbtree_module_exit(void)
-{
- nft_unregister_set(&nft_rbtree_type);
-}
-
-module_init(nft_rbtree_module_init);
-module_exit(nft_rbtree_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_SET();
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 58fce4e749a9..d76550a8b642 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -61,7 +61,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -77,7 +77,7 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, skb, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NF_TPROXY_LOOKUP_LISTENER);
@@ -150,7 +150,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
@@ -171,7 +171,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
+ sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff,
tproto, &iph->saddr, laddr,
hp->source, lport,
xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
diff --git a/net/nfc/llcp_commands.c b/net/nfc/llcp_commands.c
index 2ceefa183cee..6a196e438b6c 100644
--- a/net/nfc/llcp_commands.c
+++ b/net/nfc/llcp_commands.c
@@ -752,11 +752,14 @@ int nfc_llcp_send_ui_frame(struct nfc_llcp_sock *sock, u8 ssap, u8 dsap,
pr_debug("Fragment %zd bytes remaining %zd",
frag_len, remaining_len);
- pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT,
+ pdu = nfc_alloc_send_skb(sock->dev, &sock->sk, 0,
frag_len + LLCP_HEADER_SIZE, &err);
if (pdu == NULL) {
- pr_err("Could not allocate PDU\n");
- continue;
+ pr_err("Could not allocate PDU (error=%d)\n", err);
+ len -= remaining_len;
+ if (len == 0)
+ len = err;
+ break;
}
pdu = llcp_add_header(pdu, dsap, ssap, LLCP_PDU_UI);
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 9696ef96b719..1a30e165eeb4 100644
--- a/net/nsh/nsh.c
+++ b/net/nsh/nsh.c
@@ -104,7 +104,7 @@ static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
__skb_pull(skb, nsh_len);
skb_reset_mac_header(skb);
- skb_reset_mac_len(skb);
+ skb->mac_len = proto == htons(ETH_P_TEB) ? ETH_HLEN : 0;
skb->protocol = proto;
features &= NETIF_F_SG;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 57634bc3da74..9b27d0cd766d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2878,6 +2878,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_free;
} else if (reserve) {
skb_reserve(skb, -reserve);
+ if (len < reserve)
+ skb_reset_network_header(skb);
}
/* Returns -EFAULT on error */
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 2aa07b547b16..86e1e37eb4e8 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -191,8 +191,13 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
hdr->type = cpu_to_le32(type);
hdr->src_node_id = cpu_to_le32(from->sq_node);
hdr->src_port_id = cpu_to_le32(from->sq_port);
- hdr->dst_node_id = cpu_to_le32(to->sq_node);
- hdr->dst_port_id = cpu_to_le32(to->sq_port);
+ if (to->sq_port == QRTR_PORT_CTRL) {
+ hdr->dst_node_id = cpu_to_le32(node->nid);
+ hdr->dst_port_id = cpu_to_le32(QRTR_NODE_BCAST);
+ } else {
+ hdr->dst_node_id = cpu_to_le32(to->sq_node);
+ hdr->dst_port_id = cpu_to_le32(to->sq_port);
+ }
hdr->size = cpu_to_le32(len);
hdr->confirm_rx = 0;
@@ -764,6 +769,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
node = NULL;
if (addr->sq_node == QRTR_NODE_BCAST) {
enqueue_fn = qrtr_bcast_enqueue;
+ if (addr->sq_port != QRTR_PORT_CTRL) {
+ release_sock(sk);
+ return -ENOTCONN;
+ }
} else if (addr->sq_node == ipc->us.sq_node) {
enqueue_fn = qrtr_local_enqueue;
} else {
diff --git a/net/rds/connection.c b/net/rds/connection.c
index abef75da89a7..cfb05953b0e5 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -659,11 +659,19 @@ static void rds_conn_info(struct socket *sock, unsigned int len,
int rds_conn_init(void)
{
+ int ret;
+
+ ret = rds_loop_net_init(); /* register pernet callback */
+ if (ret)
+ return ret;
+
rds_conn_slab = kmem_cache_create("rds_connection",
sizeof(struct rds_connection),
0, 0, NULL);
- if (!rds_conn_slab)
+ if (!rds_conn_slab) {
+ rds_loop_net_exit();
return -ENOMEM;
+ }
rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info);
rds_info_register_func(RDS_INFO_SEND_MESSAGES,
@@ -676,6 +684,7 @@ int rds_conn_init(void)
void rds_conn_exit(void)
{
+ rds_loop_net_exit(); /* unregister pernet callback */
rds_loop_exit();
WARN_ON(!hlist_empty(rds_conn_hash));
diff --git a/net/rds/loop.c b/net/rds/loop.c
index dac6218a460e..feea1f96ee2a 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -33,6 +33,8 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/in.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include "rds_single_path.h"
#include "rds.h"
@@ -40,6 +42,17 @@
static DEFINE_SPINLOCK(loop_conns_lock);
static LIST_HEAD(loop_conns);
+static atomic_t rds_loop_unloading = ATOMIC_INIT(0);
+
+static void rds_loop_set_unloading(void)
+{
+ atomic_set(&rds_loop_unloading, 1);
+}
+
+static bool rds_loop_is_unloading(struct rds_connection *conn)
+{
+ return atomic_read(&rds_loop_unloading) != 0;
+}
/*
* This 'loopback' transport is a special case for flows that originate
@@ -165,6 +178,8 @@ void rds_loop_exit(void)
struct rds_loop_connection *lc, *_lc;
LIST_HEAD(tmp_list);
+ rds_loop_set_unloading();
+ synchronize_rcu();
/* avoid calling conn_destroy with irqs off */
spin_lock_irq(&loop_conns_lock);
list_splice(&loop_conns, &tmp_list);
@@ -177,6 +192,46 @@ void rds_loop_exit(void)
}
}
+static void rds_loop_kill_conns(struct net *net)
+{
+ struct rds_loop_connection *lc, *_lc;
+ LIST_HEAD(tmp_list);
+
+ spin_lock_irq(&loop_conns_lock);
+ list_for_each_entry_safe(lc, _lc, &loop_conns, loop_node) {
+ struct net *c_net = read_pnet(&lc->conn->c_net);
+
+ if (net != c_net)
+ continue;
+ list_move_tail(&lc->loop_node, &tmp_list);
+ }
+ spin_unlock_irq(&loop_conns_lock);
+
+ list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) {
+ WARN_ON(lc->conn->c_passive);
+ rds_conn_destroy(lc->conn);
+ }
+}
+
+static void __net_exit rds_loop_exit_net(struct net *net)
+{
+ rds_loop_kill_conns(net);
+}
+
+static struct pernet_operations rds_loop_net_ops = {
+ .exit = rds_loop_exit_net,
+};
+
+int rds_loop_net_init(void)
+{
+ return register_pernet_device(&rds_loop_net_ops);
+}
+
+void rds_loop_net_exit(void)
+{
+ unregister_pernet_device(&rds_loop_net_ops);
+}
+
/*
* This is missing .xmit_* because loop doesn't go through generic
* rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and
@@ -194,4 +249,5 @@ struct rds_transport rds_loop_transport = {
.inc_free = rds_loop_inc_free,
.t_name = "loopback",
.t_type = RDS_TRANS_LOOP,
+ .t_unloading = rds_loop_is_unloading,
};
diff --git a/net/rds/loop.h b/net/rds/loop.h
index 469fa4b2da4f..bbc8cdd030df 100644
--- a/net/rds/loop.h
+++ b/net/rds/loop.h
@@ -5,6 +5,8 @@
/* loop.c */
extern struct rds_transport rds_loop_transport;
+int rds_loop_net_init(void);
+void rds_loop_net_exit(void);
void rds_loop_exit(void);
#endif
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 526a8e491626..6e7124e57918 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -91,7 +91,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
}
params_old = rtnl_dereference(p->params);
- params_new->action = parm->action;
+ p->tcf_action = parm->action;
params_new->update_flags = parm->update_flags;
rcu_assign_pointer(p->params, params_new);
if (params_old)
@@ -561,7 +561,7 @@ static int tcf_csum(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&p->tcf_tm);
bstats_cpu_update(this_cpu_ptr(p->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(p->tcf_action);
if (unlikely(action == TC_ACT_SHOT))
goto drop_stats;
@@ -599,11 +599,11 @@ static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
.index = p->tcf_index,
.refcnt = p->tcf_refcnt - ref,
.bindcnt = p->tcf_bindcnt - bind,
+ .action = p->tcf_action,
};
struct tcf_t t;
params = rtnl_dereference(p->params);
- opt.action = params->action;
opt.update_flags = params->update_flags;
if (nla_put(skb, TCA_CSUM_PARMS, sizeof(opt), &opt))
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 626dac81a48a..9bc6c2ae98a5 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -36,7 +36,7 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
tcf_lastuse_update(&t->tcf_tm);
bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb);
- action = params->action;
+ action = READ_ONCE(t->tcf_action);
switch (params->tcft_action) {
case TCA_TUNNEL_KEY_ACT_RELEASE:
@@ -182,7 +182,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
params_old = rtnl_dereference(t->params);
- params_new->action = parm->action;
+ t->tcf_action = parm->action;
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
@@ -254,13 +254,13 @@ static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a,
.index = t->tcf_index,
.refcnt = t->tcf_refcnt - ref,
.bindcnt = t->tcf_bindcnt - bind,
+ .action = t->tcf_action,
};
struct tcf_t tm;
params = rtnl_dereference(t->params);
opt.t_action = params->tcft_action;
- opt.action = params->action;
if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index cdc3c87c53e6..f74513a7c7a8 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1053,7 +1053,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
for (tp = rtnl_dereference(chain->filter_chain);
tp; tp = rtnl_dereference(tp->next))
tfilter_notify(net, oskb, n, tp, block,
- q, parent, 0, event, false);
+ q, parent, NULL, event, false);
}
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1444,7 +1444,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
if (cb->args[1] == 0) {
- if (tcf_fill_node(net, skb, tp, block, q, parent, 0,
+ if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWTFILTER) <= 0)
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index cd2e0e342fb6..6c0a9d5dbf94 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -479,24 +479,28 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
q->cparams.mtu = psched_mtu(qdisc_dev(sch));
if (opt) {
- int err = fq_codel_change(sch, opt, extack);
+ err = fq_codel_change(sch, opt, extack);
if (err)
- return err;
+ goto init_failure;
}
err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
if (err)
- return err;
+ goto init_failure;
if (!q->flows) {
q->flows = kvcalloc(q->flows_cnt,
sizeof(struct fq_codel_flow),
GFP_KERNEL);
- if (!q->flows)
- return -ENOMEM;
+ if (!q->flows) {
+ err = -ENOMEM;
+ goto init_failure;
+ }
q->backlogs = kvcalloc(q->flows_cnt, sizeof(u32), GFP_KERNEL);
- if (!q->backlogs)
- return -ENOMEM;
+ if (!q->backlogs) {
+ err = -ENOMEM;
+ goto alloc_failure;
+ }
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
@@ -509,6 +513,13 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt,
else
sch->flags &= ~TCQ_F_CAN_BYPASS;
return 0;
+
+alloc_failure:
+ kvfree(q->flows);
+ q->flows = NULL;
+init_failure:
+ q->flows_cnt = 0;
+ return err;
}
static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 445b7ef61677..12cac85da994 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -282,7 +282,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
if (dst) {
/* Re-fetch, as under layers may have a higher minimum size */
- pmtu = SCTP_TRUNC4(dst_mtu(dst));
+ pmtu = sctp_dst_mtu(dst);
change = t->pathmtu != pmtu;
}
t->pathmtu = pmtu;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 973b4471b532..05e4ffe5aabd 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -45,6 +45,7 @@ static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group
*/
static void smc_tcp_listen_work(struct work_struct *);
+static void smc_connect_work(struct work_struct *);
static void smc_set_keepalive(struct sock *sk, int val)
{
@@ -122,6 +123,12 @@ static int smc_release(struct socket *sock)
goto out;
smc = smc_sk(sk);
+
+ /* cleanup for a dangling non-blocking connect */
+ flush_work(&smc->connect_work);
+ kfree(smc->connect_info);
+ smc->connect_info = NULL;
+
if (sk->sk_state == SMC_LISTEN)
/* smc_close_non_accepted() is called and acquires
* sock lock for child sockets again
@@ -140,7 +147,8 @@ static int smc_release(struct socket *sock)
smc->clcsock = NULL;
}
if (smc->use_fallback) {
- sock_put(sk); /* passive closing */
+ if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT)
+ sock_put(sk); /* passive closing */
sk->sk_state = SMC_CLOSED;
sk->sk_state_change(sk);
}
@@ -186,6 +194,7 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_protocol = protocol;
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
+ INIT_WORK(&smc->connect_work, smc_connect_work);
INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work);
INIT_LIST_HEAD(&smc->accept_q);
spin_lock_init(&smc->accept_q_lock);
@@ -409,12 +418,18 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
{
int rc;
- if (reason_code < 0) /* error, fallback is not possible */
+ if (reason_code < 0) { /* error, fallback is not possible */
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return reason_code;
+ }
if (reason_code != SMC_CLC_DECL_REPLY) {
rc = smc_clc_send_decline(smc, reason_code);
- if (rc < 0)
+ if (rc < 0) {
+ if (smc->sk.sk_state == SMC_INIT)
+ sock_put(&smc->sk); /* passive closing */
return rc;
+ }
}
return smc_connect_fallback(smc);
}
@@ -427,8 +442,6 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
smc_lgr_forget(smc->conn.lgr);
mutex_unlock(&smc_create_lgr_pending);
smc_conn_free(&smc->conn);
- if (reason_code < 0 && smc->sk.sk_state == SMC_INIT)
- sock_put(&smc->sk); /* passive closing */
return reason_code;
}
@@ -576,6 +589,35 @@ static int __smc_connect(struct smc_sock *smc)
return 0;
}
+static void smc_connect_work(struct work_struct *work)
+{
+ struct smc_sock *smc = container_of(work, struct smc_sock,
+ connect_work);
+ int rc;
+
+ lock_sock(&smc->sk);
+ rc = kernel_connect(smc->clcsock, &smc->connect_info->addr,
+ smc->connect_info->alen, smc->connect_info->flags);
+ if (smc->clcsock->sk->sk_err) {
+ smc->sk.sk_err = smc->clcsock->sk->sk_err;
+ goto out;
+ }
+ if (rc < 0) {
+ smc->sk.sk_err = -rc;
+ goto out;
+ }
+
+ rc = __smc_connect(smc);
+ if (rc < 0)
+ smc->sk.sk_err = -rc;
+
+out:
+ smc->sk.sk_state_change(&smc->sk);
+ kfree(smc->connect_info);
+ smc->connect_info = NULL;
+ release_sock(&smc->sk);
+}
+
static int smc_connect(struct socket *sock, struct sockaddr *addr,
int alen, int flags)
{
@@ -605,15 +647,32 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
smc_copy_sock_settings_to_clc(smc);
tcp_sk(smc->clcsock->sk)->syn_smc = 1;
- rc = kernel_connect(smc->clcsock, addr, alen, flags);
- if (rc)
- goto out;
+ if (flags & O_NONBLOCK) {
+ if (smc->connect_info) {
+ rc = -EALREADY;
+ goto out;
+ }
+ smc->connect_info = kzalloc(alen + 2 * sizeof(int), GFP_KERNEL);
+ if (!smc->connect_info) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ smc->connect_info->alen = alen;
+ smc->connect_info->flags = flags ^ O_NONBLOCK;
+ memcpy(&smc->connect_info->addr, addr, alen);
+ schedule_work(&smc->connect_work);
+ rc = -EINPROGRESS;
+ } else {
+ rc = kernel_connect(smc->clcsock, addr, alen, flags);
+ if (rc)
+ goto out;
- rc = __smc_connect(smc);
- if (rc < 0)
- goto out;
- else
- rc = 0; /* success cases including fallback */
+ rc = __smc_connect(smc);
+ if (rc < 0)
+ goto out;
+ else
+ rc = 0; /* success cases including fallback */
+ }
out:
release_sock(sk);
@@ -1279,40 +1338,20 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
__poll_t mask = 0;
struct smc_sock *smc;
- int rc;
if (!sk)
return EPOLLNVAL;
smc = smc_sk(sock->sk);
- sock_hold(sk);
- lock_sock(sk);
if ((sk->sk_state == SMC_INIT) || smc->use_fallback) {
/* delegate to CLC child sock */
- release_sock(sk);
mask = smc->clcsock->ops->poll(file, smc->clcsock, wait);
- lock_sock(sk);
sk->sk_err = smc->clcsock->sk->sk_err;
- if (sk->sk_err) {
+ if (sk->sk_err)
mask |= EPOLLERR;
- } else {
- /* if non-blocking connect finished ... */
- if (sk->sk_state == SMC_INIT &&
- mask & EPOLLOUT &&
- smc->clcsock->sk->sk_state != TCP_CLOSE) {
- rc = __smc_connect(smc);
- if (rc < 0)
- mask |= EPOLLERR;
- /* success cases including fallback */
- mask |= EPOLLOUT | EPOLLWRNORM;
- }
- }
} else {
- if (sk->sk_state != SMC_CLOSED) {
- release_sock(sk);
+ if (sk->sk_state != SMC_CLOSED)
sock_poll_wait(file, sk_sleep(sk), wait);
- lock_sock(sk);
- }
if (sk->sk_err)
mask |= EPOLLERR;
if ((sk->sk_shutdown == SHUTDOWN_MASK) ||
@@ -1338,10 +1377,7 @@ static __poll_t smc_poll(struct file *file, struct socket *sock,
}
if (smc->conn.urg_state == SMC_URG_VALID)
mask |= EPOLLPRI;
-
}
- release_sock(sk);
- sock_put(sk);
return mask;
}
@@ -1421,7 +1457,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (optlen < sizeof(int))
return -EINVAL;
- get_user(val, (int __user *)optval);
+ if (get_user(val, (int __user *)optval))
+ return -EFAULT;
lock_sock(sk);
switch (optname) {
@@ -1489,10 +1526,13 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
return -EBADF;
return smc->clcsock->ops->ioctl(smc->clcsock, cmd, arg);
}
+ lock_sock(&smc->sk);
switch (cmd) {
case SIOCINQ: /* same as FIONREAD */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1501,8 +1541,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
break;
case SIOCOUTQ:
/* output queue size (not send + not acked) */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1512,8 +1554,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
break;
case SIOCOUTQNSD:
/* output queue size (not send only) */
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED)
answ = 0;
@@ -1521,8 +1565,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
answ = smc_tx_prepared_sends(&smc->conn);
break;
case SIOCATMARK:
- if (smc->sk.sk_state == SMC_LISTEN)
+ if (smc->sk.sk_state == SMC_LISTEN) {
+ release_sock(&smc->sk);
return -EINVAL;
+ }
if (smc->sk.sk_state == SMC_INIT ||
smc->sk.sk_state == SMC_CLOSED) {
answ = 0;
@@ -1538,8 +1584,10 @@ static int smc_ioctl(struct socket *sock, unsigned int cmd,
}
break;
default:
+ release_sock(&smc->sk);
return -ENOIOCTLCMD;
}
+ release_sock(&smc->sk);
return put_user(answ, (int __user *)arg);
}
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 51ae1f10d81a..d7ca26570482 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -187,11 +187,19 @@ struct smc_connection {
struct work_struct close_work; /* peer sent some closing */
};
+struct smc_connect_info {
+ int flags;
+ int alen;
+ struct sockaddr addr;
+};
+
struct smc_sock { /* smc sock container */
struct sock sk;
struct socket *clcsock; /* internal tcp socket */
struct smc_connection conn; /* smc connection */
struct smc_sock *listen_smc; /* listen parent */
+ struct smc_connect_info *connect_info; /* connect address & flags */
+ struct work_struct connect_work; /* handle non-blocking connect*/
struct work_struct tcp_listen_work;/* handle tcp socket accepts */
struct work_struct smc_listen_work;/* prepare new accept socket */
struct list_head accept_q; /* sockets to be accepted */
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index 717449b1da0b..ae5d168653ce 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -250,6 +250,7 @@ out:
int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
u8 expected_type)
{
+ long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo;
struct sock *clc_sk = smc->clcsock->sk;
struct smc_clc_msg_hdr *clcm = buf;
struct msghdr msg = {NULL, 0};
@@ -306,7 +307,6 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
memset(&msg, 0, sizeof(struct msghdr));
iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, datlen);
krflags = MSG_WAITALL;
- smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
len = sock_recvmsg(smc->clcsock, &msg, krflags);
if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
smc->sk.sk_err = EPROTO;
@@ -322,6 +322,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
}
out:
+ smc->clcsock->sk->sk_rcvtimeo = rcvtimeo;
return reason_code;
}
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index fa41d9881741..ac961dfb1ea1 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -107,6 +107,8 @@ static void smc_close_active_abort(struct smc_sock *smc)
}
switch (sk->sk_state) {
case SMC_INIT:
+ sk->sk_state = SMC_PEERABORTWAIT;
+ break;
case SMC_ACTIVE:
sk->sk_state = SMC_PEERABORTWAIT;
release_sock(sk);
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index cee666400752..f82886b7d1d8 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -495,7 +495,8 @@ out:
void smc_tx_consumer_update(struct smc_connection *conn, bool force)
{
- union smc_host_cursor cfed, cons;
+ union smc_host_cursor cfed, cons, prod;
+ int sender_free = conn->rmb_desc->len;
int to_confirm;
smc_curs_write(&cons,
@@ -505,11 +506,18 @@ void smc_tx_consumer_update(struct smc_connection *conn, bool force)
smc_curs_read(&conn->rx_curs_confirmed, conn),
conn);
to_confirm = smc_curs_diff(conn->rmb_desc->len, &cfed, &cons);
+ if (to_confirm > conn->rmbe_update_limit) {
+ smc_curs_write(&prod,
+ smc_curs_read(&conn->local_rx_ctrl.prod, conn),
+ conn);
+ sender_free = conn->rmb_desc->len -
+ smc_curs_diff(conn->rmb_desc->len, &prod, &cfed);
+ }
if (conn->local_rx_ctrl.prod_flags.cons_curs_upd_req ||
force ||
((to_confirm > conn->rmbe_update_limit) &&
- ((to_confirm > (conn->rmb_desc->len / 2)) ||
+ ((sender_free <= (conn->rmb_desc->len / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 373836615c57..625acb27efcc 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -35,7 +35,6 @@ struct _strp_msg {
*/
struct strp_msg strp;
int accum_len;
- int early_eaten;
};
static inline struct _strp_msg *_strp_msg(struct sk_buff *skb)
@@ -115,20 +114,6 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
head = strp->skb_head;
if (head) {
/* Message already in progress */
-
- stm = _strp_msg(head);
- if (unlikely(stm->early_eaten)) {
- /* Already some number of bytes on the receive sock
- * data saved in skb_head, just indicate they
- * are consumed.
- */
- eaten = orig_len <= stm->early_eaten ?
- orig_len : stm->early_eaten;
- stm->early_eaten -= eaten;
-
- return eaten;
- }
-
if (unlikely(orig_offset)) {
/* Getting data with a non-zero offset when a message is
* in progress is not expected. If it does happen, we
@@ -297,9 +282,9 @@ static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb,
}
stm->accum_len += cand_len;
+ eaten += cand_len;
strp->need_bytes = stm->strp.full_len -
stm->accum_len;
- stm->early_eaten = cand_len;
STRP_STATS_ADD(strp->stats.bytes, cand_len);
desc->count = 0; /* Stop reading socket */
break;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 9f666e0650e2..2830709957bd 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -133,6 +133,8 @@ static void disc_dupl_alert(struct tipc_bearer *b, u32 node_addr,
}
/* tipc_disc_addr_trial(): - handle an address uniqueness trial from peer
+ * Returns true if message should be dropped by caller, i.e., if it is a
+ * trial message or we are inside trial period. Otherwise false.
*/
static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
struct tipc_media_addr *maddr,
@@ -168,8 +170,9 @@ static bool tipc_disc_addr_trial_msg(struct tipc_discoverer *d,
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
}
+ /* Accept regular link requests/responses only after trial period */
if (mtyp != DSC_TRIAL_MSG)
- return false;
+ return trial;
sugg_addr = tipc_node_try_addr(net, peer_id, src);
if (sugg_addr)
@@ -284,7 +287,6 @@ static void tipc_disc_timeout(struct timer_list *t)
{
struct tipc_discoverer *d = from_timer(d, t, timer);
struct tipc_net *tn = tipc_net(d->net);
- u32 self = tipc_own_addr(d->net);
struct tipc_media_addr maddr;
struct sk_buff *skb = NULL;
struct net *net = d->net;
@@ -298,12 +300,14 @@ static void tipc_disc_timeout(struct timer_list *t)
goto exit;
}
- /* Did we just leave the address trial period ? */
- if (!self && !time_before(jiffies, tn->addr_trial_end)) {
- self = tn->trial_addr;
- tipc_net_finalize(net, self);
- msg_set_prevnode(buf_msg(d->skb), self);
+ /* Trial period over ? */
+ if (!time_before(jiffies, tn->addr_trial_end)) {
+ /* Did we just leave it ? */
+ if (!tipc_own_addr(net))
+ tipc_net_finalize(net, tn->trial_addr);
+
msg_set_type(buf_msg(d->skb), DSC_REQ_MSG);
+ msg_set_prevnode(buf_msg(d->skb), tipc_own_addr(net));
}
/* Adjust timeout interval according to discovery phase */
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 4fbaa0464405..a7f6964c3a4b 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -121,12 +121,17 @@ int tipc_net_init(struct net *net, u8 *node_id, u32 addr)
void tipc_net_finalize(struct net *net, u32 addr)
{
- tipc_set_node_addr(net, addr);
- smp_mb();
- tipc_named_reinit(net);
- tipc_sk_reinit(net);
- tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
- TIPC_CLUSTER_SCOPE, 0, addr);
+ struct tipc_net *tn = tipc_net(net);
+
+ spin_lock_bh(&tn->node_list_lock);
+ if (!tipc_own_addr(net)) {
+ tipc_set_node_addr(net, addr);
+ tipc_named_reinit(net);
+ tipc_sk_reinit(net);
+ tipc_nametbl_publish(net, TIPC_CFG_SRV, addr, addr,
+ TIPC_CLUSTER_SCOPE, 0, addr);
+ }
+ spin_unlock_bh(&tn->node_list_lock);
}
void tipc_net_stop(struct net *net)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6a44eb812baf..0453bd451ce8 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -797,6 +797,7 @@ static u32 tipc_node_suggest_addr(struct net *net, u32 addr)
}
/* tipc_node_try_addr(): Check if addr can be used by peer, suggest other if not
+ * Returns suggested address if any, otherwise 0
*/
u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
{
@@ -819,12 +820,14 @@ u32 tipc_node_try_addr(struct net *net, u8 *id, u32 addr)
if (n) {
addr = n->addr;
tipc_node_put(n);
+ return addr;
}
- /* Even this node may be in trial phase */
+
+ /* Even this node may be in conflict */
if (tn->trial_addr == addr)
return tipc_node_suggest_addr(net, addr);
- return addr;
+ return 0;
}
void tipc_node_check_dest(struct net *net, u32 addr,
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index d2380548f8f6..1f3d9789af30 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -440,7 +440,7 @@ alloc_encrypted:
ret = tls_push_record(sk, msg->msg_flags, record_type);
if (!ret)
continue;
- if (ret == -EAGAIN)
+ if (ret < 0)
goto send_end;
copied -= try_to_copy;
@@ -646,6 +646,9 @@ static struct sk_buff *tls_wait_data(struct sock *sk, int flags,
return NULL;
}
+ if (sk->sk_shutdown & RCV_SHUTDOWN)
+ return NULL;
+
if (sock_flag(sk, SOCK_DONE))
return NULL;
@@ -701,6 +704,10 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
nsg = skb_to_sgvec(skb, &sgin[1],
rxm->offset + tls_ctx->rx.prepend_size,
rxm->full_len - tls_ctx->rx.prepend_size);
+ if (nsg < 0) {
+ ret = nsg;
+ goto out;
+ }
tls_make_aad(ctx->rx_aad_ciphertext,
rxm->full_len - tls_ctx->rx.overhead_size,
@@ -712,6 +719,7 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb,
rxm->full_len - tls_ctx->rx.overhead_size,
skb, sk->sk_allocation);
+out:
if (sgin != &sgin_arr[0])
kfree(sgin);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c7bbe5f0aae8..80bc986c79e5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4409,6 +4409,7 @@ static int parse_station_flags(struct genl_info *info,
params->sta_flags_mask = BIT(NL80211_STA_FLAG_AUTHENTICATED) |
BIT(NL80211_STA_FLAG_MFP) |
BIT(NL80211_STA_FLAG_AUTHORIZED);
+ break;
default:
return -EINVAL;
}
@@ -6231,7 +6232,7 @@ do { \
nl80211_check_s32);
/*
* Check HT operation mode based on
- * IEEE 802.11 2012 8.4.2.59 HT Operation element.
+ * IEEE 802.11-2016 9.4.2.57 HT Operation element.
*/
if (tb[NL80211_MESHCONF_HT_OPMODE]) {
ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]);
@@ -6241,22 +6242,9 @@ do { \
IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
return -EINVAL;
- if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) &&
- (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
- return -EINVAL;
+ /* NON_HT_STA bit is reserved, but some programs set it */
+ ht_opmode &= ~IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT;
- switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) {
- case IEEE80211_HT_OP_MODE_PROTECTION_NONE:
- case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ:
- if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)
- return -EINVAL;
- break;
- case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER:
- case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED:
- if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT))
- return -EINVAL;
- break;
- }
cfg->ht_opmode = ht_opmode;
mask |= (1 << (NL80211_MESHCONF_HT_OPMODE - 1));
}
@@ -10962,9 +10950,12 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- nl80211_packet_pattern_policy,
- info->extack);
+ err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+ nl80211_packet_pattern_policy,
+ info->extack);
+ if (err)
+ goto error;
+
err = -EINVAL;
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
@@ -11213,8 +11204,11 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev,
rem) {
u8 *mask_pat;
- nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
- nl80211_packet_pattern_policy, NULL);
+ err = nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat,
+ nl80211_packet_pattern_policy, NULL);
+ if (err)
+ return err;
+
if (!pat_tb[NL80211_PKTPAT_MASK] ||
!pat_tb[NL80211_PKTPAT_PATTERN])
return -EINVAL;
@@ -14930,20 +14924,24 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
static int __nl80211_rx_control_port(struct net_device *dev,
- const u8 *buf, size_t len,
- const u8 *addr, u16 proto,
+ struct sk_buff *skb,
bool unencrypted, gfp_t gfp)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+ struct ethhdr *ehdr = eth_hdr(skb);
+ const u8 *addr = ehdr->h_source;
+ u16 proto = be16_to_cpu(skb->protocol);
struct sk_buff *msg;
void *hdr;
+ struct nlattr *frame;
+
u32 nlportid = READ_ONCE(wdev->conn_owner_nlportid);
if (!nlportid)
return -ENOENT;
- msg = nlmsg_new(100 + len, gfp);
+ msg = nlmsg_new(100 + skb->len, gfp);
if (!msg)
return -ENOMEM;
@@ -14957,13 +14955,17 @@ static int __nl80211_rx_control_port(struct net_device *dev,
nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) ||
nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
NL80211_ATTR_PAD) ||
- nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) ||
nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) ||
(unencrypted && nla_put_flag(msg,
NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT)))
goto nla_put_failure;
+ frame = nla_reserve(msg, NL80211_ATTR_FRAME, skb->len);
+ if (!frame)
+ goto nla_put_failure;
+
+ skb_copy_bits(skb, 0, nla_data(frame), skb->len);
genlmsg_end(msg, hdr);
return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid);
@@ -14974,14 +14976,12 @@ static int __nl80211_rx_control_port(struct net_device *dev,
}
bool cfg80211_rx_control_port(struct net_device *dev,
- const u8 *buf, size_t len,
- const u8 *addr, u16 proto, bool unencrypted)
+ struct sk_buff *skb, bool unencrypted)
{
int ret;
- trace_cfg80211_rx_control_port(dev, buf, len, addr, proto, unencrypted);
- ret = __nl80211_rx_control_port(dev, buf, len, addr, proto,
- unencrypted, GFP_ATOMIC);
+ trace_cfg80211_rx_control_port(dev, skb, unencrypted);
+ ret = __nl80211_rx_control_port(dev, skb, unencrypted, GFP_ATOMIC);
trace_cfg80211_return_bool(ret == 0);
return ret == 0;
}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index bbe6298e4bb9..4fc66a117b7d 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2240,7 +2240,9 @@ static void wiphy_update_regulatory(struct wiphy *wiphy,
* as some drivers used this to restore its orig_* reg domain.
*/
if (initiator == NL80211_REGDOM_SET_BY_CORE &&
- wiphy->regulatory_flags & REGULATORY_CUSTOM_REG)
+ wiphy->regulatory_flags & REGULATORY_CUSTOM_REG &&
+ !(wiphy->regulatory_flags &
+ REGULATORY_WIPHY_SELF_MANAGED))
reg_call_notifier(wiphy, lr);
return;
}
@@ -2787,26 +2789,6 @@ static void notify_self_managed_wiphys(struct regulatory_request *request)
}
}
-static bool reg_only_self_managed_wiphys(void)
-{
- struct cfg80211_registered_device *rdev;
- struct wiphy *wiphy;
- bool self_managed_found = false;
-
- ASSERT_RTNL();
-
- list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
- wiphy = &rdev->wiphy;
- if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED)
- self_managed_found = true;
- else
- return false;
- }
-
- /* make sure at least one self-managed wiphy exists */
- return self_managed_found;
-}
-
/*
* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_*
* Regulatory hints come on a first come first serve basis and we
@@ -2839,10 +2821,6 @@ static void reg_process_pending_hints(void)
spin_unlock(&reg_requests_lock);
notify_self_managed_wiphys(reg_request);
- if (reg_only_self_managed_wiphys()) {
- reg_free_request(reg_request);
- return;
- }
reg_process_hint(reg_request);
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 2b417a2fe63f..7c73510b161f 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2627,23 +2627,25 @@ TRACE_EVENT(cfg80211_mgmt_tx_status,
);
TRACE_EVENT(cfg80211_rx_control_port,
- TP_PROTO(struct net_device *netdev, const u8 *buf, size_t len,
- const u8 *addr, u16 proto, bool unencrypted),
- TP_ARGS(netdev, buf, len, addr, proto, unencrypted),
+ TP_PROTO(struct net_device *netdev, struct sk_buff *skb,
+ bool unencrypted),
+ TP_ARGS(netdev, skb, unencrypted),
TP_STRUCT__entry(
NETDEV_ENTRY
- MAC_ENTRY(addr)
+ __field(int, len)
+ MAC_ENTRY(from)
__field(u16, proto)
__field(bool, unencrypted)
),
TP_fast_assign(
NETDEV_ASSIGN;
- MAC_ASSIGN(addr, addr);
- __entry->proto = proto;
+ __entry->len = skb->len;
+ MAC_ASSIGN(from, eth_hdr(skb)->h_source);
+ __entry->proto = be16_to_cpu(skb->protocol);
__entry->unencrypted = unencrypted;
),
- TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT " proto: 0x%x, unencrypted: %s",
- NETDEV_PR_ARG, MAC_PR_ARG(addr),
+ TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s",
+ NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from),
__entry->proto, BOOL_TO_STR(__entry->unencrypted))
);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 59fb7d3c36a3..72335c2e8108 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -199,8 +199,11 @@ static void xsk_destruct_skb(struct sk_buff *skb)
{
u64 addr = (u64)(long)skb_shinfo(skb)->destructor_arg;
struct xdp_sock *xs = xdp_sk(skb->sk);
+ unsigned long flags;
+ spin_lock_irqsave(&xs->tx_completion_lock, flags);
WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+ spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
}
@@ -215,9 +218,6 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
struct sk_buff *skb;
int err = 0;
- if (unlikely(!xs->tx))
- return -ENOBUFS;
-
mutex_lock(&xs->mutex);
while (xskq_peek_desc(xs->tx, &desc)) {
@@ -230,22 +230,13 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
goto out;
}
- if (xskq_reserve_addr(xs->umem->cq)) {
- err = -EAGAIN;
- goto out;
- }
-
- len = desc.len;
- if (unlikely(len > xs->dev->mtu)) {
- err = -EMSGSIZE;
+ if (xskq_reserve_addr(xs->umem->cq))
goto out;
- }
- if (xs->queue_id >= xs->dev->real_num_tx_queues) {
- err = -ENXIO;
+ if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- }
+ len = desc.len;
skb = sock_alloc_send_skb(sk, len, 1, &err);
if (unlikely(!skb)) {
err = -EAGAIN;
@@ -268,15 +259,15 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m,
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
+ xskq_discard_desc(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
- err = -EAGAIN;
- /* SKB consumed by dev_direct_xmit() */
+ /* SKB completed but not sent */
+ err = -EBUSY;
goto out;
}
sent_frame = true;
- xskq_discard_desc(xs->tx);
}
out:
@@ -297,6 +288,8 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
return -ENXIO;
if (unlikely(!(xs->dev->flags & IFF_UP)))
return -ENETDOWN;
+ if (unlikely(!xs->tx))
+ return -ENOBUFS;
if (need_wait)
return -EOPNOTSUPP;
@@ -755,6 +748,7 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
xs = xdp_sk(sk);
mutex_init(&xs->mutex);
+ spin_lock_init(&xs->tx_completion_lock);
local_bh_disable();
sock_prot_inuse_add(net, &xsk_proto, 1);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index ef6a6f0ec949..52ecaf770642 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -62,14 +62,9 @@ static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
return (entries > dcnt) ? dcnt : entries;
}
-static inline u32 xskq_nb_free_lazy(struct xsk_queue *q, u32 producer)
-{
- return q->nentries - (producer - q->cons_tail);
-}
-
static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
{
- u32 free_entries = xskq_nb_free_lazy(q, producer);
+ u32 free_entries = q->nentries - (producer - q->cons_tail);
if (free_entries >= dcnt)
return free_entries;
@@ -129,7 +124,7 @@ static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
{
struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- if (xskq_nb_free(q, q->prod_tail, LAZY_UPDATE_THRESHOLD) == 0)
+ if (xskq_nb_free(q, q->prod_tail, 1) == 0)
return -ENOSPC;
ring->desc[q->prod_tail++ & q->ring_mask] = addr;