diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-03-10 04:15:56 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-03-10 04:15:56 +0300 |
commit | 05a59d79793d482f628a31753c671f2e92178a21 (patch) | |
tree | 2665a1cdd7dbc62b28569044c1dd35790ac48ef4 /net | |
parent | 6a30bedfdf3be7bb5bf4effb4b2a28920cd2db1a (diff) | |
parent | 8515455720c52a0841bd1c9c5f457c9616900110 (diff) | |
download | linux-05a59d79793d482f628a31753c671f2e92178a21.tar.xz |
Merge git://git.kernel.org:/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from David Miller:
1) Fix transmissions in dynamic SMPS mode in ath9k, from Felix Fietkau.
2) TX skb error handling fix in mt76 driver, also from Felix.
3) Fix BPF_FETCH atomic in x86 JIT, from Brendan Jackman.
4) Avoid double free of percpu pointers when freeing a cloned bpf prog.
From Cong Wang.
5) Use correct printf format for dma_addr_t in ath11k, from Geert
Uytterhoeven.
6) Fix resolve_btfids build with older toolchains, from Kun-Chuan
Hsieh.
7) Don't report truncated frames to mac80211 in mt76 driver, from
Lorenzo Bianconi.
8) Fix watchdog timeout on suspend/resume of stmmac, from Joakim Zhang.
9) mscc ocelot needs NET_DEVLINK select in Kconfig, from Arnd Bergmann.
10) Fix sign comparison bug in TCP_ZEROCOPY_RECEIVE getsockopt(), from
Arjun Roy.
11) Ignore routes with deleted nexthop object in mlxsw, from Ido
Schimmel.
12) Need to undo tcp early demux lookup sometimes in nf_nat, from
Florian Westphal.
13) Fix gro aggregation for udp encaps with zero csum, from Daniel
Borkmann.
14) Make sure to always use icmp*_ndo_send when necessary, from Jason A.
Donenfeld.
15) Fix TRSCER masks in sh_eth driver from Sergey Shtylyov.
16) Prevent overly huge skb allocations in qrtr, from Pavel Skripkin.
17) Prevent rx ring consumer index loss of sync in enetc, from Vladimir
Oltean.
18) Make sure textsearch control block is large enough, from Willem de
Bruijn.
19) Revert MAC changes to r8152 leading to instability, from Hayes Wang.
20) Advance iov in 9p even for empty reads, from Jisheng Zhang.
21) Double hook unregister in nftables, from Pablo Neira Ayuso.
22) Fix memleak in ixgbe, from Dinghao Liu.
23) Avoid dups in pkt scheduler class dumps, from Maximilian Heyne.
24) Various mptcp fixes from Florian Westphal, Paolo Abeni, and Geliang
Tang.
25) Fix DOI refcount bugs in cipso, from Paul Moore.
26) One too many irqsave in ibmvnic, from Junlin Yang.
27) Fix infinite loop with MPLS gso segmenting via virtio_net, from
Balazs Nemeth.
* git://git.kernel.org:/pub/scm/linux/kernel/git/netdev/net: (164 commits)
s390/qeth: fix notification for pending buffers during teardown
s390/qeth: schedule TX NAPI on QAOB completion
s390/qeth: improve completion of pending TX buffers
s390/qeth: fix memory leak after failed TX Buffer allocation
net: avoid infinite loop in mpls_gso_segment when mpls_hlen == 0
net: check if protocol extracted by virtio_net_hdr_set_proto is correct
net: dsa: xrs700x: check if partner is same as port in hsr join
net: lapbether: Remove netif_start_queue / netif_stop_queue
atm: idt77252: fix null-ptr-dereference
atm: uPD98402: fix incorrect allocation
atm: fix a typo in the struct description
net: qrtr: fix error return code of qrtr_sendmsg()
mptcp: fix length of ADD_ADDR with port sub-option
net: bonding: fix error return code of bond_neigh_init()
net: enetc: allow hardware timestamping on TX queues with tc-etf enabled
net: enetc: set MAC RX FIFO to recommended value
net: davicom: Use platform_get_irq_optional()
net: davicom: Fix regulator not turned off on driver removal
net: davicom: Fix regulator not turned off on failed probe
net: dsa: fix switchdev objects on bridge master mistakenly being applied on ports
...
Diffstat (limited to 'net')
37 files changed, 362 insertions, 242 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index 4f62f299da0c..0a9019da18f3 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -1623,10 +1623,6 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, } p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - if (!count) { - p9_tag_remove(clnt, req); - return 0; - } if (non_zc) { int n = copy_to_iter(dataptr, count, to); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 0edc0b2baaa4..1bdcb33fb561 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2147,7 +2147,7 @@ out: out_err: cb->args[1] = idx; cb->args[0] = h; - cb->seq = net->dev_base_seq; + cb->seq = tgt_net->dev_base_seq; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); if (netnsid >= 0) put_net(tgt_net); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 545a472273a5..c421c8f80925 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3659,6 +3659,8 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, struct ts_state state; unsigned int ret; + BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb)); + config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 3589224c8da9..58b8fc82cd3c 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -118,6 +118,8 @@ config NET_DSA_TAG_OCELOT config NET_DSA_TAG_OCELOT_8021Q tristate "Tag driver for Ocelot family of switches, using VLAN" + depends on MSCC_OCELOT_SWITCH_LIB || \ + (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST) select NET_DSA_TAG_8021Q help Say Y or M if you want to enable support for tagging frames with a diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 2eeaa42f2e08..9d4b0e9b1aa1 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -230,8 +230,8 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); extern const struct phylink_mac_ops 
dsa_port_phylink_mac_ops; -static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, - struct net_device *dev) +static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp, + struct net_device *dev) { /* Switchdev offloading can be configured on: */ @@ -241,12 +241,6 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, */ return true; - if (dp->bridge_dev == dev) - /* DSA ports connected to a bridge, and event was emitted - * for the bridge. - */ - return true; - if (dp->lag_dev == dev) /* DSA ports connected to a bridge via a LAG */ return true; @@ -254,14 +248,23 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp, return false; } +static inline bool dsa_port_offloads_bridge(struct dsa_port *dp, + struct net_device *bridge_dev) +{ + /* DSA ports connected to a bridge, and event was emitted + * for the bridge. + */ + return dp->bridge_dev == bridge_dev; +} + /* Returns true if any port of this tree offloads the given net_device */ -static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst, - struct net_device *dev) +static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst, + struct net_device *dev) { struct dsa_port *dp; list_for_each_entry(dp, &dst->ports, list) - if (dsa_port_offloads_netdev(dp, dev)) + if (dsa_port_offloads_bridge_port(dp, dev)) return true; return false; diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 491e3761b5f4..992fcab4b552 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -278,28 +278,43 @@ static int dsa_slave_port_attr_set(struct net_device *dev, struct dsa_port *dp = dsa_slave_to_port(dev); int ret; - if (!dsa_port_offloads_netdev(dp, attr->orig_dev)) - return -EOPNOTSUPP; - switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_set_state(dp, attr->u.stp_state); break; case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: + if (!dsa_port_offloads_bridge(dp, 
attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering, extack); break; case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_ageing_time(dp, attr->u.ageing_time); break; case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags, extack); break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS: + if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack); break; case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER: + if (!dsa_port_offloads_bridge(dp, attr->orig_dev)) + return -EOPNOTSUPP; + ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack); break; default: @@ -341,9 +356,6 @@ static int dsa_slave_vlan_add(struct net_device *dev, struct switchdev_obj_port_vlan vlan; int err; - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - if (dsa_port_skip_vlan_configuration(dp)) { NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN"); return 0; @@ -391,27 +403,36 @@ static int dsa_slave_port_obj_add(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) + return -EOPNOTSUPP; + /* DSA can directly translate this to a normal MDB add, * but on the CPU port. 
*/ err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + err = dsa_slave_vlan_add(dev, obj, extack); break; case SWITCHDEV_OBJ_ID_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj)); break; case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_add_ring_role(dp, SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); break; @@ -431,9 +452,6 @@ static int dsa_slave_vlan_del(struct net_device *dev, struct switchdev_obj_port_vlan *vlan; int err; - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) - return -EOPNOTSUPP; - if (dsa_port_skip_vlan_configuration(dp)) return 0; @@ -459,27 +477,36 @@ static int dsa_slave_port_obj_del(struct net_device *dev, switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_MDB: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_HOST_MDB: + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) + return -EOPNOTSUPP; + /* DSA can directly translate this to a normal MDB add, * but on the CPU port. 
*/ err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: + if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev)) + return -EOPNOTSUPP; + err = dsa_slave_vlan_del(dev, obj); break; case SWITCHDEV_OBJ_ID_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj)); break; case SWITCHDEV_OBJ_ID_RING_ROLE_MRP: - if (!dsa_port_offloads_netdev(dp, obj->orig_dev)) + if (!dsa_port_offloads_bridge(dp, obj->orig_dev)) return -EOPNOTSUPP; + err = dsa_port_mrp_del_ring_role(dp, SWITCHDEV_OBJ_RING_ROLE_MRP(obj)); break; @@ -2298,7 +2325,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused, * other ports bridged with the LAG should be able to * autonomously forward towards it. */ - if (dsa_tree_offloads_netdev(dp->ds->dst, dev)) + if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev)) return NOTIFY_DONE; } diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 38dcdded74c0..59748487664f 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -13,6 +13,7 @@ #define MTK_HDR_LEN 4 #define MTK_HDR_XMIT_UNTAGGED 0 #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 +#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) #define MTK_HDR_XMIT_SA_DIS BIT(6) @@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); + u8 xmit_tpid; u8 *mtk_tag; - bool is_vlan_skb = true; unsigned char *dest = eth_hdr(skb)->h_dest; bool is_multicast_skb = is_multicast_ether_addr(dest) && !is_broadcast_ether_addr(dest); @@ -33,10 +34,17 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, * the both special and VLAN tag at the same time and then look up VLAN * table with VID. 
*/ - if (!skb_vlan_tagged(skb)) { + switch (skb->protocol) { + case htons(ETH_P_8021Q): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100; + break; + case htons(ETH_P_8021AD): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8; + break; + default: + xmit_tpid = MTK_HDR_XMIT_UNTAGGED; skb_push(skb, MTK_HDR_LEN); memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); - is_vlan_skb = false; } mtk_tag = skb->data + 2 * ETH_ALEN; @@ -44,8 +52,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, /* Mark tag attribute on special tag insertion to notify hardware * whether that's a combined special tag with 802.1Q header. */ - mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 : - MTK_HDR_XMIT_UNTAGGED; + mtk_tag[0] = xmit_tpid; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; /* Disable SA learning for multicast frames */ @@ -53,7 +60,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; /* Tag control information is kept for 802.1Q */ - if (!is_vlan_skb) { + if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) { mtk_tag[2] = 0; mtk_tag[3] = 0; } diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index c17d39b4a1a0..e9176475bac8 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -35,14 +35,12 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 *p; u8 *tag; - u16 *p; u16 out; /* Pad out to at least 60 bytes */ - if (unlikely(eth_skb_pad(skb))) - return NULL; - if (skb_cow_head(skb, RTL4_A_HDR_LEN) < 0) + if (unlikely(__skb_put_padto(skb, ETH_ZLEN, false))) return NULL; netdev_dbg(dev, "add realtek tag to package to port %d\n", @@ -53,13 +51,13 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, tag = skb->data + 2 * ETH_ALEN; /* Set Ethertype */ - p = (u16 *)tag; + p = (__be16 *)tag; *p = htons(RTL4_A_ETHERTYPE); out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8); - /* The lower bits is the port numer */ + /* 
The lower bits is the port number */ out |= (u8)dp->index; - p = (u16 *)(tag + 2); + p = (__be16 *)(tag + 2); *p = htons(out); return skb; diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 25a9e566ef5c..6a070dc8e4b0 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -116,10 +116,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) struct ethtool_channels channels = {}; struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; - const struct nlattr *err_attr; + u32 err_attr, max_rx_in_use = 0; const struct ethtool_ops *ops; struct net_device *dev; - u32 max_rx_in_use = 0; int ret; ret = ethnl_parse_header_dev_get(&req_info, @@ -157,34 +156,35 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) /* ensure new channel counts are within limits */ if (channels.rx_count > channels.max_rx) - err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_RX_COUNT; else if (channels.tx_count > channels.max_tx) - err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_TX_COUNT; else if (channels.other_count > channels.max_other) - err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_OTHER_COUNT; else if (channels.combined_count > channels.max_combined) - err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT; else - err_attr = NULL; + err_attr = 0; if (err_attr) { ret = -EINVAL; - NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr], "requested channel count exceeds maximum"); goto out_ops; } /* ensure there is at least one RX and one TX channel */ if (!channels.combined_count && !channels.rx_count) - err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_RX_COUNT; else if (!channels.combined_count && !channels.tx_count) - err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_TX_COUNT; else - err_attr = NULL; + 
err_attr = 0; if (err_attr) { if (mod_combined) - err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT; ret = -EINVAL; - NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured"); + NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr], + "requested channel counts would result in no RX or TX channel being configured"); goto out_ops; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 471d33a0d095..bfaf327e9d12 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -519,16 +519,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!refcount_dec_and_test(&doi_def->refcount)) { - spin_unlock(&cipso_v4_doi_list_lock); - ret_val = -EBUSY; - goto doi_remove_return; - } list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); - cipso_v4_cache_invalidate(); - call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); + cipso_v4_doi_putdef(doi_def); ret_val = 0; doi_remove_return: @@ -585,9 +579,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) if (!refcount_dec_and_test(&doi_def->refcount)) return; - spin_lock(&cipso_v4_doi_list_lock); - list_del_rcu(&doi_def->list); - spin_unlock(&cipso_v4_doi_list_lock); cipso_v4_cache_invalidate(); call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); @@ -1162,7 +1153,7 @@ static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, { buf[0] = IPOPT_CIPSO; buf[1] = CIPSO_V4_HDR_LEN + len; - *(__be32 *)&buf[2] = htonl(doi_def->doi); + put_unaligned_be32(doi_def->doi, &buf[2]); } /** diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index ff327a62c9ce..da21dfce24d7 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(inet_peer_base_init); #define PEER_MAX_GC 32 /* Exported for sysctl_net_ipv4. 
*/ -int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more +int inet_peer_threshold __read_mostly; /* start to throw entries more * aggressively at this stage */ int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ @@ -73,20 +73,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { - struct sysinfo si; + u64 nr_entries; - /* Use the straight interface to information about memory. */ - si_meminfo(&si); - /* The values below were suggested by Alexey Kuznetsov - * <kuznet@ms2.inr.ac.ru>. I don't have any opinion about the values - * myself. --SAW - */ - if (si.totalram <= (32768*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */ - if (si.totalram <= (16384*1024)/PAGE_SIZE) - inet_peer_threshold >>= 1; /* about 512KB */ - if (si.totalram <= (8192*1024)/PAGE_SIZE) - inet_peer_threshold >>= 2; /* about 128KB */ + /* 1% of physical memory */ + nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT, + 100 * L1_CACHE_ALIGN(sizeof(struct inet_peer))); + + inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128); peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 76a420c76f16..f6cc26de5ed3 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -502,8 +502,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (!skb_is_gso(skb) && (inner_iph->frag_off & htons(IP_DF)) && mtu < pkt_size) { - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; } } @@ -527,7 +526,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff 
*skb, if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && mtu < pkt_size) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -E2BIG; } } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index abc171e79d3e..eb207089ece0 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -238,13 +238,13 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); } else { if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } dst_release(dst); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index f1c6cbdb9e43..743777bce179 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1399,7 +1399,7 @@ out: /* rtnl */ /* remove all nexthops tied to a device being deleted */ -static void nexthop_flush_dev(struct net_device *dev) +static void nexthop_flush_dev(struct net_device *dev, unsigned long event) { unsigned int hash = nh_dev_hashfn(dev->ifindex); struct net *net = dev_net(dev); @@ -1411,6 +1411,10 @@ static void nexthop_flush_dev(struct net_device *dev) if (nhi->fib_nhc.nhc_dev != dev) continue; + if (nhi->reject_nh && + (event == NETDEV_DOWN || event == NETDEV_CHANGE)) + continue; + remove_nexthop(net, nhi->nh_parent, NULL); } } @@ -2189,11 +2193,11 @@ static int nh_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nexthop_flush_dev(dev); + nexthop_flush_dev(dev, event); break; case NETDEV_CHANGE: if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) - nexthop_flush_dev(dev); + nexthop_flush_dev(dev, event); break; case NETDEV_CHANGEMTU: info_ext = ptr; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 
a3422e42784e..de7cc8445ac0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3469,16 +3469,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_QUEUE_SEQ: - if (sk->sk_state != TCP_CLOSE) + if (sk->sk_state != TCP_CLOSE) { err = -EPERM; - else if (tp->repair_queue == TCP_SEND_QUEUE) - WRITE_ONCE(tp->write_seq, val); - else if (tp->repair_queue == TCP_RECV_QUEUE) { - WRITE_ONCE(tp->rcv_nxt, val); - WRITE_ONCE(tp->copied_seq, val); - } - else + } else if (tp->repair_queue == TCP_SEND_QUEUE) { + if (!tcp_rtx_queue_empty(sk)) + err = -EPERM; + else + WRITE_ONCE(tp->write_seq, val); + } else if (tp->repair_queue == TCP_RECV_QUEUE) { + if (tp->rcv_nxt != tp->copied_seq) { + err = -EPERM; + } else { + WRITE_ONCE(tp->rcv_nxt, val); + WRITE_ONCE(tp->copied_seq, val); + } + } else { err = -EINVAL; + } break; case TCP_REPAIR_OPTIONS: @@ -4143,7 +4150,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - if (len < offsetofend(struct tcp_zerocopy_receive, length)) + if (len < 0 || + len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; if (unlikely(len > sizeof(zc))) { err = check_zeroed_user(optval + sizeof(zc), diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b76c48efd37e..c5b4b586570f 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -526,7 +526,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, } if (!sk || NAPI_GRO_CB(skb)->encap_mark || - (skb->ip_summed != CHECKSUM_PARTIAL && + (uh->check && skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid) || !udp_sk(sk)->gro_receive) diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 51184a70ac7e..1578ed9e97d8 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -83,6 +83,9 @@ struct calipso_map_cache_entry { static struct calipso_map_cache_bkt *calipso_cache; +static void 
calipso_cache_invalidate(void); +static void calipso_doi_putdef(struct calipso_doi *doi_def); + /* Label Mapping Cache Functions */ @@ -444,15 +447,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!refcount_dec_and_test(&doi_def->refcount)) { - spin_unlock(&calipso_doi_list_lock); - ret_val = -EBUSY; - goto doi_remove_return; - } list_del_rcu(&doi_def->list); spin_unlock(&calipso_doi_list_lock); - call_rcu(&doi_def->rcu, calipso_doi_free_rcu); + calipso_doi_putdef(doi_def); ret_val = 0; doi_remove_return: @@ -508,10 +506,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def) if (!refcount_dec_and_test(&doi_def->refcount)) return; - spin_lock(&calipso_doi_list_lock); - list_del_rcu(&doi_def->list); - spin_unlock(&calipso_doi_list_lock); + calipso_cache_invalidate(); call_rcu(&doi_def->rcu, calipso_doi_free_rcu); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c3bc89b6b1a1..1baf43aacb2e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -678,8 +678,8 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); return -1; } *encap_limit = tel->encap_limit - 1; @@ -805,8 +805,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (err != 0) { /* XXX: send ICMP error even if DF is not set. 
*/ if (err == -EMSGSIZE) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); return -1; } @@ -837,7 +837,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) &mtu, skb->protocol); if (err != 0) { if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -1; } @@ -1063,10 +1063,10 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) { if (skb->protocol == htons(ETH_P_IP)) - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); else - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } goto tx_err; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a7950baa05e5..3fa0eca5a06f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1332,8 +1332,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, tel = (void *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); return -1; } encap_limit = tel->encap_limit - 1; @@ -1385,11 +1385,11 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, if (err == -EMSGSIZE) switch (protocol) { case IPPROTO_IPIP: - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); break; case IPPROTO_IPV6: - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); break; default: break; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0225fd694192..f10e7a72ea62 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ 
-521,10 +521,10 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); } err = -EMSGSIZE; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 93636867aee2..63ccd9f2dccc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -987,7 +987,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7be5103ff2a8..203890e378cb 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -649,9 +649,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { - pr_warn_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", - tunnel->name, tunnel->tunnel_id, - session->session_id); + pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", + tunnel->name, tunnel->tunnel_id, + session->session_id); atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } @@ -702,8 +702,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * If user has configured mandatory sequence numbers, discard. */ if (session->recv_seq) { - pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. 
Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -718,8 +718,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, session->send_seq = 0; l2tp_session_set_header_len(session, tunnel->version); } else if (session->send_seq) { - pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -809,9 +809,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Short packet? */ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { - pr_warn_ratelimited("%s: recv short packet (len=%d)\n", - tunnel->name, skb->len); - goto error; + pr_debug_ratelimited("%s: recv short packet (len=%d)\n", + tunnel->name, skb->len); + goto invalid; } /* Point to L2TP header */ @@ -824,9 +824,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Check protocol version */ version = hdrflags & L2TP_HDR_VER_MASK; if (version != tunnel->version) { - pr_warn_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", - tunnel->name, version, tunnel->version); - goto error; + pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", + tunnel->name, version, tunnel->version); + goto invalid; } /* Get length of L2TP packet */ @@ -834,7 +834,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* If type is control packet, it is handled by userspace. */ if (hdrflags & L2TP_HDRFLAG_T) - goto error; + goto pass; /* Skip flags */ ptr += 2; @@ -863,21 +863,24 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) l2tp_session_dec_refcount(session); /* Not found? Pass to userspace to deal with */ - pr_warn_ratelimited("%s: no session found (%u/%u). 
Passing up.\n", - tunnel->name, tunnel_id, session_id); - goto error; + pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n", + tunnel->name, tunnel_id, session_id); + goto pass; } if (tunnel->version == L2TP_HDR_VER_3 && l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) - goto error; + goto invalid; l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); return 0; -error: +invalid: + atomic_long_inc(&tunnel->stats.rx_invalid); + +pass: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index cb21d906343e..98ea98eb9567 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -39,6 +39,7 @@ struct l2tp_stats { atomic_long_t rx_oos_packets; atomic_long_t rx_errors; atomic_long_t rx_cookie_discards; + atomic_long_t rx_invalid; }; struct l2tp_tunnel; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 83956c9ee1fc..96eb91be9238 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -428,6 +428,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&tunnel->stats.rx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, + atomic_long_read(&tunnel->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -771,6 +774,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&session->stats.rx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, + atomic_long_read(&session->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index b1690149b6fa..1482259de9b5 100644 --- a/net/mpls/mpls_gso.c +++ 
b/net/mpls/mpls_gso.c @@ -14,6 +14,7 @@ #include <linux/netdev_features.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <net/mpls.h> static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -27,6 +28,8 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, skb_reset_network_header(skb); mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); + if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) + goto out; if (unlikely(!pskb_may_pull(skb, mpls_hlen))) goto out; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c5d5e68940ea..76958570ae7f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1061,6 +1061,12 @@ out: } } +static void __mptcp_clean_una_wakeup(struct sock *sk) +{ + __mptcp_clean_una(sk); + mptcp_write_space(sk); +} + static void mptcp_enter_memory_pressure(struct sock *sk) { struct mptcp_subflow_context *subflow; @@ -1189,6 +1195,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size, */ while (skbs->qlen > 1) { skb = __skb_dequeue_tail(skbs); + *total_ts -= skb->truesize; __kfree_skb(skb); } return skbs->qlen > 0; @@ -1444,7 +1451,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk, release_sock(ssk); } -static void mptcp_push_pending(struct sock *sk, unsigned int flags) +static void __mptcp_push_pending(struct sock *sk, unsigned int flags) { struct sock *prev_ssk = NULL, *ssk = NULL; struct mptcp_sock *msk = mptcp_sk(sk); @@ -1696,14 +1703,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) wait_for_memory: mptcp_set_nospace(sk); - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) goto out; } if (copied) - mptcp_push_pending(sk, msg->msg_flags); + __mptcp_push_pending(sk, msg->msg_flags); out: release_sock(sk); @@ -2115,6 +2122,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) return 
backup; } +static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) +{ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } +} + /* subflow sockets can be either outgoing (connect) or incoming * (accept). * @@ -2126,6 +2141,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { + struct mptcp_sock *msk = mptcp_sk(sk); + list_del(&subflow->node); lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); @@ -2154,6 +2171,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, release_sock(ssk); sock_put(ssk); + + if (ssk == msk->last_snd) + msk->last_snd = NULL; + + if (ssk == msk->ack_hint) + msk->ack_hint = NULL; + + if (ssk == msk->first) + msk->first = NULL; + + if (msk->subflow && ssk == msk->subflow->sk) + mptcp_dispose_initial_subflow(msk); } void mptcp_close_ssk(struct sock *sk, struct sock *ssk, @@ -2238,14 +2267,58 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) mptcp_close_wake_up(sk); } -static void mptcp_worker(struct work_struct *work) +static void __mptcp_retrans(struct sock *sk) { - struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); - struct sock *ssk, *sk = &msk->sk.icsk_inet.sk; + struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; size_t copied = 0; - int state, ret; + struct sock *ssk; + int ret; + + __mptcp_clean_una_wakeup(sk); + dfrag = mptcp_rtx_head(sk); + if (!dfrag) + return; + + ssk = mptcp_subflow_get_retrans(msk); + if (!ssk) + goto reset_timer; + + lock_sock(ssk); + + /* limit retransmission to the bytes already sent on some subflows */ + info.sent = 0; + info.limit = dfrag->already_sent; + while (info.sent < dfrag->already_sent) { + if (!mptcp_alloc_tx_skb(sk, ssk)) + break; + + ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); + if (ret <= 0) + break; + + 
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); + copied += ret; + info.sent += ret; + } + if (copied) + tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, + info.size_goal); + + mptcp_set_timeout(sk, ssk); + release_sock(ssk); + +reset_timer: + if (!mptcp_timer_pending(sk)) + mptcp_reset_timer(sk); +} + +static void mptcp_worker(struct work_struct *work) +{ + struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); + struct sock *sk = &msk->sk.icsk_inet.sk; + int state; lock_sock(sk); state = sk->sk_state; @@ -2280,45 +2353,8 @@ static void mptcp_worker(struct work_struct *work) if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) __mptcp_close_subflow(msk); - if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) - goto unlock; - - __mptcp_clean_una(sk); - dfrag = mptcp_rtx_head(sk); - if (!dfrag) - goto unlock; - - ssk = mptcp_subflow_get_retrans(msk); - if (!ssk) - goto reset_unlock; - - lock_sock(ssk); - - /* limit retransmission to the bytes already sent on some subflows */ - info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { - if (!mptcp_alloc_tx_skb(sk, ssk)) - break; - - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); - if (ret <= 0) - break; - - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RETRANSSEGS); - copied += ret; - info.sent += ret; - } - if (copied) - tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, - info.size_goal); - - mptcp_set_timeout(sk, ssk); - release_sock(ssk); - -reset_unlock: - if (!mptcp_timer_pending(sk)) - mptcp_reset_timer(sk); + if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) + __mptcp_retrans(sk); unlock: release_sock(sk); @@ -2523,12 +2559,6 @@ static void __mptcp_destroy_sock(struct sock *sk) might_sleep(); - /* dispose the ancillatory tcp socket, if any */ - if (msk->subflow) { - iput(SOCK_INODE(msk->subflow)); - msk->subflow = NULL; - } - /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). 
*/ @@ -2553,6 +2583,7 @@ static void __mptcp_destroy_sock(struct sock *sk) sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + mptcp_dispose_initial_subflow(msk); sock_put(sk); } @@ -2934,13 +2965,14 @@ static void mptcp_release_cb(struct sock *sk) { unsigned long flags, nflags; - /* push_pending may touch wmem_reserved, do it before the later - * cleanup - */ - if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) - __mptcp_clean_una(sk); - if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) { - /* mptcp_push_pending() acquires the subflow socket lock + for (;;) { + flags = 0; + if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) + flags |= MPTCP_PUSH_PENDING; + if (!flags) + break; + + /* the following actions acquire the subflow socket lock * * 1) can't be invoked in atomic scope * 2) must avoid ABBA deadlock with msk socket spinlock: the RX @@ -2949,13 +2981,21 @@ static void mptcp_release_cb(struct sock *sk) */ spin_unlock_bh(&sk->sk_lock.slock); - mptcp_push_pending(sk, 0); + if (flags & MPTCP_PUSH_PENDING) + __mptcp_push_pending(sk, 0); + + cond_resched(); spin_lock_bh(&sk->sk_lock.slock); } + + if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags)) + __mptcp_clean_una_wakeup(sk); if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags)) __mptcp_error_report(sk); - /* clear any wmem reservation and errors */ + /* push_pending may touch wmem_reserved, ensure we do the cleanup + * later + */ __mptcp_update_wmem(sk); __mptcp_update_rmem(sk); @@ -3285,6 +3325,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, /* PM/worker can now acquire the first subflow socket * lock without racing with listener queue cleanup, * we can notify it, if needed. + * + * Even if remote has reset the initial subflow by now + * the refcnt is still at least one. 
*/ subflow = mptcp_subflow_ctx(msk->first); list_add(&subflow->node, &msk->conn_list); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 91827d949766..e21a5bc36cf0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -52,14 +52,15 @@ #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 -#define TCPOLEN_MPTCP_ADD_ADDR_PORT 20 +#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 -#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 12 +#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 #define TCPOLEN_MPTCP_ADD_ADDR6 28 -#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 32 +#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 -#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 -#define TCPOLEN_MPTCP_PORT_LEN 4 +#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 +#define TCPOLEN_MPTCP_PORT_LEN 2 +#define TCPOLEN_MPTCP_PORT_ALIGN 2 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 #define TCPOLEN_MPTCP_PRIO 3 #define TCPOLEN_MPTCP_PRIO_ALIGN 4 @@ -701,8 +702,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; if (!echo) len += MPTCPOPT_THMAC_LEN; + /* account for 2 trailing 'nop' options */ if (port) - len += TCPOLEN_MPTCP_PORT_LEN; + len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN; return len; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e1fbcab257e6..3d47d670e665 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -687,11 +687,6 @@ create_child: /* move the msk reference ownership to the subflow */ subflow_req->msk = NULL; ctx->conn = (struct sock *)owner; - if (!mptcp_finish_join(child)) - goto dispose_child; - - SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); - tcp_rsk(req)->drop_req = true; if (subflow_use_different_sport(owner, sk)) { pr_debug("ack inet_sport=%d %d", @@ -699,10 +694,16 @@ create_child: ntohs(inet_sk((struct sock *)owner)->inet_sport)); if 
(!mptcp_pm_sport_in_anno_list(owner, sk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX); - goto out; + goto dispose_child; } SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX); } + + if (!mptcp_finish_join(child)) + goto dispose_child; + + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX); + tcp_rsk(req)->drop_req = true; } } @@ -1297,6 +1298,7 @@ failed_unlink: spin_lock_bh(&msk->join_list_lock); list_del(&subflow->node); spin_unlock_bh(&msk->join_list_lock); + sock_put(mptcp_subflow_tcp_sock(subflow)); failed: subflow->disposable = 1; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 118f415928ae..b055187235f8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -219,7 +219,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) return NULL; pr_info("nf_conntrack: default automatic helper assignment " "has been turned off for security reasons and CT-based " - " firewall rule not found. Use the iptables CT target " + "firewall rule not found. 
Use the iptables CT target " "to attach helpers instead.\n"); net->ct.auto_assign_helper_warned = 1; return NULL; @@ -228,7 +228,6 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); } - int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 1d7e1c595546..ec23330687a5 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -982,8 +982,10 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); - nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " - "state %s ", tcp_conntrack_names[old_state]); + nf_ct_l4proto_log_invalid(skb, ct, + "packet (index %d) in dir %d ignored, state %s", + index, dir, + tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index e87b6bd6b3cd..4731d21fc3ad 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -646,8 +646,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, } static unsigned int -nf_nat_ipv4_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) +nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; @@ -660,6 +660,23 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb, } static unsigned int +nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + __be32 saddr = ip_hdr(skb)->saddr; + struct sock *sk = skb->sk; + unsigned int ret; + + ret = nf_nat_ipv4_fn(priv, skb, state); + + if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr && + !inet_sk_transparent(sk)) + skb_orphan(skb); /* TCP edemux 
obtained wrong socket */ + + return ret; +} + +static unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { @@ -736,7 +753,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { - .hook = nf_nat_ipv4_in, + .hook = nf_nat_ipv4_pre_routing, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, @@ -757,7 +774,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv4_fn, + .hook = nf_nat_ipv4_local_in, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index c1eb5cdb3033..224c8e537cb3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -916,6 +916,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx) if (flags == ctx->table->flags) return 0; + if ((nft_table_has_owner(ctx->table) && + !(flags & NFT_TABLE_F_OWNER)) || + (!nft_table_has_owner(ctx->table) && + flags & NFT_TABLE_F_OWNER)) + return -EOPNOTSUPP; + trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE, sizeof(struct nft_trans_table)); if (trans == NULL) @@ -9022,8 +9028,12 @@ static void __nft_release_hooks(struct net *net) { struct nft_table *table; - list_for_each_entry(table, &net->nft.tables, list) + list_for_each_entry(table, &net->nft.tables, list) { + if (nft_table_has_owner(table)) + continue; + __nft_release_hook(net, table); + } } static void __nft_release_table(struct net *net, struct nft_table *table) @@ -9073,13 +9083,12 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nf_tables_table_destroy(&ctx); } -static void __nft_release_tables(struct net *net, u32 nlpid) +static void __nft_release_tables(struct net *net) { struct nft_table *table, *nt; list_for_each_entry_safe(table, nt, 
&net->nft.tables, list) { - if (nft_table_has_owner(table) && - nlpid != table->nlpid) + if (nft_table_has_owner(table)) continue; __nft_release_table(net, table); @@ -9145,7 +9154,7 @@ static void __net_exit nf_tables_exit_net(struct net *net) mutex_lock(&net->nft.commit_mutex); if (!list_empty(&net->nft.commit_list)) __nf_tables_abort(net, NFNL_ABORT_NONE); - __nft_release_tables(net, 0); + __nft_release_tables(net); mutex_unlock(&net->nft.commit_mutex); WARN_ON_ONCE(!list_empty(&net->nft.tables)); WARN_ON_ONCE(!list_empty(&net->nft.module_list)); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index acce622582e3..bce6ca203d46 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -330,6 +330,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) const struct xt_match *m; int have_rev = 0; + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { if (m->revision > *bestp) @@ -338,6 +339,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return match_revfn(NFPROTO_UNSPEC, name, revision, bestp); @@ -350,6 +352,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) const struct xt_target *t; int have_rev = 0; + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { if (t->revision > *bestp) @@ -358,6 +361,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return target_revfn(NFPROTO_UNSPEC, name, revision, bestp); @@ -371,12 +375,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, { int have_rev, best = -1; - mutex_lock(&xt[af].mutex); if (target == 1) have_rev = target_revfn(af, name, revision, &best); else have_rev = 
match_revfn(af, name, revision, &best); - mutex_unlock(&xt[af].mutex); /* Nothing at all? Return 0 to try loading module. */ if (best == -1) { diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 726dda95934c..4f50a64315cf 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -575,6 +575,7 @@ list_start: break; } + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); genlmsg_end(ans_skb, data); @@ -583,12 +584,14 @@ list_start: list_retry: /* XXX - this limit is a guesstimate */ if (nlsze_mult < 4) { + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); kfree_skb(ans_skb); nlsze_mult *= 2; goto list_start; } list_failure_lock: + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); list_failure: kfree_skb(ans_skb); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index b34358282f37..edb6ac17ceca 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -439,7 +439,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (len == 0 || len & 3) return -EINVAL; - skb = netdev_alloc_skb(NULL, len); + skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN); if (!skb) return -ENOMEM; @@ -958,8 +958,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) plen = (len + 3) & ~3; skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE, msg->msg_flags & MSG_DONTWAIT, &rc); - if (!skb) + if (!skb) { + rc = -ENOMEM; goto out_node; + } skb_reserve(skb, QRTR_HDR_MAX_SIZE); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index e2e4353db8a7..f87d07736a14 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2168,7 +2168,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, struct tcmsg *tcm, struct netlink_callback *cb, - int *t_p, int s_t) + int *t_p, int s_t, bool recur) { struct Qdisc *q; int b; @@ -2179,7 +2179,7 @@ static int tc_dump_tclass_root(struct Qdisc 
*root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - if (!qdisc_dev(root)) + if (!qdisc_dev(root) || !recur) return 0; if (tcm->tcm_parent) { @@ -2214,13 +2214,13 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, - &t, s_t) < 0) + &t, s_t, false) < 0) goto done; done: diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index a9c6af5795d8..5ba456727f63 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -75,7 +75,7 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) return 1; /* Verify that we can hold this TSN and that it will not - * overlfow our map + * overflow our map */ if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) return -1; |