summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/Makefile2
-rw-r--r--net/6lowpan/core.c40
-rw-r--r--net/6lowpan/iphc.c19
-rw-r--r--net/8021q/vlan_dev.c3
-rw-r--r--net/Kconfig7
-rw-r--r--net/atm/br2684.c9
-rw-r--r--net/batman-adv/distributed-arp-table.c7
-rw-r--r--net/batman-adv/gateway_client.c8
-rw-r--r--net/batman-adv/multicast.c81
-rw-r--r--net/batman-adv/network-coding.c7
-rw-r--r--net/batman-adv/originator.c5
-rw-r--r--net/batman-adv/send.c3
-rw-r--r--net/batman-adv/soft-interface.c9
-rw-r--r--net/batman-adv/translation-table.c17
-rw-r--r--net/batman-adv/types.h15
-rw-r--r--net/bluetooth/6lowpan.c32
-rw-r--r--net/bluetooth/Kconfig5
-rw-r--r--net/bluetooth/Makefile3
-rw-r--r--net/bluetooth/a2mp.c17
-rw-r--r--net/bluetooth/a2mp.h19
-rw-r--r--net/bluetooth/amp.c134
-rw-r--r--net/bluetooth/amp.h14
-rw-r--r--net/bluetooth/cmtp/capi.c8
-rw-r--r--net/bluetooth/hci_conn.c235
-rw-r--r--net/bluetooth/hci_core.c44
-rw-r--r--net/bluetooth/hci_event.c212
-rw-r--r--net/bluetooth/hci_request.c6
-rw-r--r--net/bluetooth/l2cap_core.c6
-rw-r--r--net/bluetooth/l2cap_sock.c41
-rw-r--r--net/bluetooth/mgmt.c38
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_if.c1
-rw-r--r--net/bridge/br_mdb.c144
-rw-r--r--net/bridge/br_multicast.c44
-rw-r--r--net/bridge/br_netfilter_hooks.c20
-rw-r--r--net/bridge/br_netfilter_ipv6.c2
-rw-r--r--net/bridge/br_netlink.c18
-rw-r--r--net/bridge/br_private.h16
-rw-r--r--net/bridge/br_vlan.c18
-rw-r--r--net/caif/caif_dev.c2
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c43
-rw-r--r--net/core/dst.c110
-rw-r--r--net/core/fib_rules.c24
-rw-r--r--net/core/filter.c152
-rw-r--r--net/core/flow_dissector.c58
-rw-r--r--net/core/lwtunnel.c248
-rw-r--r--net/core/neighbour.c14
-rw-r--r--net/core/net-sysfs.c31
-rw-r--r--net/core/pktgen.c5
-rw-r--r--net/core/rtnetlink.c42
-rw-r--r--net/core/timestamping.c6
-rw-r--r--net/core/utils.c17
-rw-r--r--net/dsa/dsa.c52
-rw-r--r--net/dsa/dsa_priv.h8
-rw-r--r--net/dsa/slave.c394
-rw-r--r--net/dsa/tag_brcm.c15
-rw-r--r--net/dsa/tag_dsa.c12
-rw-r--r--net/dsa/tag_edsa.c12
-rw-r--r--net/dsa/tag_trailer.c12
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/hsr/hsr_device.c2
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h11
-rw-r--r--net/ieee802154/6lowpan/core.c81
-rw-r--r--net/ieee802154/6lowpan/rx.c45
-rw-r--r--net/ieee802154/6lowpan/tx.c4
-rw-r--r--net/ieee802154/nl802154.c33
-rw-r--r--net/ieee802154/rdev-ops.h33
-rw-r--r--net/ieee802154/sysfs.c38
-rw-r--r--net/ieee802154/trace.h41
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/arp.c80
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/fib_frontend.c73
-rw-r--r--net/ipv4/fib_semantics.c153
-rw-r--r--net/ipv4/fib_trie.c7
-rw-r--r--net/ipv4/gre_demux.c235
-rw-r--r--net/ipv4/icmp.c10
-rw-r--r--net/ipv4/inet_hashtables.c38
-rw-r--r--net/ipv4/inet_timewait_sock.c55
-rw-r--r--net/ipv4/ip_fragment.c44
-rw-r--r--net/ipv4/ip_gre.c446
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/ip_tunnel.c37
-rw-r--r--net/ipv4/ip_tunnel_core.c230
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/netfilter/Kconfig12
-rw-r--r--net/ipv4/netfilter/Makefile3
-rw-r--r--net/ipv4/netfilter/arp_tables.c32
-rw-r--r--net/ipv4/netfilter/ip_tables.c68
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c22
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c120
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c110
-rw-r--r--net/ipv4/ping.c3
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/route.c36
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_cubic.c4
-rw-r--r--net/ipv4/tcp_highspeed.c2
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_hybla.c2
-rw-r--r--net/ipv4/tcp_illinois.c2
-rw-r--r--net/ipv4/tcp_input.c64
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c6
-rw-r--r--net/ipv4/tcp_output.c49
-rw-r--r--net/ipv4/tcp_scalable.c2
-rw-r--r--net/ipv4/tcp_timer.c1
-rw-r--r--net/ipv4/tcp_vegas.c6
-rw-r--r--net/ipv4/tcp_veno.c2
-rw-r--r--net/ipv4/udp.c22
-rw-r--r--net/ipv4/xfrm4_policy.c11
-rw-r--r--net/ipv6/Kconfig30
-rw-r--r--net/ipv6/Makefile1
-rw-r--r--net/ipv6/addrconf.c343
-rw-r--r--net/ipv6/addrconf_core.c11
-rw-r--r--net/ipv6/af_inet6.c12
-rw-r--r--net/ipv6/datagram.c10
-rw-r--r--net/ipv6/exthdrs.c2
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ila.c210
-rw-r--r--net/ipv6/inet6_hashtables.c9
-rw-r--r--net/ipv6/ip6_fib.c1
-rw-r--r--net/ipv6/ip6_gre.c5
-rw-r--r--net/ipv6/ip6_input.c5
-rw-r--r--net/ipv6/ip6_output.c18
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ndisc.c26
-rw-r--r--net/ipv6/netfilter/Kconfig12
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/ip6_tables.c52
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c5
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c5
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c23
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c96
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c2
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c108
-rw-r--r--net/ipv6/raw.c3
-rw-r--r--net/ipv6/route.c103
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/sysctl_net_ipv6.c15
-rw-r--r--net/ipv6/tcp_ipv6.c7
-rw-r--r--net/ipv6/udp.c3
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c3
-rw-r--r--net/ipv6/xfrm6_policy.c7
-rw-r--r--net/mac80211/Kconfig1
-rw-r--r--net/mac80211/Makefile1
-rw-r--r--net/mac80211/aes_cmac.c17
-rw-r--r--net/mac80211/cfg.c161
-rw-r--r--net/mac80211/chan.c31
-rw-r--r--net/mac80211/debugfs.c2
-rw-r--r--net/mac80211/debugfs_key.c2
-rw-r--r--net/mac80211/debugfs_netdev.c34
-rw-r--r--net/mac80211/driver-ops.c41
-rw-r--r--net/mac80211/driver-ops.h29
-rw-r--r--net/mac80211/ieee80211_i.h42
-rw-r--r--net/mac80211/iface.c16
-rw-r--r--net/mac80211/key.c1
-rw-r--r--net/mac80211/key.h3
-rw-r--r--net/mac80211/main.c17
-rw-r--r--net/mac80211/mesh.c2
-rw-r--r--net/mac80211/mesh_hwmp.c80
-rw-r--r--net/mac80211/mesh_plink.c326
-rw-r--r--net/mac80211/mesh_ps.c42
-rw-r--r--net/mac80211/mesh_sync.c16
-rw-r--r--net/mac80211/mlme.c61
-rw-r--r--net/mac80211/ocb.c2
-rw-r--r--net/mac80211/rate.c310
-rw-r--r--net/mac80211/rate.h60
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c8
-rw-r--r--net/mac80211/rx.c101
-rw-r--r--net/mac80211/sta_info.c52
-rw-r--r--net/mac80211/sta_info.h120
-rw-r--r--net/mac80211/status.c5
-rw-r--r--net/mac80211/tdls.c242
-rw-r--r--net/mac80211/tx.c35
-rw-r--r--net/mac80211/util.c75
-rw-r--r--net/mac80211/vht.c34
-rw-r--r--net/mac80211/wpa.c83
-rw-r--r--net/mac802154/cfg.c81
-rw-r--r--net/mac802154/ieee802154_i.h11
-rw-r--r--net/mac802154/iface.c32
-rw-r--r--net/mac802154/main.c19
-rw-r--r--net/mac802154/rx.c14
-rw-r--r--net/mac802154/tx.c27
-rw-r--r--net/mac802154/util.c8
-rw-r--r--net/mpls/Kconfig8
-rw-r--r--net/mpls/Makefile1
-rw-r--r--net/mpls/af_mpls.c197
-rw-r--r--net/mpls/internal.h9
-rw-r--r--net/mpls/mpls_iptunnel.c230
-rw-r--r--net/netfilter/Kconfig2
-rw-r--r--net/netfilter/core.c225
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c134
-rw-r--r--net/netfilter/nf_conntrack_expect.c21
-rw-r--r--net/netfilter/nf_conntrack_netlink.c228
-rw-r--r--net/netfilter/nf_conntrack_pptp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c101
-rw-r--r--net/netfilter/nf_conntrack_seqadj.c9
-rw-r--r--net/netfilter/nf_conntrack_standalone.c39
-rw-r--r--net/netfilter/nf_internals.h2
-rw-r--r--net/netfilter/nf_nat_core.c24
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c2
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c2
-rw-r--r--net/netfilter/nf_nat_proto_udp.c2
-rw-r--r--net/netfilter/nf_nat_proto_udplite.c2
-rw-r--r--net/netfilter/nf_queue.c12
-rw-r--r--net/netfilter/nf_synproxy_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c8
-rw-r--r--net/netfilter/nf_tables_core.c5
-rw-r--r--net/netfilter/nfnetlink_acct.c71
-rw-r--r--net/netfilter/nft_counter.c97
-rw-r--r--net/netfilter/nft_limit.c188
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_payload.c57
-rw-r--r--net/netfilter/x_tables.c29
-rw-r--r--net/netfilter/xt_CT.c26
-rw-r--r--net/netfilter/xt_TCPMSS.c8
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c2
-rw-r--r--net/netfilter/xt_TEE.c163
-rw-r--r--net/netfilter/xt_TPROXY.c6
-rw-r--r--net/netfilter/xt_connlimit.c9
-rw-r--r--net/netfilter/xt_nfacct.c2
-rw-r--r--net/openvswitch/Kconfig2
-rw-r--r--net/openvswitch/Makefile2
-rw-r--r--net/openvswitch/actions.c29
-rw-r--r--net/openvswitch/datapath.c19
-rw-r--r--net/openvswitch/datapath.h5
-rw-r--r--net/openvswitch/dp_notify.c5
-rw-r--r--net/openvswitch/flow.c4
-rw-r--r--net/openvswitch/flow.h79
-rw-r--r--net/openvswitch/flow_netlink.c112
-rw-r--r--net/openvswitch/flow_netlink.h3
-rw-r--r--net/openvswitch/flow_table.c6
-rw-r--r--net/openvswitch/vport-geneve.c21
-rw-r--r--net/openvswitch/vport-gre.c239
-rw-r--r--net/openvswitch/vport-internal_dev.c38
-rw-r--r--net/openvswitch/vport-netdev.c117
-rw-r--r--net/openvswitch/vport-netdev.h16
-rw-r--r--net/openvswitch/vport-vxlan.c224
-rw-r--r--net/openvswitch/vport-vxlan.h11
-rw-r--r--net/openvswitch/vport.c34
-rw-r--r--net/openvswitch/vport.h27
-rw-r--r--net/packet/af_packet.c136
-rw-r--r--net/packet/internal.h5
-rw-r--r--net/rds/bind.c3
-rw-r--r--net/rds/connection.c16
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/ib_cm.c5
-rw-r--r--net/rds/iw.c2
-rw-r--r--net/rds/iw_cm.c5
-rw-r--r--net/rds/rds.h23
-rw-r--r--net/rds/send.c3
-rw-r--r--net/rds/tcp.c165
-rw-r--r--net/rds/tcp.h7
-rw-r--r--net/rds/tcp_connect.c9
-rw-r--r--net/rds/tcp_listen.c40
-rw-r--r--net/rds/transport.c4
-rw-r--r--net/rfkill/Kconfig3
-rw-r--r--net/rfkill/rfkill-gpio.c1
-rw-r--r--net/sched/act_api.c44
-rw-r--r--net/sched/act_bpf.c53
-rw-r--r--net/sched/act_connmark.c9
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_gact.c44
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c58
-rw-r--r--net/sched/act_nat.c10
-rw-r--r--net/sched/act_pedit.c3
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/act_skbedit.c3
-rw-r--r--net/sched/act_vlan.c3
-rw-r--r--net/sched/cls_cgroup.c23
-rw-r--r--net/sched/sch_fifo.c2
-rw-r--r--net/sched/sch_generic.c6
-rw-r--r--net/sched/sch_gred.c8
-rw-r--r--net/sched/sch_htb.c6
-rw-r--r--net/sched/sch_plug.c8
-rw-r--r--net/sched/sch_qfq.c1
-rw-r--r--net/sched/sch_sfb.c2
-rw-r--r--net/sctp/protocol.c42
-rw-r--r--net/sctp/sm_statefuns.c2
-rw-r--r--net/switchdev/switchdev.c113
-rw-r--r--net/tipc/bcast.c31
-rw-r--r--net/tipc/bcast.h1
-rw-r--r--net/tipc/bearer.c30
-rw-r--r--net/tipc/bearer.h3
-rw-r--r--net/tipc/core.h10
-rw-r--r--net/tipc/discover.c130
-rw-r--r--net/tipc/link.c2030
-rw-r--r--net/tipc/link.h109
-rw-r--r--net/tipc/msg.c86
-rw-r--r--net/tipc/msg.h112
-rw-r--r--net/tipc/name_distr.c6
-rw-r--r--net/tipc/netlink_compat.c2
-rw-r--r--net/tipc/node.c964
-rw-r--r--net/tipc/node.h84
-rw-r--r--net/tipc/socket.c385
-rw-r--r--net/tipc/socket.h2
-rw-r--r--net/tipc/udp_media.c3
-rw-r--r--net/wimax/op-rfkill.c3
-rw-r--r--net/wireless/core.c5
-rw-r--r--net/wireless/core.h5
-rw-r--r--net/wireless/mlme.c75
-rw-r--r--net/wireless/nl80211.c4
-rw-r--r--net/wireless/rdev-ops.h2
-rw-r--r--net/wireless/reg.c73
-rw-r--r--net/xfrm/xfrm_policy.c24
-rw-r--r--net/xfrm/xfrm_user.c8
323 files changed, 10710 insertions, 5584 deletions
diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile
index eb8baa72adc8..c6ffc55ee0d7 100644
--- a/net/6lowpan/Makefile
+++ b/net/6lowpan/Makefile
@@ -1,6 +1,6 @@
obj-$(CONFIG_6LOWPAN) += 6lowpan.o
-6lowpan-y := iphc.o nhc.o
+6lowpan-y := core.o iphc.o nhc.o
#rfc6282 nhcs
obj-$(CONFIG_6LOWPAN_NHC_DEST) += nhc_dest.o
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
new file mode 100644
index 000000000000..ae1896fa45e2
--- /dev/null
+++ b/net/6lowpan/core.c
@@ -0,0 +1,40 @@
+/* This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ * (C) 2015 Pengutronix, Alexander Aring <aar@pengutronix.de>
+ */
+
+#include <linux/module.h>
+
+#include <net/6lowpan.h>
+
+void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype)
+{
+ lowpan_priv(dev)->lltype = lltype;
+}
+EXPORT_SYMBOL(lowpan_netdev_setup);
+
+static int __init lowpan_module_init(void)
+{
+ request_module_nowait("ipv6");
+
+ request_module_nowait("nhc_dest");
+ request_module_nowait("nhc_fragment");
+ request_module_nowait("nhc_hop");
+ request_module_nowait("nhc_ipv6");
+ request_module_nowait("nhc_mobility");
+ request_module_nowait("nhc_routing");
+ request_module_nowait("nhc_udp");
+
+ return 0;
+}
+module_init(lowpan_module_init);
+
+MODULE_LICENSE("GPL");
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 94a375c04f21..1e0071fdcf72 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -48,7 +48,6 @@
#include <linux/bitops.h>
#include <linux/if_arp.h>
-#include <linux/module.h>
#include <linux/netdevice.h>
#include <net/6lowpan.h>
#include <net/ipv6.h>
@@ -284,7 +283,7 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
if (lowpan_fetch_skb(skb, &tmp, sizeof(tmp)))
return -EINVAL;
- hdr.flow_lbl[0] = (skb->data[0] & 0x0F) | ((tmp >> 2) & 0x30);
+ hdr.flow_lbl[0] = (tmp & 0x0F) | ((tmp >> 2) & 0x30);
memcpy(&hdr.flow_lbl[1], &skb->data[0], 2);
skb_pull(skb, 2);
break;
@@ -610,19 +609,3 @@ int lowpan_header_compress(struct sk_buff *skb, struct net_device *dev,
return 0;
}
EXPORT_SYMBOL_GPL(lowpan_header_compress);
-
-static int __init lowpan_module_init(void)
-{
- request_module_nowait("nhc_dest");
- request_module_nowait("nhc_fragment");
- request_module_nowait("nhc_hop");
- request_module_nowait("nhc_ipv6");
- request_module_nowait("nhc_mobility");
- request_module_nowait("nhc_routing");
- request_module_nowait("nhc_udp");
-
- return 0;
-}
-module_init(lowpan_module_init);
-
-MODULE_LICENSE("GPL");
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 01d7ba840df8..fded86508117 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -791,10 +791,9 @@ void vlan_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->priv_flags |= IFF_802_1Q_VLAN;
+ dev->priv_flags |= IFF_802_1Q_VLAN | IFF_NO_QUEUE;
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
netif_keep_dst(dev);
- dev->tx_queue_len = 0;
dev->netdev_ops = &vlan_netdev_ops;
dev->destructor = vlan_dev_free;
diff --git a/net/Kconfig b/net/Kconfig
index 57a7c5af3175..7021c1bf44d6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -374,6 +374,13 @@ source "net/caif/Kconfig"
source "net/ceph/Kconfig"
source "net/nfc/Kconfig"
+config LWTUNNEL
+ bool "Network light weight tunnels"
+ ---help---
+ This feature provides an infrastructure to support light weight
+ tunnels like mpls. There is no netdevice associated with a light
+ weight tunnel endpoint. Tunnel encapsulation parameters are stored
+ with light weight tunnel state associated with fib routes.
endif # if NET
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index cc78538d163b..aa0047c5c467 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -802,13 +802,10 @@ static int br2684_seq_show(struct seq_file *seq, void *v)
(brdev->payload == p_bridged) ? "bridged" : "routed",
brvcc->copies_failed, brvcc->copies_needed);
#ifdef CONFIG_ATM_BR2684_IPFILTER
-#define b1(var, byte) ((u8 *) &brvcc->filter.var)[byte]
-#define bs(var) b1(var, 0), b1(var, 1), b1(var, 2), b1(var, 3)
if (brvcc->filter.netmask != 0)
- seq_printf(seq, " filter=%d.%d.%d.%d/"
- "%d.%d.%d.%d\n", bs(prefix), bs(netmask));
-#undef bs
-#undef b1
+ seq_printf(seq, " filter=%pI4/%pI4\n",
+ &brvcc->filter.prefix,
+ &brvcc->filter.netmask);
#endif /* CONFIG_ATM_BR2684_IPFILTER */
}
return 0;
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index 6d0b471eede8..cc7d87d64987 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -19,6 +19,7 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -453,7 +454,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
int j;
/* check if orig node candidate is running DAT */
- if (!(candidate->capabilities & BATADV_ORIG_CAPA_HAS_DAT))
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_DAT, &candidate->capabilities))
goto out;
/* Check if this node has already been selected... */
@@ -713,9 +714,9 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
uint16_t tvlv_value_len)
{
if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
- orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_DAT;
+ clear_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities);
else
- orig->capabilities |= BATADV_ORIG_CAPA_HAS_DAT;
+ set_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities);
}
/**
diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index cffa92dd9877..6012e2b4af4f 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -153,15 +153,11 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
struct batadv_neigh_node *router;
struct batadv_neigh_ifinfo *router_ifinfo;
struct batadv_gw_node *gw_node, *curr_gw = NULL;
- uint32_t max_gw_factor = 0, tmp_gw_factor = 0;
- uint32_t gw_divisor;
+ uint64_t max_gw_factor = 0, tmp_gw_factor = 0;
uint8_t max_tq = 0;
uint8_t tq_avg;
struct batadv_orig_node *orig_node;
- gw_divisor = BATADV_TQ_LOCAL_WINDOW_SIZE * BATADV_TQ_LOCAL_WINDOW_SIZE;
- gw_divisor *= 64;
-
rcu_read_lock();
hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) {
if (gw_node->deleted)
@@ -187,7 +183,7 @@ batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv)
tmp_gw_factor = tq_avg * tq_avg;
tmp_gw_factor *= gw_node->bandwidth_down;
tmp_gw_factor *= 100 * 100;
- tmp_gw_factor /= gw_divisor;
+ tmp_gw_factor >>= 18;
if ((tmp_gw_factor > max_gw_factor) ||
((tmp_gw_factor == max_gw_factor) &&
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index 7aa480b7edd0..68a9554961eb 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -19,6 +19,8 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/errno.h>
#include <linux/etherdevice.h>
@@ -588,19 +590,26 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
*
* If the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag of this originator,
* orig, has toggled then this method updates counter and list accordingly.
+ *
+ * Caller needs to hold orig->mcast_handler_lock.
*/
static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
uint8_t mcast_flags)
{
+ struct hlist_node *node = &orig->mcast_want_all_unsnoopables_node;
+ struct hlist_head *head = &bat_priv->mcast.want_all_unsnoopables_list;
+
/* switched from flag unset to set */
if (mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES &&
!(orig->mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES)) {
atomic_inc(&bat_priv->mcast.num_want_all_unsnoopables);
spin_lock_bh(&bat_priv->mcast.want_lists_lock);
- hlist_add_head_rcu(&orig->mcast_want_all_unsnoopables_node,
- &bat_priv->mcast.want_all_unsnoopables_list);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(!hlist_unhashed(node));
+
+ hlist_add_head_rcu(node, head);
spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
/* switched from flag set to unset */
} else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_UNSNOOPABLES) &&
@@ -608,7 +617,10 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
atomic_dec(&bat_priv->mcast.num_want_all_unsnoopables);
spin_lock_bh(&bat_priv->mcast.want_lists_lock);
- hlist_del_rcu(&orig->mcast_want_all_unsnoopables_node);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(hlist_unhashed(node));
+
+ hlist_del_init_rcu(node);
spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
}
}
@@ -621,19 +633,26 @@ static void batadv_mcast_want_unsnoop_update(struct batadv_priv *bat_priv,
*
* If the BATADV_MCAST_WANT_ALL_IPV4 flag of this originator, orig, has
* toggled then this method updates counter and list accordingly.
+ *
+ * Caller needs to hold orig->mcast_handler_lock.
*/
static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
uint8_t mcast_flags)
{
+ struct hlist_node *node = &orig->mcast_want_all_ipv4_node;
+ struct hlist_head *head = &bat_priv->mcast.want_all_ipv4_list;
+
/* switched from flag unset to set */
if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV4 &&
!(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV4)) {
atomic_inc(&bat_priv->mcast.num_want_all_ipv4);
spin_lock_bh(&bat_priv->mcast.want_lists_lock);
- hlist_add_head_rcu(&orig->mcast_want_all_ipv4_node,
- &bat_priv->mcast.want_all_ipv4_list);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(!hlist_unhashed(node));
+
+ hlist_add_head_rcu(node, head);
spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
/* switched from flag set to unset */
} else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV4) &&
@@ -641,7 +660,10 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
atomic_dec(&bat_priv->mcast.num_want_all_ipv4);
spin_lock_bh(&bat_priv->mcast.want_lists_lock);
- hlist_del_rcu(&orig->mcast_want_all_ipv4_node);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(hlist_unhashed(node));
+
+ hlist_del_init_rcu(node);
spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
}
}
@@ -654,19 +676,26 @@ static void batadv_mcast_want_ipv4_update(struct batadv_priv *bat_priv,
*
* If the BATADV_MCAST_WANT_ALL_IPV6 flag of this originator, orig, has
* toggled then this method updates counter and list accordingly.
+ *
+ * Caller needs to hold orig->mcast_handler_lock.
*/
static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig,
uint8_t mcast_flags)
{
+ struct hlist_node *node = &orig->mcast_want_all_ipv6_node;
+ struct hlist_head *head = &bat_priv->mcast.want_all_ipv6_list;
+
/* switched from flag unset to set */
if (mcast_flags & BATADV_MCAST_WANT_ALL_IPV6 &&
!(orig->mcast_flags & BATADV_MCAST_WANT_ALL_IPV6)) {
atomic_inc(&bat_priv->mcast.num_want_all_ipv6);
spin_lock_bh(&bat_priv->mcast.want_lists_lock);
- hlist_add_head_rcu(&orig->mcast_want_all_ipv6_node,
- &bat_priv->mcast.want_all_ipv6_list);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(!hlist_unhashed(node));
+
+ hlist_add_head_rcu(node, head);
spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
/* switched from flag set to unset */
} else if (!(mcast_flags & BATADV_MCAST_WANT_ALL_IPV6) &&
@@ -674,7 +703,10 @@ static void batadv_mcast_want_ipv6_update(struct batadv_priv *bat_priv,
atomic_dec(&bat_priv->mcast.num_want_all_ipv6);
spin_lock_bh(&bat_priv->mcast.want_lists_lock);
- hlist_del_rcu(&orig->mcast_want_all_ipv6_node);
+ /* flag checks above + mcast_handler_lock prevents this */
+ WARN_ON(hlist_unhashed(node));
+
+ hlist_del_init_rcu(node);
spin_unlock_bh(&bat_priv->mcast.want_lists_lock);
}
}
@@ -697,39 +729,42 @@ static void batadv_mcast_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
uint8_t mcast_flags = BATADV_NO_FLAGS;
bool orig_initialized;
- orig_initialized = orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST;
+ if (orig_mcast_enabled && tvlv_value &&
+ (tvlv_value_len >= sizeof(mcast_flags)))
+ mcast_flags = *(uint8_t *)tvlv_value;
+
+ spin_lock_bh(&orig->mcast_handler_lock);
+ orig_initialized = test_bit(BATADV_ORIG_CAPA_HAS_MCAST,
+ &orig->capa_initialized);
/* If mcast support is turned on decrease the disabled mcast node
* counter only if we had increased it for this node before. If this
* is a completely new orig_node no need to decrease the counter.
*/
if (orig_mcast_enabled &&
- !(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST)) {
+ !test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities)) {
if (orig_initialized)
atomic_dec(&bat_priv->mcast.num_disabled);
- orig->capabilities |= BATADV_ORIG_CAPA_HAS_MCAST;
+ set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities);
/* If mcast support is being switched off or if this is an initial
* OGM without mcast support then increase the disabled mcast
* node counter.
*/
} else if (!orig_mcast_enabled &&
- (orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST ||
+ (test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities) ||
!orig_initialized)) {
atomic_inc(&bat_priv->mcast.num_disabled);
- orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_MCAST;
+ clear_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities);
}
- orig->capa_initialized |= BATADV_ORIG_CAPA_HAS_MCAST;
-
- if (orig_mcast_enabled && tvlv_value &&
- (tvlv_value_len >= sizeof(mcast_flags)))
- mcast_flags = *(uint8_t *)tvlv_value;
+ set_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized);
batadv_mcast_want_unsnoop_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_ipv4_update(bat_priv, orig, mcast_flags);
batadv_mcast_want_ipv6_update(bat_priv, orig, mcast_flags);
orig->mcast_flags = mcast_flags;
+ spin_unlock_bh(&orig->mcast_handler_lock);
}
/**
@@ -763,11 +798,15 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
{
struct batadv_priv *bat_priv = orig->bat_priv;
- if (!(orig->capabilities & BATADV_ORIG_CAPA_HAS_MCAST) &&
- orig->capa_initialized & BATADV_ORIG_CAPA_HAS_MCAST)
+ spin_lock_bh(&orig->mcast_handler_lock);
+
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capabilities) &&
+ test_bit(BATADV_ORIG_CAPA_HAS_MCAST, &orig->capa_initialized))
atomic_dec(&bat_priv->mcast.num_disabled);
batadv_mcast_want_unsnoop_update(bat_priv, orig, BATADV_NO_FLAGS);
batadv_mcast_want_ipv4_update(bat_priv, orig, BATADV_NO_FLAGS);
batadv_mcast_want_ipv6_update(bat_priv, orig, BATADV_NO_FLAGS);
+
+ spin_unlock_bh(&orig->mcast_handler_lock);
}
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index f0a50f31d822..46604010dcd4 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -19,6 +19,7 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bitops.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
#include <linux/debugfs.h>
@@ -134,9 +135,9 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
uint16_t tvlv_value_len)
{
if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
- orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_NC;
+ clear_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities);
else
- orig->capabilities |= BATADV_ORIG_CAPA_HAS_NC;
+ set_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities);
}
/**
@@ -894,7 +895,7 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
goto out;
/* check if orig node is network coding enabled */
- if (!(orig_node->capabilities & BATADV_ORIG_CAPA_HAS_NC))
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_NC, &orig_node->capabilities))
goto out;
/* accept ogms from 'good' neighbors and single hop neighbors */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 018b7495ad84..32a0fcfab36d 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -696,8 +696,13 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
orig_node->last_seen = jiffies;
reset_time = jiffies - 1 - msecs_to_jiffies(BATADV_RESET_PROTECTION_MS);
orig_node->bcast_seqno_reset = reset_time;
+
#ifdef CONFIG_BATMAN_ADV_MCAST
orig_node->mcast_flags = BATADV_NO_FLAGS;
+ INIT_HLIST_NODE(&orig_node->mcast_want_all_unsnoopables_node);
+ INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv4_node);
+ INIT_HLIST_NODE(&orig_node->mcast_want_all_ipv6_node);
+ spin_lock_init(&orig_node->mcast_handler_lock);
#endif
/* create a vlan object for the "untagged" LAN */
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 0a01992e65ab..191076ef1eca 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -616,7 +616,8 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
* we delete only packets belonging to the given interface
*/
if ((hard_iface) &&
- (forw_packet->if_incoming != hard_iface))
+ (forw_packet->if_incoming != hard_iface) &&
+ (forw_packet->if_outgoing != hard_iface))
continue;
spin_unlock_bh(&bat_priv->forw_bcast_list_lock);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index a2fc843c2243..49d3d3aa59cb 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -202,6 +202,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
int gw_mode;
enum batadv_forw_mode forw_mode;
struct batadv_orig_node *mcast_single_orig = NULL;
+ int network_offset = ETH_HLEN;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
@@ -214,14 +215,18 @@ static int batadv_interface_tx(struct sk_buff *skb,
case ETH_P_8021Q:
vhdr = vlan_eth_hdr(skb);
- if (vhdr->h_vlan_encapsulated_proto != ethertype)
+ if (vhdr->h_vlan_encapsulated_proto != ethertype) {
+ network_offset += VLAN_HLEN;
break;
+ }
/* fall through */
case ETH_P_BATMAN:
goto dropped;
}
+ skb_set_network_header(skb, network_offset);
+
if (batadv_bla_tx(bat_priv, skb, vid))
goto dropped;
@@ -936,7 +941,7 @@ static void batadv_softif_init_early(struct net_device *dev)
dev->netdev_ops = &batadv_netdev_ops;
dev->destructor = batadv_softif_free;
dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER;
- dev->tx_queue_len = 0;
+ dev->priv_flags |= IFF_NO_QUEUE;
/* can't call min_mtu, because the needed variables
* have not been initialized yet
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 5809b39c1922..c1eb7b72ab15 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -19,6 +19,7 @@
#include "main.h"
#include <linux/atomic.h>
+#include <linux/bitops.h>
#include <linux/bug.h>
#include <linux/byteorder/generic.h>
#include <linux/compiler.h>
@@ -1882,7 +1883,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
}
spin_unlock_bh(list_lock);
}
- orig_node->capa_initialized &= ~BATADV_ORIG_CAPA_HAS_TT;
+ clear_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized);
}
static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global,
@@ -2215,7 +2216,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv)
spin_lock_bh(&bat_priv->tt.req_list_lock);
list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
- list_del(&node->list);
+ list_del_init(&node->list);
kfree(node);
}
@@ -2251,7 +2252,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv)
list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
if (batadv_has_timed_out(node->issued_at,
BATADV_TT_REQUEST_TIMEOUT)) {
- list_del(&node->list);
+ list_del_init(&node->list);
kfree(node);
}
}
@@ -2533,7 +2534,8 @@ out:
batadv_hardif_free_ref(primary_if);
if (ret && tt_req_node) {
spin_lock_bh(&bat_priv->tt.req_list_lock);
- list_del(&tt_req_node->list);
+ /* list_del_init() verifies tt_req_node still is in the list */
+ list_del_init(&tt_req_node->list);
spin_unlock_bh(&bat_priv->tt.req_list_lock);
kfree(tt_req_node);
}
@@ -2841,7 +2843,7 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
return;
}
}
- orig_node->capa_initialized |= BATADV_ORIG_CAPA_HAS_TT;
+ set_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized);
}
static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
@@ -2970,7 +2972,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv,
list_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) {
if (!batadv_compare_eth(node->addr, resp_src))
continue;
- list_del(&node->list);
+ list_del_init(&node->list);
kfree(node);
}
@@ -3343,7 +3345,8 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
bool has_tt_init;
tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff;
- has_tt_init = orig_node->capa_initialized & BATADV_ORIG_CAPA_HAS_TT;
+ has_tt_init = test_bit(BATADV_ORIG_CAPA_HAS_TT,
+ &orig_node->capa_initialized);
/* orig table not initialised AND first diff is in the OGM OR the ttvn
* increased by one -> we can apply the attached changes
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 67d63483618e..55610a805b53 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -221,6 +221,7 @@ struct batadv_orig_bat_iv {
* @batadv_dat_addr_t: address of the orig node in the distributed hash
* @last_seen: time when last packet from this node was received
* @bcast_seqno_reset: time when the broadcast seqno window was reset
+ * @mcast_handler_lock: synchronizes mcast-capability and -flag changes
* @mcast_flags: multicast flags announced by the orig node
* @mcast_want_all_unsnoop_node: a list node for the
* mcast.want_all_unsnoopables list
@@ -268,13 +269,15 @@ struct batadv_orig_node {
unsigned long last_seen;
unsigned long bcast_seqno_reset;
#ifdef CONFIG_BATMAN_ADV_MCAST
+ /* synchronizes mcast tvlv specific orig changes */
+ spinlock_t mcast_handler_lock;
uint8_t mcast_flags;
struct hlist_node mcast_want_all_unsnoopables_node;
struct hlist_node mcast_want_all_ipv4_node;
struct hlist_node mcast_want_all_ipv6_node;
#endif
- uint8_t capabilities;
- uint8_t capa_initialized;
+ unsigned long capabilities;
+ unsigned long capa_initialized;
atomic_t last_ttvn;
unsigned char *tt_buff;
int16_t tt_buff_len;
@@ -313,10 +316,10 @@ struct batadv_orig_node {
* (= orig node announces a tvlv of type BATADV_TVLV_MCAST)
*/
enum batadv_orig_capabilities {
- BATADV_ORIG_CAPA_HAS_DAT = BIT(0),
- BATADV_ORIG_CAPA_HAS_NC = BIT(1),
- BATADV_ORIG_CAPA_HAS_TT = BIT(2),
- BATADV_ORIG_CAPA_HAS_MCAST = BIT(3),
+ BATADV_ORIG_CAPA_HAS_DAT,
+ BATADV_ORIG_CAPA_HAS_NC,
+ BATADV_ORIG_CAPA_HAS_TT,
+ BATADV_ORIG_CAPA_HAS_MCAST,
};
/**
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 2fb7b3064904..131e79cde350 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -85,7 +85,7 @@ struct lowpan_dev {
static inline struct lowpan_dev *lowpan_dev(const struct net_device *netdev)
{
- return netdev_priv(netdev);
+ return (struct lowpan_dev *)lowpan_priv(netdev)->priv;
}
static inline void peer_add(struct lowpan_dev *dev, struct lowpan_peer *peer)
@@ -848,8 +848,9 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
struct net_device *netdev;
int err = 0;
- netdev = alloc_netdev(sizeof(struct lowpan_dev), IFACE_NAME_TEMPLATE,
- NET_NAME_UNKNOWN, netdev_setup);
+ netdev = alloc_netdev(LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev)),
+ IFACE_NAME_TEMPLATE, NET_NAME_UNKNOWN,
+ netdev_setup);
if (!netdev)
return -ENOMEM;
@@ -859,9 +860,24 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
SET_NETDEV_DEV(netdev, &chan->conn->hcon->hdev->dev);
SET_NETDEV_DEVTYPE(netdev, &bt_type);
+ *dev = lowpan_dev(netdev);
+ (*dev)->netdev = netdev;
+ (*dev)->hdev = chan->conn->hcon->hdev;
+ INIT_LIST_HEAD(&(*dev)->peers);
+
+ spin_lock(&devices_lock);
+ INIT_LIST_HEAD(&(*dev)->list);
+ list_add_rcu(&(*dev)->list, &bt_6lowpan_devices);
+ spin_unlock(&devices_lock);
+
+ lowpan_netdev_setup(netdev, LOWPAN_LLTYPE_BTLE);
+
err = register_netdev(netdev);
if (err < 0) {
BT_INFO("register_netdev failed %d", err);
+ spin_lock(&devices_lock);
+ list_del_rcu(&(*dev)->list);
+ spin_unlock(&devices_lock);
free_netdev(netdev);
goto out;
}
@@ -871,16 +887,6 @@ static int setup_netdev(struct l2cap_chan *chan, struct lowpan_dev **dev)
&chan->src, chan->src_type);
set_bit(__LINK_STATE_PRESENT, &netdev->state);
- *dev = netdev_priv(netdev);
- (*dev)->netdev = netdev;
- (*dev)->hdev = chan->conn->hcon->hdev;
- INIT_LIST_HEAD(&(*dev)->peers);
-
- spin_lock(&devices_lock);
- INIT_LIST_HEAD(&(*dev)->list);
- list_add_rcu(&(*dev)->list, &bt_6lowpan_devices);
- spin_unlock(&devices_lock);
-
return 0;
out:
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index b8c794b87523..95d1a66ba03a 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -53,6 +53,11 @@ source "net/bluetooth/cmtp/Kconfig"
source "net/bluetooth/hidp/Kconfig"
+config BT_HS
+ bool "Bluetooth High Speed (HS) features"
+ depends on BT_BREDR
+ default y
+
config BT_LE
bool "Bluetooth Low Energy (LE) features"
depends on BT
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index 29c12ae72a66..2b15ae8c1def 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -13,9 +13,10 @@ bluetooth_6lowpan-y := 6lowpan.o
bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o lib.o \
- a2mp.o amp.o ecc.o hci_request.o mgmt_util.o
+ ecc.o hci_request.o mgmt_util.o
bluetooth-$(CONFIG_BT_BREDR) += sco.o
+bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c
index 5a04eb1a7e57..5f123c3320a7 100644
--- a/net/bluetooth/a2mp.c
+++ b/net/bluetooth/a2mp.c
@@ -16,6 +16,7 @@
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/l2cap.h>
+#include "hci_request.h"
#include "a2mp.h"
#include "amp.h"
@@ -286,11 +287,21 @@ static int a2mp_change_notify(struct amp_mgr *mgr, struct sk_buff *skb,
return 0;
}
+/* Completion callback that a2mp_getinfo_req() passes to hci_req_run() for
+ * HCI_OP_READ_LOCAL_AMP_INFO.
+ *
+ * Logs the status and sends the A2MP Get Info response unconditionally, i.e.
+ * also when @status is non-zero. @opcode is unused.
+ */
+static void read_local_amp_info_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode)
+{
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ a2mp_send_getinfo_rsp(hdev);
+}
+
static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
struct a2mp_cmd *hdr)
{
struct a2mp_info_req *req = (void *) skb->data;
struct hci_dev *hdev;
+ struct hci_request hreq;
+ int err = 0;
if (le16_to_cpu(hdr->len) < sizeof(*req))
return -EINVAL;
@@ -311,7 +322,11 @@ static int a2mp_getinfo_req(struct amp_mgr *mgr, struct sk_buff *skb,
}
set_bit(READ_LOC_AMP_INFO, &mgr->state);
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
+ hci_req_init(&hreq, hdev);
+ hci_req_add(&hreq, HCI_OP_READ_LOCAL_AMP_INFO, 0, NULL);
+ err = hci_req_run(&hreq, read_local_amp_info_complete);
+ if (err < 0)
+ a2mp_send_getinfo_rsp(hdev);
done:
if (hdev)
diff --git a/net/bluetooth/a2mp.h b/net/bluetooth/a2mp.h
index 296f665adb09..a4ff3ea9b38a 100644
--- a/net/bluetooth/a2mp.h
+++ b/net/bluetooth/a2mp.h
@@ -130,10 +130,29 @@ struct a2mp_physlink_rsp {
#define A2MP_STATUS_SECURITY_VIOLATION 0x06
struct amp_mgr *amp_mgr_get(struct amp_mgr *mgr);
+
+#if IS_ENABLED(CONFIG_BT_HS)
int amp_mgr_put(struct amp_mgr *mgr);
struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn,
struct sk_buff *skb);
void a2mp_discover_amp(struct l2cap_chan *chan);
+#else
+static inline int amp_mgr_put(struct amp_mgr *mgr)
+{
+ return 0;
+}
+
+static inline struct l2cap_chan *a2mp_channel_create(struct l2cap_conn *conn,
+ struct sk_buff *skb)
+{
+ return NULL;
+}
+
+static inline void a2mp_discover_amp(struct l2cap_chan *chan)
+{
+}
+#endif
+
void a2mp_send_getinfo_rsp(struct hci_dev *hdev);
void a2mp_send_getampassoc_rsp(struct hci_dev *hdev, u8 status);
void a2mp_send_create_phy_link_req(struct hci_dev *hdev, u8 status);
diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c
index ee016f039100..e32f34189007 100644
--- a/net/bluetooth/amp.c
+++ b/net/bluetooth/amp.c
@@ -16,6 +16,7 @@
#include <net/bluetooth/hci_core.h>
#include <crypto/hash.h>
+#include "hci_request.h"
#include "a2mp.h"
#include "amp.h"
@@ -220,10 +221,49 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type)
return hmac_sha256(gamp_key, HCI_AMP_LINK_KEY_SIZE, "802b", 4, data);
}
+/* Completion callback for HCI_OP_READ_LOCAL_AMP_ASSOC requests run through
+ * hci_req_run_skb().
+ *
+ * Appends the fragment carried in @skb to hdev->loc_assoc. While the
+ * controller reports more remaining data than this fragment holds, the next
+ * fragment is requested and no response is sent yet. Once the last fragment
+ * has been stored, or the controller reported an error, the A2MP Get AMP
+ * Assoc response and the Create Physical Link request are sent with
+ * rp->status.
+ *
+ * The @status and @opcode arguments are not used; the status is taken from
+ * the reply payload instead.
+ */
+static void read_local_amp_assoc_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode, struct sk_buff *skb)
+{
+ struct hci_rp_read_local_amp_assoc *rp = (void *)skb->data;
+ struct amp_assoc *assoc = &hdev->loc_assoc;
+ size_t rem_len, frag_len;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+ if (rp->status)
+ goto send_rsp;
+
+ /* NOTE(review): skb->len is not checked against sizeof(*rp) before the
+ * subtraction - confirm the caller guarantees a full reply header.
+ */
+ frag_len = skb->len - sizeof(*rp);
+ rem_len = __le16_to_cpu(rp->rem_len);
+
+ /* More data remains than this fragment carries: store it and ask for
+ * the next one.
+ */
+ if (rem_len > frag_len) {
+ BT_DBG("frag_len %zu rem_len %zu", frag_len, rem_len);
+
+ /* NOTE(review): assoc->offset + frag_len is not bounded against
+ * the capacity of assoc->data here - confirm it is limited
+ * elsewhere.
+ */
+ memcpy(assoc->data + assoc->offset, rp->frag, frag_len);
+ assoc->offset += frag_len;
+
+ /* Read other fragments */
+ amp_read_loc_assoc_frag(hdev, rp->phy_handle);
+
+ return;
+ }
+
+ /* Last fragment: copy the remainder, record the total length and
+ * rewind the offset.
+ */
+ memcpy(assoc->data + assoc->offset, rp->frag, rem_len);
+ assoc->len = assoc->offset + rem_len;
+ assoc->offset = 0;
+
+send_rsp:
+ /* Send A2MP Rsp when all fragments are received */
+ a2mp_send_getampassoc_rsp(hdev, rp->status);
+ a2mp_send_create_phy_link_req(hdev, rp->status);
+}
+
void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle)
{
struct hci_cp_read_local_amp_assoc cp;
struct amp_assoc *loc_assoc = &hdev->loc_assoc;
+ struct hci_request req;
+ int err = 0;
BT_DBG("%s handle %d", hdev->name, phy_handle);
@@ -231,12 +271,18 @@ void amp_read_loc_assoc_frag(struct hci_dev *hdev, u8 phy_handle)
cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
cp.len_so_far = cpu_to_le16(loc_assoc->offset);
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
+ err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
+ if (err < 0)
+ a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
}
void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr)
{
struct hci_cp_read_local_amp_assoc cp;
+ struct hci_request req;
+ int err = 0;
memset(&hdev->loc_assoc, 0, sizeof(struct amp_assoc));
memset(&cp, 0, sizeof(cp));
@@ -244,7 +290,11 @@ void amp_read_loc_assoc(struct hci_dev *hdev, struct amp_mgr *mgr)
cp.max_len = cpu_to_le16(hdev->amp_assoc_size);
set_bit(READ_LOC_AMP_ASSOC, &mgr->state);
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
+ /* Capture the result so that a request which could not be run is
+ * reported through the A2MP response below (err would otherwise stay 0
+ * and the check could never fire).
+ */
+ err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
+ if (err < 0)
+ a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
}
void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
@@ -252,6 +302,8 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
{
struct hci_cp_read_local_amp_assoc cp;
struct amp_mgr *mgr = hcon->amp_mgr;
+ struct hci_request req;
+ int err = 0;
cp.phy_handle = hcon->handle;
cp.len_so_far = cpu_to_le16(0);
@@ -260,7 +312,25 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev,
set_bit(READ_LOC_AMP_ASSOC_FINAL, &mgr->state);
/* Read Local AMP Assoc final link information data */
- hci_send_cmd(hdev, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, HCI_OP_READ_LOCAL_AMP_ASSOC, sizeof(cp), &cp);
+ /* Capture the result so that a request which could not be run is
+ * reported through the A2MP response below (err would otherwise stay 0
+ * and the check could never fire).
+ */
+ err = hci_req_run_skb(&req, read_local_amp_assoc_complete);
+ if (err < 0)
+ a2mp_send_getampassoc_rsp(hdev, A2MP_STATUS_INVALID_CTRL_ID);
+}
+
+/* Completion callback for HCI_OP_WRITE_REMOTE_AMP_ASSOC requests.
+ *
+ * Logs the reply and, when the controller reported success, calls
+ * amp_write_rem_assoc_continue() for the physical link handle in the reply.
+ * The @status and @opcode arguments are not used.
+ */
+static void write_remote_amp_assoc_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode, struct sk_buff *skb)
+{
+ struct hci_rp_write_remote_amp_assoc *reply = (void *)skb->data;
+
+ BT_DBG("%s status 0x%2.2x phy_handle 0x%2.2x",
+ hdev->name, reply->status, reply->phy_handle);
+
+ /* A non-zero reply status ends the sequence without further action */
+ if (!reply->status)
+ amp_write_rem_assoc_continue(hdev, reply->phy_handle);
}
/* Write AMP Assoc data fragments, returns true with last fragment written*/
@@ -270,6 +340,7 @@ static bool amp_write_rem_assoc_frag(struct hci_dev *hdev,
struct hci_cp_write_remote_amp_assoc *cp;
struct amp_mgr *mgr = hcon->amp_mgr;
struct amp_ctrl *ctrl;
+ struct hci_request req;
u16 frag_len, len;
ctrl = amp_ctrl_lookup(mgr, hcon->remote_id);
@@ -307,7 +378,9 @@ static bool amp_write_rem_assoc_frag(struct hci_dev *hdev,
amp_ctrl_put(ctrl);
- hci_send_cmd(hdev, HCI_OP_WRITE_REMOTE_AMP_ASSOC, len, cp);
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, HCI_OP_WRITE_REMOTE_AMP_ASSOC, len, cp);
+ hci_req_run_skb(&req, write_remote_amp_assoc_complete);
kfree(cp);
@@ -344,10 +417,37 @@ void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle)
amp_write_rem_assoc_frag(hdev, hcon);
}
+/* Completion callback for the HCI_OP_CREATE_PHY_LINK request.
+ *
+ * Does nothing if the sent command data cannot be retrieved. Otherwise, with
+ * hdev->lock held: on success the remote AMP assoc write is started for the
+ * link's handle; on failure the connection with that handle, if any, is
+ * deleted. @opcode is unused.
+ */
+static void create_phylink_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode)
+{
+ struct hci_cp_create_phy_link *sent;
+ struct hci_conn *link;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ sent = hci_sent_cmd_data(hdev, HCI_OP_CREATE_PHY_LINK);
+ if (!sent)
+ return;
+
+ hci_dev_lock(hdev);
+
+ if (!status) {
+ amp_write_remote_assoc(hdev, sent->phy_handle);
+ goto unlock;
+ }
+
+ /* The command failed: drop the connection object for this handle */
+ link = hci_conn_hash_lookup_handle(hdev, sent->phy_handle);
+ if (link)
+ hci_conn_del(link);
+
+unlock:
+ hci_dev_unlock(hdev);
+}
+
void amp_create_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
struct hci_conn *hcon)
{
struct hci_cp_create_phy_link cp;
+ struct hci_request req;
cp.phy_handle = hcon->handle;
@@ -360,13 +460,33 @@ void amp_create_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
return;
}
- hci_send_cmd(hdev, HCI_OP_CREATE_PHY_LINK, sizeof(cp), &cp);
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, HCI_OP_CREATE_PHY_LINK, sizeof(cp), &cp);
+ hci_req_run(&req, create_phylink_complete);
+}
+
+/* Completion callback for the HCI_OP_ACCEPT_PHY_LINK request.
+ *
+ * On success, and if the sent command data is still available, starts the
+ * remote AMP assoc write for the accepted link's handle. Nothing is done on
+ * failure. @opcode is unused.
+ */
+static void accept_phylink_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode)
+{
+ struct hci_cp_accept_phy_link *sent;
+
+ BT_DBG("%s status 0x%2.2x", hdev->name, status);
+
+ if (!status) {
+ sent = hci_sent_cmd_data(hdev, HCI_OP_ACCEPT_PHY_LINK);
+ if (sent)
+ amp_write_remote_assoc(hdev, sent->phy_handle);
+ }
}
void amp_accept_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
struct hci_conn *hcon)
{
struct hci_cp_accept_phy_link cp;
+ struct hci_request req;
cp.phy_handle = hcon->handle;
@@ -379,7 +499,9 @@ void amp_accept_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
return;
}
- hci_send_cmd(hdev, HCI_OP_ACCEPT_PHY_LINK, sizeof(cp), &cp);
+ hci_req_init(&req, hdev);
+ hci_req_add(&req, HCI_OP_ACCEPT_PHY_LINK, sizeof(cp), &cp);
+ hci_req_run(&req, accept_phylink_complete);
}
void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon)
diff --git a/net/bluetooth/amp.h b/net/bluetooth/amp.h
index 7ea3db77ba89..8848f8158ae4 100644
--- a/net/bluetooth/amp.h
+++ b/net/bluetooth/amp.h
@@ -44,6 +44,20 @@ void amp_create_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
struct hci_conn *hcon);
void amp_accept_phylink(struct hci_dev *hdev, struct amp_mgr *mgr,
struct hci_conn *hcon);
+
+#if IS_ENABLED(CONFIG_BT_HS)
+void amp_create_logical_link(struct l2cap_chan *chan);
+void amp_disconnect_logical_link(struct hci_chan *hchan);
+#else
+static inline void amp_create_logical_link(struct l2cap_chan *chan)
+{
+}
+
+static inline void amp_disconnect_logical_link(struct hci_chan *hchan)
+{
+}
+#endif
+
void amp_write_remote_assoc(struct hci_dev *hdev, u8 handle);
void amp_write_rem_assoc_continue(struct hci_dev *hdev, u8 handle);
void amp_physical_cfm(struct hci_conn *bredr_hcon, struct hci_conn *hs_hcon);
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index b0c6c6af76ef..9a50338772f3 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -100,9 +100,9 @@ static void cmtp_application_del(struct cmtp_session *session, struct cmtp_appli
static struct cmtp_application *cmtp_application_get(struct cmtp_session *session, int pattern, __u16 value)
{
struct cmtp_application *app;
- struct list_head *p, *n;
+ struct list_head *p;
- list_for_each_safe(p, n, &session->applications) {
+ list_for_each(p, &session->applications) {
app = list_entry(p, struct cmtp_application, list);
switch (pattern) {
case CMTP_MSGNUM:
@@ -511,13 +511,13 @@ static int cmtp_proc_show(struct seq_file *m, void *v)
struct capi_ctr *ctrl = m->private;
struct cmtp_session *session = ctrl->driverdata;
struct cmtp_application *app;
- struct list_head *p, *n;
+ struct list_head *p;
seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl));
seq_printf(m, "addr %s\n", session->name);
seq_printf(m, "ctrl %d\n", session->num);
- list_for_each_safe(p, n, &session->applications) {
+ list_for_each(p, &session->applications) {
app = list_entry(p, struct cmtp_application, list);
seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping);
}
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 2c48bf0b5afb..b4548c739a64 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -64,6 +64,48 @@ static void hci_le_create_connection_cancel(struct hci_conn *conn)
hci_send_cmd(conn->hdev, HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL);
}
+/* Undo the explicit-connect bookkeeping kept in the connection parameters
+ * for @conn's destination address.
+ *
+ * This function requires the caller holds hdev->lock
+ */
+static void hci_connect_le_scan_cleanup(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+ bdaddr_t *peer = &conn->dst;
+ u8 peer_type = conn->dst_type;
+ struct hci_conn_params *entry;
+ struct smp_irk *irk;
+
+ /* When an IRK is known for the destination, the lookup below uses the
+ * IRK's address and address type instead.
+ */
+ irk = hci_get_irk(hdev, peer, peer_type);
+ if (irk) {
+ peer = &irk->bdaddr;
+ peer_type = irk->addr_type;
+ }
+
+ entry = hci_explicit_connect_lookup(hdev, peer, peer_type);
+ if (!entry)
+ return;
+
+ /* Params that carry some other auto-connect action are kept and only
+ * lose their explicit_connect mark. Params that exist purely for this
+ * explicit connect (HCI_AUTO_CONN_EXPLICIT) are removed completely.
+ */
+ if (entry->auto_connect != HCI_AUTO_CONN_EXPLICIT) {
+ entry->explicit_connect = false;
+ return;
+ }
+
+ hci_conn_params_del(hdev, peer, peer_type);
+}
+
+/* Abandon a connection attempt that is still in its scanning phase: clean up
+ * the explicit-connect params, take @conn out of the connection hash and
+ * update the background scan.
+ *
+ * This function requires the caller holds hdev->lock
+ */
+static void hci_connect_le_scan_remove(struct hci_conn *conn)
+{
+ struct hci_dev *hdev = conn->hdev;
+
+ hci_connect_le_scan_cleanup(conn);
+
+ hci_conn_hash_del(hdev, conn);
+ hci_update_background_scan(hdev);
+}
+
static void hci_acl_create_connection(struct hci_conn *conn)
{
struct hci_dev *hdev = conn->hdev;
@@ -340,8 +382,12 @@ static void hci_conn_timeout(struct work_struct *work)
if (conn->out) {
if (conn->type == ACL_LINK)
hci_acl_create_connection_cancel(conn);
- else if (conn->type == LE_LINK)
- hci_le_create_connection_cancel(conn);
+ else if (conn->type == LE_LINK) {
+ if (test_bit(HCI_CONN_SCANNING, &conn->flags))
+ hci_connect_le_scan_remove(conn);
+ else
+ hci_le_create_connection_cancel(conn);
+ }
} else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
hci_reject_sco(conn);
}
@@ -637,15 +683,18 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
struct hci_conn *conn;
- if (status == 0)
- return;
+ hci_dev_lock(hdev);
+
+ conn = hci_lookup_le_connect(hdev);
+
+ if (!status) {
+ hci_connect_le_scan_cleanup(conn);
+ goto done;
+ }
BT_ERR("HCI request failed to create LE connection: status 0x%2.2x",
status);
- hci_dev_lock(hdev);
-
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
if (!conn)
goto done;
@@ -685,6 +734,7 @@ static void hci_req_add_le_create_conn(struct hci_request *req,
hci_req_add(req, HCI_OP_LE_CREATE_CONN, sizeof(cp), &cp);
conn->state = BT_CONNECT;
+ clear_bit(HCI_CONN_SCANNING, &conn->flags);
}
static void hci_req_directed_advertising(struct hci_request *req,
@@ -728,7 +778,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
u8 role)
{
struct hci_conn_params *params;
- struct hci_conn *conn;
+ struct hci_conn *conn, *conn_unfinished;
struct smp_irk *irk;
struct hci_request req;
int err;
@@ -751,26 +801,29 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
* and return the object found.
*/
conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
+ conn_unfinished = NULL;
if (conn) {
- conn->pending_sec_level = sec_level;
- goto done;
+ if (conn->state == BT_CONNECT &&
+ test_bit(HCI_CONN_SCANNING, &conn->flags)) {
+ BT_DBG("will continue unfinished conn %pMR", dst);
+ conn_unfinished = conn;
+ } else {
+ if (conn->pending_sec_level < sec_level)
+ conn->pending_sec_level = sec_level;
+ goto done;
+ }
}
/* Since the controller supports only one LE connection attempt at a
* time, we return -EBUSY if there is any connection attempt running.
*/
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
- if (conn)
+ if (hci_lookup_le_connect(hdev))
return ERR_PTR(-EBUSY);
/* When given an identity address with existing identity
* resolving key, the connection needs to be established
* to a resolvable random address.
*
- * This uses the cached random resolvable address from
- * a previous scan. When no cached address is available,
- * try connecting to the identity address instead.
- *
* Storing the resolvable random address is required here
* to handle connection failures. The address will later
* be resolved back into the original identity address
@@ -782,15 +835,23 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst,
dst_type = ADDR_LE_DEV_RANDOM;
}
- conn = hci_conn_add(hdev, LE_LINK, dst, role);
+ if (conn_unfinished) {
+ conn = conn_unfinished;
+ bacpy(&conn->dst, dst);
+ } else {
+ conn = hci_conn_add(hdev, LE_LINK, dst, role);
+ }
+
if (!conn)
return ERR_PTR(-ENOMEM);
conn->dst_type = dst_type;
conn->sec_level = BT_SECURITY_LOW;
- conn->pending_sec_level = sec_level;
conn->conn_timeout = conn_timeout;
+ if (!conn_unfinished)
+ conn->pending_sec_level = sec_level;
+
hci_req_init(&req, hdev);
/* Disable advertising if we're active. For master role
@@ -855,6 +916,144 @@ create_conn:
}
done:
+ /* If this is continuation of connect started by hci_connect_le_scan,
+ * it already called hci_conn_hold and calling it again would mess the
+ * counter.
+ */
+ if (!conn_unfinished)
+ hci_conn_hold(conn);
+
+ return conn;
+}
+
+/* Completion callback for the request run by hci_connect_le_scan().
+ *
+ * Nothing is done on success. On failure the error is logged and, with
+ * hdev->lock held, the LE connection found in BT_CONNECT state (if any) is
+ * handed to hci_le_conn_failed(). @opcode is unused.
+ */
+static void hci_connect_le_scan_complete(struct hci_dev *hdev, u8 status,
+ u16 opcode)
+{
+ struct hci_conn *conn;
+
+ if (!status)
+ return;
+
+ BT_ERR("Failed to add device to auto conn whitelist: status 0x%2.2x",
+ status);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ if (conn)
+ hci_le_conn_failed(conn, status);
+
+ hci_dev_unlock(hdev);
+}
+
+/* Report whether an LE connection to @addr exists whose destination address
+ * type equals @type and whose state is BT_CONNECTED.
+ */
+static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type)
+{
+ struct hci_conn *link = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr);
+
+ return link && link->dst_type == type && link->state == BT_CONNECTED;
+}
+
+/* Mark the connection parameters for @addr/@addr_type as wanting an explicit
+ * connection and call __hci_update_background_scan() on @req.
+ *
+ * Returns 0 on success, -EISCONN if is_connected() reports an established LE
+ * link to that address and type, or -EIO if hci_conn_params_add() returns
+ * NULL.
+ *
+ * This function requires the caller holds hdev->lock
+ */
+static int hci_explicit_conn_params_set(struct hci_request *req,
+ bdaddr_t *addr, u8 addr_type)
+{
+ struct hci_dev *hdev = req->hdev;
+ struct hci_conn_params *params;
+
+ if (is_connected(hdev, addr, addr_type))
+ return -EISCONN;
+
+ params = hci_conn_params_add(hdev, addr, addr_type);
+ if (!params)
+ return -EIO;
+
+ /* If we created new params, or existing params were marked as disabled,
+ * mark them to be used just once to connect.
+ */
+ if (params->auto_connect == HCI_AUTO_CONN_DISABLED) {
+ params->auto_connect = HCI_AUTO_CONN_EXPLICIT;
+ /* Unlink from whatever action list the params are on before
+ * adding them to pend_le_conns.
+ */
+ list_del_init(&params->action);
+ list_add(&params->action, &hdev->pend_le_conns);
+ }
+
+ /* Set for every params entry, not only the ones re-marked above */
+ params->explicit_connect = true;
+ __hci_update_background_scan(req);
+
+ BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type,
+ params->auto_connect);
+
+ return 0;
+}
+
+/* Start an LE connection attempt to @dst that begins with a scan phase.
+ *
+ * If a hci_conn for @dst already exists it is reused and only its
+ * pending_sec_level is raised when @sec_level is higher. Otherwise a new
+ * hci_conn is added, the connection parameters for @dst are marked for an
+ * explicit connect, and the connection is put into BT_CONNECT state with
+ * HCI_CONN_SCANNING set.
+ *
+ * Returns the connection with a reference taken via hci_conn_hold(), or an
+ * ERR_PTR():
+ * -ECONNREFUSED LE is not enabled although the controller is LE capable
+ * -EOPNOTSUPP LE is not enabled and the controller is not LE capable
+ * -ENOMEM hci_conn_add() failed
+ * -EBUSY hci_explicit_conn_params_set() failed
+ * other error returned by hci_req_run(), except -ENODATA
+ *
+ * This function requires the caller holds hdev->lock
+ */
+struct hci_conn *hci_connect_le_scan(struct hci_dev *hdev, bdaddr_t *dst,
+ u8 dst_type, u8 sec_level,
+ u16 conn_timeout, u8 role)
+{
+ struct hci_conn *conn;
+ struct hci_request req;
+ int err;
+
+ /* Let's make sure that LE is enabled. */
+ if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) {
+ if (lmp_le_capable(hdev))
+ return ERR_PTR(-ECONNREFUSED);
+
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Some devices send ATT messages as soon as the physical link is
+ * established. To be able to handle these ATT messages, the user-
+ * space first establishes the connection and then starts the pairing
+ * process.
+ *
+ * So if a hci_conn object already exists for the following connection
+ * attempt, we simply update pending_sec_level and auth_type fields
+ * and return the object found.
+ */
+ conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, dst);
+ if (conn) {
+ if (conn->pending_sec_level < sec_level)
+ conn->pending_sec_level = sec_level;
+ goto done;
+ }
+
+ BT_DBG("requesting refresh of dst_addr");
+
+ conn = hci_conn_add(hdev, LE_LINK, dst, role);
+ if (!conn)
+ return ERR_PTR(-ENOMEM);
+
+ hci_req_init(&req, hdev);
+
+ if (hci_explicit_conn_params_set(&req, dst, dst_type) < 0) {
+ /* Do not leave the connection added above behind: the caller
+ * only gets an error pointer and cannot release it.
+ */
+ hci_conn_del(conn);
+ return ERR_PTR(-EBUSY);
+ }
+
+ conn->state = BT_CONNECT;
+ set_bit(HCI_CONN_SCANNING, &conn->flags);
+
+ /* -ENODATA is not treated as a failure here */
+ err = hci_req_run(&req, hci_connect_le_scan_complete);
+ if (err && err != -ENODATA) {
+ hci_conn_del(conn);
+ return ERR_PTR(err);
+ }
+
+ conn->dst_type = dst_type;
+ conn->sec_level = BT_SECURITY_LOW;
+ conn->pending_sec_level = sec_level;
+ conn->conn_timeout = conn_timeout;
+
+done:
hci_conn_hold(conn);
return conn;
}
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2f8fb33067e1..adcbc74c2432 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -2822,10 +2822,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
{
struct hci_conn_params *params;
- /* The conn params list only contains identity addresses */
- if (!hci_is_identity_address(addr, addr_type))
- return NULL;
-
list_for_each_entry(params, &hdev->le_conn_params, list) {
if (bacmp(&params->addr, addr) == 0 &&
params->addr_type == addr_type) {
@@ -2842,10 +2838,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
{
struct hci_conn_params *param;
- /* The list only contains identity addresses */
- if (!hci_is_identity_address(addr, addr_type))
- return NULL;
-
list_for_each_entry(param, list, action) {
if (bacmp(&param->addr, addr) == 0 &&
param->addr_type == addr_type)
@@ -2856,14 +2848,35 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
}
/* This function requires the caller holds hdev->lock */
+struct hci_conn_params *hci_explicit_connect_lookup(struct hci_dev *hdev,
+ bdaddr_t *addr,
+ u8 addr_type)
+{
+ /* Searched in this order: pending connections first, then reports */
+ struct list_head *pending[] = {
+ &hdev->pend_le_conns,
+ &hdev->pend_le_reports,
+ };
+ struct hci_conn_params *entry;
+ unsigned int i;
+
+ for (i = 0; i < sizeof(pending) / sizeof(pending[0]); i++) {
+ list_for_each_entry(entry, pending[i], action) {
+ if (bacmp(&entry->addr, addr) != 0)
+ continue;
+ if (entry->addr_type != addr_type)
+ continue;
+ /* Only entries flagged for an explicit connect count */
+ if (entry->explicit_connect)
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+/* This function requires the caller holds hdev->lock */
struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
{
struct hci_conn_params *params;
- if (!hci_is_identity_address(addr, addr_type))
- return NULL;
-
params = hci_conn_params_lookup(hdev, addr, addr_type);
if (params)
return params;
@@ -2927,6 +2940,15 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) {
if (params->auto_connect != HCI_AUTO_CONN_DISABLED)
continue;
+
+ /* If trying to establish one time connection to disabled
+ * device, leave the params, but mark them as just once.
+ */
+ if (params->explicit_connect) {
+ params->auto_connect = HCI_AUTO_CONN_EXPLICIT;
+ continue;
+ }
+
list_del(&params->list);
kfree(params);
}
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 32363c2b7f83..7ba35a9ba6b7 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -823,7 +823,7 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
if (rp->status)
- goto a2mp_rsp;
+ return;
hdev->amp_status = rp->amp_status;
hdev->amp_total_bw = __le32_to_cpu(rp->total_bw);
@@ -835,46 +835,6 @@ static void hci_cc_read_local_amp_info(struct hci_dev *hdev,
hdev->amp_assoc_size = __le16_to_cpu(rp->max_assoc_size);
hdev->amp_be_flush_to = __le32_to_cpu(rp->be_flush_to);
hdev->amp_max_flush_to = __le32_to_cpu(rp->max_flush_to);
-
-a2mp_rsp:
- a2mp_send_getinfo_rsp(hdev);
-}
-
-static void hci_cc_read_local_amp_assoc(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- struct hci_rp_read_local_amp_assoc *rp = (void *) skb->data;
- struct amp_assoc *assoc = &hdev->loc_assoc;
- size_t rem_len, frag_len;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
-
- if (rp->status)
- goto a2mp_rsp;
-
- frag_len = skb->len - sizeof(*rp);
- rem_len = __le16_to_cpu(rp->rem_len);
-
- if (rem_len > frag_len) {
- BT_DBG("frag_len %zu rem_len %zu", frag_len, rem_len);
-
- memcpy(assoc->data + assoc->offset, rp->frag, frag_len);
- assoc->offset += frag_len;
-
- /* Read other fragments */
- amp_read_loc_assoc_frag(hdev, rp->phy_handle);
-
- return;
- }
-
- memcpy(assoc->data + assoc->offset, rp->frag, rem_len);
- assoc->len = assoc->offset + rem_len;
- assoc->offset = 0;
-
-a2mp_rsp:
- /* Send A2MP Rsp when all fragments are received */
- a2mp_send_getampassoc_rsp(hdev, rp->status);
- a2mp_send_create_phy_link_req(hdev, rp->status);
}
static void hci_cc_read_inq_rsp_tx_power(struct hci_dev *hdev,
@@ -1099,7 +1059,7 @@ static void hci_cc_le_set_adv_enable(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_set_flag(hdev, HCI_LE_ADV);
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ conn = hci_lookup_le_connect(hdev);
if (conn)
queue_delayed_work(hdev->workqueue,
&conn->le_conn_timeout,
@@ -1409,20 +1369,6 @@ static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_unlock(hdev);
}
-static void hci_cc_write_remote_amp_assoc(struct hci_dev *hdev,
- struct sk_buff *skb)
-{
- struct hci_rp_write_remote_amp_assoc *rp = (void *) skb->data;
-
- BT_DBG("%s status 0x%2.2x phy_handle 0x%2.2x",
- hdev->name, rp->status, rp->phy_handle);
-
- if (rp->status)
- return;
-
- amp_write_rem_assoc_continue(hdev, rp->phy_handle);
-}
-
static void hci_cc_read_rssi(struct hci_dev *hdev, struct sk_buff *skb)
{
struct hci_rp_read_rssi *rp = (void *) skb->data;
@@ -1944,47 +1890,6 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
hci_dev_unlock(hdev);
}
-static void hci_cs_create_phylink(struct hci_dev *hdev, u8 status)
-{
- struct hci_cp_create_phy_link *cp;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_CREATE_PHY_LINK);
- if (!cp)
- return;
-
- hci_dev_lock(hdev);
-
- if (status) {
- struct hci_conn *hcon;
-
- hcon = hci_conn_hash_lookup_handle(hdev, cp->phy_handle);
- if (hcon)
- hci_conn_del(hcon);
- } else {
- amp_write_remote_assoc(hdev, cp->phy_handle);
- }
-
- hci_dev_unlock(hdev);
-}
-
-static void hci_cs_accept_phylink(struct hci_dev *hdev, u8 status)
-{
- struct hci_cp_accept_phy_link *cp;
-
- BT_DBG("%s status 0x%2.2x", hdev->name, status);
-
- if (status)
- return;
-
- cp = hci_sent_cmd_data(hdev, HCI_OP_ACCEPT_PHY_LINK);
- if (!cp)
- return;
-
- amp_write_remote_assoc(hdev, cp->phy_handle);
-}
-
static void hci_cs_le_create_conn(struct hci_dev *hdev, u8 status)
{
struct hci_cp_le_create_conn *cp;
@@ -2998,10 +2903,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_cc_read_clock(hdev, skb);
break;
- case HCI_OP_READ_LOCAL_AMP_ASSOC:
- hci_cc_read_local_amp_assoc(hdev, skb);
- break;
-
case HCI_OP_READ_INQ_RSP_TX_POWER:
hci_cc_read_inq_rsp_tx_power(hdev, skb);
break;
@@ -3106,10 +3007,6 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_cc_set_adv_param(hdev, skb);
break;
- case HCI_OP_WRITE_REMOTE_AMP_ASSOC:
- hci_cc_write_remote_amp_assoc(hdev, skb);
- break;
-
case HCI_OP_READ_RSSI:
hci_cc_read_rssi(hdev, skb);
break;
@@ -3193,14 +3090,6 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb,
hci_cs_setup_sync_conn(hdev, ev->status);
break;
- case HCI_OP_CREATE_PHY_LINK:
- hci_cs_create_phylink(hdev, ev->status);
- break;
-
- case HCI_OP_ACCEPT_PHY_LINK:
- hci_cs_accept_phylink(hdev, ev->status);
- break;
-
case HCI_OP_SNIFF_MODE:
hci_cs_sniff_mode(hdev, ev->status);
break;
@@ -4399,6 +4288,23 @@ unlock:
hci_dev_unlock(hdev);
}
+#if IS_ENABLED(CONFIG_BT_HS)
+static void hci_chan_selected_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ struct hci_ev_channel_selected *ev = (void *)skb->data;
+ struct hci_conn *hcon;
+
+ BT_DBG("%s handle 0x%2.2x", hdev->name, ev->phy_handle);
+
+ skb_pull(skb, sizeof(*ev));
+
+ hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
+ if (!hcon)
+ return;
+
+ amp_read_loc_assoc_final_data(hdev, hcon);
+}
+
static void hci_phy_link_complete_evt(struct hci_dev *hdev,
struct sk_buff *skb)
{
@@ -4522,6 +4428,7 @@ static void hci_disconn_phylink_complete_evt(struct hci_dev *hdev,
hci_dev_unlock(hdev);
}
+#endif
static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
@@ -4540,7 +4447,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
*/
hci_dev_clear_flag(hdev, HCI_LE_ADV);
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
+ conn = hci_lookup_le_connect(hdev);
if (!conn) {
conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role);
if (!conn) {
@@ -4733,42 +4640,49 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
/* If we're not connectable only connect devices that we have in
* our pend_le_conns list.
*/
- params = hci_pend_le_action_lookup(&hdev->pend_le_conns,
- addr, addr_type);
+ params = hci_explicit_connect_lookup(hdev, addr, addr_type);
+
if (!params)
return NULL;
- switch (params->auto_connect) {
- case HCI_AUTO_CONN_DIRECT:
- /* Only devices advertising with ADV_DIRECT_IND are
- * triggering a connection attempt. This is allowing
- * incoming connections from slave devices.
- */
- if (adv_type != LE_ADV_DIRECT_IND)
+ if (!params->explicit_connect) {
+ switch (params->auto_connect) {
+ case HCI_AUTO_CONN_DIRECT:
+ /* Only devices advertising with ADV_DIRECT_IND are
+ * triggering a connection attempt. This is allowing
+ * incoming connections from slave devices.
+ */
+ if (adv_type != LE_ADV_DIRECT_IND)
+ return NULL;
+ break;
+ case HCI_AUTO_CONN_ALWAYS:
+ /* Devices advertising with ADV_IND or ADV_DIRECT_IND
+ * are triggering a connection attempt. This means
+ * that incoming connections from slave device are
+ * accepted and also outgoing connections to slave
+ * devices are established when found.
+ */
+ break;
+ default:
return NULL;
- break;
- case HCI_AUTO_CONN_ALWAYS:
- /* Devices advertising with ADV_IND or ADV_DIRECT_IND
- * are triggering a connection attempt. This means
- * that incoming connectioms from slave device are
- * accepted and also outgoing connections to slave
- * devices are established when found.
- */
- break;
- default:
- return NULL;
+ }
}
conn = hci_connect_le(hdev, addr, addr_type, BT_SECURITY_LOW,
HCI_LE_AUTOCONN_TIMEOUT, HCI_ROLE_MASTER);
if (!IS_ERR(conn)) {
- /* Store the pointer since we don't really have any
+ /* If explicit_connect is set, conn is already owned
+ * by the higher layer that tried to connect; if not, then
+ * store the pointer since we don't really have any
* other owner of the object besides the params that
* triggered it. This way we can abort the connection if
* the parameters get removed and keep the reference
* count consistent once the connection is established.
*/
- params->conn = hci_conn_get(conn);
+
+ if (!params->explicit_connect)
+ params->conn = hci_conn_get(conn);
+
return conn;
}
@@ -5206,22 +5120,6 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb)
}
}
-static void hci_chan_selected_evt(struct hci_dev *hdev, struct sk_buff *skb)
-{
- struct hci_ev_channel_selected *ev = (void *) skb->data;
- struct hci_conn *hcon;
-
- BT_DBG("%s handle 0x%2.2x", hdev->name, ev->phy_handle);
-
- skb_pull(skb, sizeof(*ev));
-
- hcon = hci_conn_hash_lookup_handle(hdev, ev->phy_handle);
- if (!hcon)
- return;
-
- amp_read_loc_assoc_final_data(hdev, hcon);
-}
-
static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode,
u8 event, struct sk_buff *skb)
{
@@ -5442,14 +5340,15 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_le_meta_evt(hdev, skb);
break;
- case HCI_EV_CHANNEL_SELECTED:
- hci_chan_selected_evt(hdev, skb);
- break;
-
case HCI_EV_REMOTE_OOB_DATA_REQUEST:
hci_remote_oob_data_request_evt(hdev, skb);
break;
+#if IS_ENABLED(CONFIG_BT_HS)
+ case HCI_EV_CHANNEL_SELECTED:
+ hci_chan_selected_evt(hdev, skb);
+ break;
+
case HCI_EV_PHY_LINK_COMPLETE:
hci_phy_link_complete_evt(hdev, skb);
break;
@@ -5465,6 +5364,7 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
case HCI_EV_DISCONN_PHY_LINK_COMPLETE:
hci_disconn_phylink_complete_evt(hdev, skb);
break;
+#endif
case HCI_EV_NUM_COMP_BLOCKS:
hci_num_comp_blocks_evt(hdev, skb);
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index d6025d6e6d59..b7369220c9ef 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -317,7 +317,7 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa)
* address be updated at the next cycle.
*/
if (hci_dev_test_flag(hdev, HCI_LE_ADV) ||
- hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) {
+ hci_lookup_le_connect(hdev)) {
BT_DBG("Deferring random address update");
hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
return;
@@ -479,7 +479,6 @@ void hci_update_page_scan(struct hci_dev *hdev)
void __hci_update_background_scan(struct hci_request *req)
{
struct hci_dev *hdev = req->hdev;
- struct hci_conn *conn;
if (!test_bit(HCI_UP, &hdev->flags) ||
test_bit(HCI_INIT, &hdev->flags) ||
@@ -529,8 +528,7 @@ void __hci_update_background_scan(struct hci_request *req)
* since some controllers are not able to scan and connect at
* the same time.
*/
- conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT);
- if (conn)
+ if (hci_lookup_le_connect(hdev))
return;
/* If controller is currently scanning, we stop it to ensure we
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index 45fffa413642..7c65ee200c29 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -7113,8 +7113,10 @@ int l2cap_chan_connect(struct l2cap_chan *chan, __le16 psm, u16 cid,
else
role = HCI_ROLE_MASTER;
- hcon = hci_connect_le(hdev, dst, dst_type, chan->sec_level,
- HCI_LE_CONN_TIMEOUT, role);
+ hcon = hci_connect_le_scan(hdev, dst, dst_type,
+ chan->sec_level,
+ HCI_LE_CONN_TIMEOUT,
+ role);
} else {
u8 auth_type = l2cap_get_auth_type(chan);
hcon = hci_connect_acl(hdev, dst, chan->sec_level, auth_type);
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 244287706f91..586b3d580cfc 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1054,18 +1054,23 @@ static void l2cap_sock_kill(struct sock *sk)
sock_put(sk);
}
-static int __l2cap_wait_ack(struct sock *sk)
+static int __l2cap_wait_ack(struct sock *sk, struct l2cap_chan *chan)
{
- struct l2cap_chan *chan = l2cap_pi(sk)->chan;
DECLARE_WAITQUEUE(wait, current);
int err = 0;
- int timeo = HZ/5;
+ int timeo = L2CAP_WAIT_ACK_POLL_PERIOD;
+ /* Timeout to prevent infinite loop */
+ unsigned long timeout = jiffies + L2CAP_WAIT_ACK_TIMEOUT;
add_wait_queue(sk_sleep(sk), &wait);
set_current_state(TASK_INTERRUPTIBLE);
- while (chan->unacked_frames > 0 && chan->conn) {
+ do {
+ BT_DBG("Waiting for %d ACKs, timeout %04d ms",
+ chan->unacked_frames, time_after(jiffies, timeout) ? 0 :
+ jiffies_to_msecs(timeout - jiffies));
+
if (!timeo)
- timeo = HZ/5;
+ timeo = L2CAP_WAIT_ACK_POLL_PERIOD;
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
@@ -1080,7 +1085,15 @@ static int __l2cap_wait_ack(struct sock *sk)
err = sock_error(sk);
if (err)
break;
- }
+
+ if (time_after(jiffies, timeout)) {
+ err = -ENOLINK;
+ break;
+ }
+
+ } while (chan->unacked_frames > 0 &&
+ chan->state == BT_CONNECTED);
+
set_current_state(TASK_RUNNING);
remove_wait_queue(sk_sleep(sk), &wait);
return err;
@@ -1098,7 +1111,12 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
if (!sk)
return 0;
+ /* prevent sk structure from being freed whilst unlocked */
+ sock_hold(sk);
+
chan = l2cap_pi(sk)->chan;
+ /* prevent chan structure from being freed whilst unlocked */
+ l2cap_chan_hold(chan);
conn = chan->conn;
BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
@@ -1110,8 +1128,10 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
lock_sock(sk);
if (!sk->sk_shutdown) {
- if (chan->mode == L2CAP_MODE_ERTM)
- err = __l2cap_wait_ack(sk);
+ if (chan->mode == L2CAP_MODE_ERTM &&
+ chan->unacked_frames > 0 &&
+ chan->state == BT_CONNECTED)
+ err = __l2cap_wait_ack(sk, chan);
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1134,6 +1154,11 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
if (conn)
mutex_unlock(&conn->chan_lock);
+ l2cap_chan_put(chan);
+ sock_put(sk);
+
+ BT_DBG("err: %d", err);
+
return err;
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 92720f3fe573..ccaf5a436d8f 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3564,9 +3564,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
*/
hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type);
- conn = hci_connect_le(hdev, &cp->addr.bdaddr, addr_type,
- sec_level, HCI_LE_CONN_TIMEOUT,
- HCI_ROLE_MASTER);
+ conn = hci_connect_le_scan(hdev, &cp->addr.bdaddr,
+ addr_type, sec_level,
+ HCI_LE_CONN_TIMEOUT,
+ HCI_ROLE_MASTER);
}
if (IS_ERR(conn)) {
@@ -4210,7 +4211,7 @@ static bool trigger_le_scan(struct hci_request *req, u16 interval, u8 *status)
/* Don't let discovery abort an outgoing connection attempt
* that's using directed advertising.
*/
- if (hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) {
+ if (hci_lookup_le_connect(hdev)) {
*status = MGMT_STATUS_REJECTED;
return false;
}
@@ -6107,6 +6108,12 @@ static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr,
switch (auto_connect) {
case HCI_AUTO_CONN_DISABLED:
case HCI_AUTO_CONN_LINK_LOSS:
+ /* If auto connect is being disabled when we're trying to
+ * connect to the device, keep connecting.
+ */
+ if (params->explicit_connect)
+ list_add(&params->action, &hdev->pend_le_conns);
+
__hci_update_background_scan(req);
break;
case HCI_AUTO_CONN_REPORT:
@@ -6226,6 +6233,17 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
else
auto_conn = HCI_AUTO_CONN_REPORT;
+ /* Kernel internally uses conn_params with resolvable private
+ * address, but Add Device allows only identity addresses.
+ * Make sure it is enforced before calling
+ * hci_conn_params_lookup.
+ */
+ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) {
+ err = cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS);
+ mgmt_pending_remove(cmd);
+ goto unlock;
+ }
+
/* If the connection parameters don't exist for this device,
* they will be created and configured with defaults.
*/
@@ -6340,6 +6358,18 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
else
addr_type = ADDR_LE_DEV_RANDOM;
+ /* Kernel internally uses conn_params with resolvable private
+ * address, but Remove Device allows only identity addresses.
+ * Make sure it is enforced before calling
+ * hci_conn_params_lookup.
+ */
+ if (!hci_is_identity_address(&cp->addr.bdaddr, addr_type)) {
+ err = cmd->cmd_complete(cmd,
+ MGMT_STATUS_INVALID_PARAMS);
+ mgmt_pending_remove(cmd);
+ goto unlock;
+ }
+
params = hci_conn_params_lookup(hdev, &cp->addr.bdaddr,
addr_type);
if (!params) {
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 4ff77a16956c..6ed2feb51e3c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -339,6 +339,7 @@ static const struct net_device_ops br_netdev_ops = {
.ndo_bridge_getlink = br_getlink,
.ndo_bridge_setlink = br_setlink,
.ndo_bridge_dellink = br_dellink,
+ .ndo_features_check = passthru_features_check,
};
static void br_dev_free(struct net_device *dev)
@@ -364,8 +365,7 @@ void br_dev_setup(struct net_device *dev)
dev->destructor = br_dev_free;
dev->ethtool_ops = &br_ethtool_ops;
SET_NETDEV_DEVTYPE(dev, &br_type);
- dev->tx_queue_len = 0;
- dev->priv_flags = IFF_EBRIDGE;
+ dev->priv_flags = IFF_EBRIDGE | IFF_NO_QUEUE;
dev->features = COMMON_FEATURES | NETIF_F_LLTX | NETIF_F_NETNS_LOCAL |
NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index a538cb1199a3..45e4757c6fd2 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -281,6 +281,7 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
br_fdb_delete_by_port(br, NULL, 0, 1);
br_vlan_flush(br);
+ br_multicast_dev_del(br);
del_timer_sync(&br->gc_timer);
br_sysfs_delbr(br->dev);
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index c94321955db7..d747275fad18 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -85,6 +85,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
memset(&e, 0, sizeof(e));
e.ifindex = port->dev->ifindex;
e.state = p->state;
+ e.vid = p->addr.vid;
if (p->addr.proto == htons(ETH_P_IP))
e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
@@ -230,7 +231,7 @@ errout:
}
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type)
+ struct br_ip *group, int type, u8 state)
{
struct br_mdb_entry entry;
@@ -241,9 +242,78 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
#if IS_ENABLED(CONFIG_IPV6)
entry.addr.u.ip6 = group->u.ip6;
#endif
+ entry.state = state;
+ entry.vid = group->vid;
__br_mdb_notify(dev, &entry, type);
}
+static int nlmsg_populate_rtr_fill(struct sk_buff *skb,
+ struct net_device *dev,
+ int ifindex, u32 pid,
+ u32 seq, int type, unsigned int flags)
+{
+ struct br_port_msg *bpm;
+ struct nlmsghdr *nlh;
+ struct nlattr *nest;
+
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ bpm = nlmsg_data(nlh);
+ memset(bpm, 0, sizeof(*bpm));
+ bpm->family = AF_BRIDGE;
+ bpm->ifindex = dev->ifindex;
+ nest = nla_nest_start(skb, MDBA_ROUTER);
+ if (!nest)
+ goto cancel;
+
+ if (nla_put_u32(skb, MDBA_ROUTER_PORT, ifindex))
+ goto end;
+
+ nla_nest_end(skb, nest);
+ nlmsg_end(skb, nlh);
+ return 0;
+
+end:
+ nla_nest_end(skb, nest);
+cancel:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static inline size_t rtnl_rtr_nlmsg_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct br_port_msg))
+ + nla_total_size(sizeof(__u32));
+}
+
+void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+ int type)
+{
+ struct net *net = dev_net(dev);
+ struct sk_buff *skb;
+ int err = -ENOBUFS;
+ int ifindex;
+
+ ifindex = port ? port->dev->ifindex : 0;
+ skb = nlmsg_new(rtnl_rtr_nlmsg_size(), GFP_ATOMIC);
+ if (!skb)
+ goto errout;
+
+ err = nlmsg_populate_rtr_fill(skb, dev, ifindex, 0, 0, type, NTF_SELF);
+ if (err < 0) {
+ kfree_skb(skb);
+ goto errout;
+ }
+
+ rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC);
+ return;
+
+errout:
+ rtnl_set_sk_err(net, RTNLGRP_MDB, err);
+}
+
static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
{
if (entry->ifindex == 0)
@@ -263,6 +333,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry)
return false;
if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY)
return false;
+ if (entry->vid >= VLAN_VID_MASK)
+ return false;
return true;
}
@@ -374,6 +446,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
return -EINVAL;
memset(&ip, 0, sizeof(ip));
+ ip.vid = entry->vid;
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
@@ -391,8 +464,11 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
+ unsigned short vid = VLAN_N_VID;
+ struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
- struct net_device *dev;
+ struct net_bridge_port *p;
+ struct net_port_vlans *pv;
struct net_bridge *br;
int err;
@@ -402,9 +478,32 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
br = netdev_priv(dev);
- err = __br_mdb_add(net, br, entry);
- if (!err)
- __br_mdb_notify(dev, entry, RTM_NEWMDB);
+ /* If vlan filtering is enabled and VLAN is not specified
+ * install mdb entry on all vlans configured on the port.
+ */
+ pdev = __dev_get_by_index(net, entry->ifindex);
+ if (!pdev)
+ return -ENODEV;
+
+ p = br_port_get_rtnl(pdev);
+ if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ return -EINVAL;
+
+ pv = nbp_get_vlan_info(p);
+ if (br_vlan_enabled(br) && pv && entry->vid == 0) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+ entry->vid = vid;
+ err = __br_mdb_add(net, br, entry);
+ if (err)
+ break;
+ __br_mdb_notify(dev, entry, RTM_NEWMDB);
+ }
+ } else {
+ err = __br_mdb_add(net, br, entry);
+ if (!err)
+ __br_mdb_notify(dev, entry, RTM_NEWMDB);
+ }
+
return err;
}
@@ -421,6 +520,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
return -EINVAL;
memset(&ip, 0, sizeof(ip));
+ ip.vid = entry->vid;
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
@@ -465,8 +565,12 @@ unlock:
static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
{
- struct net_device *dev;
+ struct net *net = sock_net(skb->sk);
+ unsigned short vid = VLAN_N_VID;
+ struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
+ struct net_bridge_port *p;
+ struct net_port_vlans *pv;
struct net_bridge *br;
int err;
@@ -476,9 +580,31 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
br = netdev_priv(dev);
- err = __br_mdb_del(br, entry);
- if (!err)
- __br_mdb_notify(dev, entry, RTM_DELMDB);
+ /* If vlan filtering is enabled and VLAN is not specified
+ * delete mdb entry on all vlans configured on the port.
+ */
+ pdev = __dev_get_by_index(net, entry->ifindex);
+ if (!pdev)
+ return -ENODEV;
+
+ p = br_port_get_rtnl(pdev);
+ if (!p || p->br != br || p->state == BR_STATE_DISABLED)
+ return -EINVAL;
+
+ pv = nbp_get_vlan_info(p);
+ if (br_vlan_enabled(br) && pv && entry->vid == 0) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+ entry->vid = vid;
+ err = __br_mdb_del(br, entry);
+ if (!err)
+ __br_mdb_notify(dev, entry, RTM_DELMDB);
+ }
+ } else {
+ err = __br_mdb_del(br, entry);
+ if (!err)
+ __br_mdb_notify(dev, entry, RTM_DELMDB);
+ }
+
return err;
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 1285eaf5dc22..66efdc21f548 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -283,6 +283,8 @@ static void br_multicast_del_pg(struct net_bridge *br,
rcu_assign_pointer(*pp, p->next);
hlist_del_init(&p->mglist);
del_timer(&p->timer);
+ br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB,
+ p->state);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
if (!mp->ports && !mp->mglist &&
@@ -704,7 +706,7 @@ static int br_multicast_add_group(struct net_bridge *br,
if (unlikely(!p))
goto err;
rcu_assign_pointer(*pp, p);
- br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
+ br_mdb_notify(br->dev, port, group, RTM_NEWMDB, MDB_TEMPORARY);
found:
mod_timer(&p->timer, now + br->multicast_membership_interval);
@@ -764,6 +766,7 @@ static void br_multicast_router_expired(unsigned long data)
goto out;
hlist_del_init_rcu(&port->rlist);
+ br_rtr_notify(br->dev, port, RTM_DELMDB);
out:
spin_unlock(&br->multicast_lock);
@@ -924,6 +927,15 @@ void br_multicast_add_port(struct net_bridge_port *port)
void br_multicast_del_port(struct net_bridge_port *port)
{
+ struct net_bridge *br = port->br;
+ struct net_bridge_port_group *pg;
+ struct hlist_node *n;
+
+ /* Take care of the remaining groups, only perm ones should be left */
+ spin_lock_bh(&br->multicast_lock);
+ hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
+ br_multicast_del_pg(br, pg);
+ spin_unlock_bh(&br->multicast_lock);
del_timer_sync(&port->multicast_router_timer);
}
@@ -963,10 +975,13 @@ void br_multicast_disable_port(struct net_bridge_port *port)
spin_lock(&br->multicast_lock);
hlist_for_each_entry_safe(pg, n, &port->mglist, mglist)
- br_multicast_del_pg(br, pg);
+ if (pg->state == MDB_TEMPORARY)
+ br_multicast_del_pg(br, pg);
- if (!hlist_unhashed(&port->rlist))
+ if (!hlist_unhashed(&port->rlist)) {
hlist_del_init_rcu(&port->rlist);
+ br_rtr_notify(br->dev, port, RTM_DELMDB);
+ }
del_timer(&port->multicast_router_timer);
del_timer(&port->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
@@ -1204,6 +1219,7 @@ static void br_multicast_add_router(struct net_bridge *br,
hlist_add_behind_rcu(&port->rlist, slot);
else
hlist_add_head_rcu(&port->rlist, &br->router_list);
+ br_rtr_notify(br->dev, port, RTM_NEWMDB);
}
static void br_multicast_mark_router(struct net_bridge *br,
@@ -1437,7 +1453,8 @@ br_multicast_leave_group(struct net_bridge *br,
hlist_del_init(&p->mglist);
del_timer(&p->timer);
call_rcu_bh(&p->rcu, br_multicast_free_pg);
- br_mdb_notify(br->dev, port, group, RTM_DELMDB);
+ br_mdb_notify(br->dev, port, group, RTM_DELMDB,
+ p->state);
if (!mp->ports && !mp->mglist &&
netif_running(br->dev))
@@ -1754,12 +1771,6 @@ void br_multicast_open(struct net_bridge *br)
void br_multicast_stop(struct net_bridge *br)
{
- struct net_bridge_mdb_htable *mdb;
- struct net_bridge_mdb_entry *mp;
- struct hlist_node *n;
- u32 ver;
- int i;
-
del_timer_sync(&br->multicast_router_timer);
del_timer_sync(&br->ip4_other_query.timer);
del_timer_sync(&br->ip4_own_query.timer);
@@ -1767,6 +1778,15 @@ void br_multicast_stop(struct net_bridge *br)
del_timer_sync(&br->ip6_other_query.timer);
del_timer_sync(&br->ip6_own_query.timer);
#endif
+}
+
+void br_multicast_dev_del(struct net_bridge *br)
+{
+ struct net_bridge_mdb_htable *mdb;
+ struct net_bridge_mdb_entry *mp;
+ struct hlist_node *n;
+ u32 ver;
+ int i;
spin_lock_bh(&br->multicast_lock);
mdb = mlock_dereference(br->mdb, br);
@@ -1834,8 +1854,10 @@ int br_multicast_set_port_router(struct net_bridge_port *p, unsigned long val)
p->multicast_router = val;
err = 0;
- if (val < 2 && !hlist_unhashed(&p->rlist))
+ if (val < 2 && !hlist_unhashed(&p->rlist)) {
hlist_del_init_rcu(&p->rlist);
+ br_rtr_notify(br->dev, p, RTM_DELMDB);
+ }
if (val == 1)
break;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c8b9bcfe997e..0a6f095bb0c9 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -49,9 +49,9 @@ static struct ctl_table_header *brnf_sysctl_header;
static int brnf_call_iptables __read_mostly = 1;
static int brnf_call_ip6tables __read_mostly = 1;
static int brnf_call_arptables __read_mostly = 1;
-static int brnf_filter_vlan_tagged __read_mostly = 0;
-static int brnf_filter_pppoe_tagged __read_mostly = 0;
-static int brnf_pass_vlan_indev __read_mostly = 0;
+static int brnf_filter_vlan_tagged __read_mostly;
+static int brnf_filter_pppoe_tagged __read_mostly;
+static int brnf_pass_vlan_indev __read_mostly;
#else
#define brnf_call_iptables 1
#define brnf_call_ip6tables 1
@@ -284,7 +284,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
- nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+ nf_bridge->bridged_dnat = 1;
/* FIXME Need to refragment */
ret = neigh->output(neigh, skb);
}
@@ -356,7 +356,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 0;
if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -444,7 +444,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb)
nf_bridge->pkt_otherhost = true;
}
- nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 1;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev);
@@ -850,10 +850,8 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (skb->nf_bridge &&
- !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+ if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
return NF_STOP;
- }
return NF_ACCEPT;
}
@@ -872,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
- nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+ nf_bridge->bridged_dnat = 0;
BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
@@ -887,7 +885,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
static int br_nf_dev_xmit(struct sk_buff *skb)
{
- if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+ if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
br_nf_pre_routing_finish_bridge_slow(skb);
return 1;
}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 13b7d1e3d185..77383bfe7ea3 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->pkt_otherhost = false;
}
- nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+ nf_bridge->in_prerouting = 0;
if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
skb_dst_drop(skb);
v6ops->route_input(skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4d74a0639c4c..dbcb1949ea58 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -166,8 +166,6 @@ static int br_fill_ifvlaninfo_range(struct sk_buff *skb, u16 vid_start,
sizeof(vinfo), &vinfo))
goto nla_put_failure;
- vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN;
-
vinfo.vid = vid_end;
vinfo.flags = flags | BRIDGE_VLAN_INFO_RANGE_END;
if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
@@ -730,6 +728,7 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_AGEING_TIME] = { .type = NLA_U32 },
[IFLA_BR_STP_STATE] = { .type = NLA_U32 },
[IFLA_BR_PRIORITY] = { .type = NLA_U16 },
+ [IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -777,6 +776,14 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
br_stp_set_bridge_priority(br, priority);
}
+ if (data[IFLA_BR_VLAN_FILTERING]) {
+ u8 vlan_filter = nla_get_u8(data[IFLA_BR_VLAN_FILTERING]);
+
+ err = __br_vlan_filter_toggle(br, vlan_filter);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -788,6 +795,7 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size(sizeof(u32)) + /* IFLA_BR_AGEING_TIME */
nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_STATE */
nla_total_size(sizeof(u16)) + /* IFLA_BR_PRIORITY */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */
0;
}
@@ -800,13 +808,15 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
u32 ageing_time = jiffies_to_clock_t(br->ageing_time);
u32 stp_enabled = br->stp_enabled;
u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
+ u8 vlan_enabled = br_vlan_enabled(br);
if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) ||
nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) ||
nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) ||
- nla_put_u16(skb, IFLA_BR_PRIORITY, priority))
+ nla_put_u16(skb, IFLA_BR_PRIORITY, priority) ||
+ nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled))
return -EMSGSIZE;
return 0;
@@ -839,7 +849,7 @@ struct rtnl_link_ops br_link_ops __read_mostly = {
.kind = "bridge",
.priv_size = sizeof(struct net_bridge),
.setup = br_dev_setup,
- .maxtype = IFLA_BRPORT_MAX,
+ .maxtype = IFLA_BR_MAX,
.policy = br_policy,
.validate = br_validate,
.newlink = br_dev_newlink,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8b21146b24a0..3d95647039d0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -466,6 +466,7 @@ void br_multicast_disable_port(struct net_bridge_port *port);
void br_multicast_init(struct net_bridge *br);
void br_multicast_open(struct net_bridge *br);
void br_multicast_stop(struct net_bridge *br);
+void br_multicast_dev_del(struct net_bridge *br);
void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb);
void br_multicast_forward(struct net_bridge_mdb_entry *mdst,
@@ -488,7 +489,9 @@ br_multicast_new_port_group(struct net_bridge_port *port, struct br_ip *group,
void br_mdb_init(void);
void br_mdb_uninit(void);
void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port,
- struct br_ip *group, int type);
+ struct br_ip *group, int type, u8 state);
+void br_rtr_notify(struct net_device *dev, struct net_bridge_port *port,
+ int type);
#define mlock_dereference(X, br) \
rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
@@ -565,6 +568,10 @@ static inline void br_multicast_stop(struct net_bridge *br)
{
}
+static inline void br_multicast_dev_del(struct net_bridge *br)
+{
+}
+
static inline void br_multicast_deliver(struct net_bridge_mdb_entry *mdst,
struct sk_buff *skb)
{
@@ -607,6 +614,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
bool br_vlan_find(struct net_bridge *br, u16 vid);
void br_recalculate_fwd_mask(struct net_bridge *br);
+int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
@@ -764,6 +772,12 @@ static inline int br_vlan_enabled(struct net_bridge *br)
{
return 0;
}
+
+static inline int __br_vlan_filter_toggle(struct net_bridge *br,
+ unsigned long val)
+{
+ return -EOPNOTSUPP;
+}
#endif
struct nf_br_ops {
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 0d41f81838ff..3cef6892c0bb 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -468,21 +468,27 @@ void br_recalculate_fwd_mask(struct net_bridge *br)
~(1u << br->group_addr[5]);
}
-int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
+int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
{
- if (!rtnl_trylock())
- return restart_syscall();
-
if (br->vlan_enabled == val)
- goto unlock;
+ return 0;
br->vlan_enabled = val;
br_manage_promisc(br);
recalculate_group_addr(br);
br_recalculate_fwd_mask(br);
-unlock:
+ return 0;
+}
+
+int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
+{
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ __br_vlan_filter_toggle(br, val);
rtnl_unlock();
+
return 0;
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index edbca468fa73..d730a0f68f46 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -177,7 +177,7 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt)
skb->protocol = htons(ETH_P_CAIF);
/* Check if we need to handle xoff */
- if (likely(caifd->netdev->tx_queue_len == 0))
+ if (likely(caifd->netdev->priv_flags & IFF_NO_QUEUE))
goto noxoff;
if (unlikely(caifd->xoff))
diff --git a/net/core/Makefile b/net/core/Makefile
index fec0856dd6c0..086b01fbe1bd 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -23,3 +23,4 @@ obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
obj-$(CONFIG_NET_PTP_CLASSIFY) += ptp_classifier.o
obj-$(CONFIG_CGROUP_NET_PRIO) += netprio_cgroup.o
obj-$(CONFIG_CGROUP_NET_CLASSID) += netclassid_cgroup.o
+obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
diff --git a/net/core/dev.c b/net/core/dev.c
index a8e4dd430285..b1f3f4844e60 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3061,6 +3061,16 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
else
skb_dst_force(skb);
+#ifdef CONFIG_NET_SWITCHDEV
+ /* Don't forward if offload device already forwarded */
+ if (skb->offload_fwd_mark &&
+ skb->offload_fwd_mark == dev->offload_fwd_mark) {
+ consume_skb(skb);
+ rc = NET_XMIT_SUCCESS;
+ goto out;
+ }
+#endif
+
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
@@ -3645,7 +3655,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
qdisc_skb_cb(skb)->pkt_len = skb->len;
skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
- qdisc_bstats_update_cpu(cl->q, skb);
+ qdisc_bstats_cpu_update(cl->q, skb);
switch (tc_classify(skb, cl, &cl_res)) {
case TC_ACT_OK:
@@ -3653,7 +3663,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
skb->tc_index = TC_H_MIN(cl_res.classid);
break;
case TC_ACT_SHOT:
- qdisc_qstats_drop_cpu(cl->q);
+ qdisc_qstats_cpu_drop(cl->q);
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
kfree_skb(skb);
@@ -4985,7 +4995,7 @@ EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
* Gets the next netdev_adjacent->private from the dev's lower neighbour
* list, starting from iter position. The caller must hold either hold the
* RTNL lock or its own locking that guarantees that the neighbour lower
- * list will remain unchainged.
+ * list will remain unchanged.
*/
void *netdev_lower_get_next_private(struct net_device *dev,
struct list_head **iter)
@@ -5040,7 +5050,7 @@ EXPORT_SYMBOL(netdev_lower_get_next_private_rcu);
* Gets the next netdev_adjacent from the dev's lower neighbour
* list, starting from iter position. The caller must hold RTNL lock or
* its own locking that guarantees that the neighbour lower
- * list will remain unchainged.
+ * list will remain unchanged.
*/
void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
{
@@ -6075,6 +6085,26 @@ int dev_get_phys_port_name(struct net_device *dev,
EXPORT_SYMBOL(dev_get_phys_port_name);
/**
+ * dev_change_proto_down - update protocol port state information
+ * @dev: device
+ * @proto_down: new value
+ *
+ * This info can be used by switch drivers to set the phys state of the
+ * port.
+ */
+int dev_change_proto_down(struct net_device *dev, bool proto_down)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+	int err;
+
+	/* The missing-callback check comes first so that it wins over the
+	 * device-present check when both would fail.
+	 */
+	if (!ops->ndo_change_proto_down)
+		err = -EOPNOTSUPP;
+	else if (!netif_device_present(dev))
+		err = -ENODEV;
+	else
+		err = ops->ndo_change_proto_down(dev, proto_down);
+
+	return err;
+}
+EXPORT_SYMBOL(dev_change_proto_down);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*
@@ -6967,6 +6997,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;
setup(dev);
+ if (!dev->tx_queue_len)
+ printk(KERN_WARNING "%s uses DEPRECATED zero tx_queue_len - convert driver to use IFF_NO_QUEUE instead.\n", name);
+
dev->num_tx_queues = txqs;
dev->real_num_tx_queues = txqs;
if (netif_alloc_netdev_queues(dev))
@@ -7639,7 +7672,7 @@ static int __init net_dev_init(void)
open_softirq(NET_RX_SOFTIRQ, net_rx_action);
hotcpu_notifier(dev_cpu_callback, 0);
- dst_init();
+ dst_subsys_init();
rc = 0;
out:
return rc;
diff --git a/net/core/dst.c b/net/core/dst.c
index 002144bea935..50dcdbb0ee46 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -20,8 +20,10 @@
#include <net/net_namespace.h>
#include <linux/sched.h>
#include <linux/prefetch.h>
+#include <net/lwtunnel.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
/*
* Theory of operations:
@@ -158,19 +160,10 @@ const u32 dst_default_metrics[RTAX_MAX + 1] = {
[RTAX_MAX] = 0xdeadbeef,
};
-
-void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
- int initial_ref, int initial_obsolete, unsigned short flags)
+void dst_init(struct dst_entry *dst, struct dst_ops *ops,
+ struct net_device *dev, int initial_ref, int initial_obsolete,
+ unsigned short flags)
{
- struct dst_entry *dst;
-
- if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
- if (ops->gc(ops))
- return NULL;
- }
- dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
- if (!dst)
- return NULL;
dst->child = NULL;
dst->dev = dev;
if (dev)
@@ -192,6 +185,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
#ifdef CONFIG_IP_ROUTE_CLASSID
dst->tclassid = 0;
#endif
+ dst->lwtstate = NULL;
atomic_set(&dst->__refcnt, initial_ref);
dst->__use = 0;
dst->lastuse = jiffies;
@@ -200,6 +194,25 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
dst->next = NULL;
if (!(flags & DST_NOCOUNT))
dst_entries_add(ops, 1);
+}
+EXPORT_SYMBOL(dst_init);
+
+void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
+ int initial_ref, int initial_obsolete, unsigned short flags)
+{
+ struct dst_entry *dst;
+
+ if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
+ if (ops->gc(ops))
+ return NULL;
+ }
+
+ dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
+ if (!dst)
+ return NULL;
+
+ dst_init(dst, ops, dev, initial_ref, initial_obsolete, flags);
+
return dst;
}
EXPORT_SYMBOL(dst_alloc);
@@ -248,7 +261,12 @@ again:
dst->ops->destroy(dst);
if (dst->dev)
dev_put(dst->dev);
- kmem_cache_free(dst->ops->kmem_cachep, dst);
+
+ if (dst->flags & DST_METADATA)
+ kfree(dst);
+ else
+ kmem_cache_free(dst->ops->kmem_cachep, dst);
+ lwtstate_put(dst->lwtstate);
dst = child;
if (dst) {
@@ -329,6 +347,70 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
}
EXPORT_SYMBOL(__dst_destroy_metrics_generic);
+/* dst_ops instance handed to dst_init() by __metadata_dst_init(); only the
+ * address family is filled in here.
+ */
+static struct dst_ops md_dst_ops = {
+ .family = AF_UNSPEC,
+};
+
+/* Output handler assigned to metadata dsts by __metadata_dst_init(): warns
+ * once, frees the skb and returns 0.
+ */
+static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb)
+{
+ WARN_ONCE(1, "Attempting to call output on metadata dst\n");
+ kfree_skb(skb);
+ return 0;
+}
+
+/* Input handler assigned to metadata dsts by __metadata_dst_init(): warns
+ * once, frees the skb and returns 0.
+ */
+static int dst_md_discard(struct sk_buff *skb)
+{
+ WARN_ONCE(1, "Attempting to call input on metadata dst\n");
+ kfree_skb(skb);
+ return 0;
+}
+
+/* Initialise one metadata_dst in place: set up the embedded dst_entry, point
+ * its input/output hooks at the discard handlers, clear everything that
+ * follows the dst_entry (including @optslen trailing bytes) and record
+ * @optslen.
+ */
+static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
+{
+	struct dst_entry *entry = &md_dst->dst;
+	size_t tail_len = sizeof(*md_dst) + optslen - sizeof(*entry);
+
+	dst_init(entry, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
+		 DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
+
+	entry->input = dst_md_discard;
+	entry->output = dst_md_discard_sk;
+
+	/* zero the remainder of the object past the embedded dst_entry */
+	memset(entry + 1, 0, tail_len);
+	md_dst->opts_len = optslen;
+}
+
+/* Allocate a metadata_dst with @optslen extra trailing bytes using kmalloc()
+ * and initialise it. Returns NULL when the allocation fails.
+ */
+struct metadata_dst *metadata_dst_alloc(u8 optslen, gfp_t flags)
+{
+	struct metadata_dst *md;
+
+	md = kmalloc(sizeof(*md) + optslen, flags);
+	if (md)
+		__metadata_dst_init(md, optslen);
+
+	return md;
+}
+EXPORT_SYMBOL_GPL(metadata_dst_alloc);
+
+/* Per-cpu variant of metadata_dst_alloc(): allocates one metadata_dst (plus
+ * @optslen trailing bytes) per possible CPU and initialises each copy.
+ * Returns NULL when the per-cpu allocation fails.
+ */
+struct metadata_dst __percpu *metadata_dst_alloc_percpu(u8 optslen, gfp_t flags)
+{
+	struct metadata_dst __percpu *pcpu_md;
+	int cpu;
+
+	pcpu_md = __alloc_percpu_gfp(sizeof(struct metadata_dst) + optslen,
+				     __alignof__(struct metadata_dst), flags);
+	if (pcpu_md) {
+		for_each_possible_cpu(cpu)
+			__metadata_dst_init(per_cpu_ptr(pcpu_md, cpu), optslen);
+	}
+
+	return pcpu_md;
+}
+EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
+
/* Dirty hack. We did it in 2.2 (in __dst_free),
* we have _very_ good reasons not to repeat
* this mistake in 2.3, but we have no choice
@@ -393,7 +475,7 @@ static struct notifier_block dst_dev_notifier = {
.priority = -10, /* must be called after other network notifiers */
};
-void __init dst_init(void)
+void __init dst_subsys_init(void)
{
register_netdevice_notifier(&dst_dev_notifier);
}
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9a12668f7d62..ae8306e7c56f 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -16,6 +16,7 @@
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/fib_rules.h>
+#include <net/ip_tunnels.h>
int fib_default_rule_add(struct fib_rules_ops *ops,
u32 pref, u32 table, u32 flags)
@@ -186,6 +187,9 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
if ((rule->mark ^ fl->flowi_mark) & rule->mark_mask)
goto out;
+ if (rule->tun_id && (rule->tun_id != fl->flowi_tun_key.tun_id))
+ goto out;
+
ret = ops->match(rule, fl, flags);
out:
return (rule->flags & FIB_RULE_INVERT) ? !ret : ret;
@@ -330,6 +334,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (tb[FRA_FWMASK])
rule->mark_mask = nla_get_u32(tb[FRA_FWMASK]);
+ if (tb[FRA_TUN_ID])
+ rule->tun_id = nla_get_be64(tb[FRA_TUN_ID]);
+
rule->action = frh->action;
rule->flags = frh->flags;
rule->table = frh_get_table(frh, tb);
@@ -407,6 +414,9 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (unresolved)
ops->unresolved_rules++;
+ if (rule->tun_id)
+ ip_tunnel_need_metadata();
+
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
@@ -473,6 +483,10 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
(rule->mark_mask != nla_get_u32(tb[FRA_FWMASK])))
continue;
+ if (tb[FRA_TUN_ID] &&
+ (rule->tun_id != nla_get_be64(tb[FRA_TUN_ID])))
+ continue;
+
if (!ops->compare(rule, frh, tb))
continue;
@@ -487,6 +501,9 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
goto errout;
}
+ if (rule->tun_id)
+ ip_tunnel_unneed_metadata();
+
list_del_rcu(&rule->list);
if (rule->action == FR_ACT_GOTO) {
@@ -535,7 +552,8 @@ static inline size_t fib_rule_nlmsg_size(struct fib_rules_ops *ops,
+ nla_total_size(4) /* FRA_SUPPRESS_PREFIXLEN */
+ nla_total_size(4) /* FRA_SUPPRESS_IFGROUP */
+ nla_total_size(4) /* FRA_FWMARK */
- + nla_total_size(4); /* FRA_FWMASK */
+ + nla_total_size(4) /* FRA_FWMASK */
+ + nla_total_size(8); /* FRA_TUN_ID */
if (ops->nlmsg_payload)
payload += ops->nlmsg_payload(rule);
@@ -591,7 +609,9 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
((rule->mark_mask || rule->mark) &&
nla_put_u32(skb, FRA_FWMASK, rule->mark_mask)) ||
(rule->target &&
- nla_put_u32(skb, FRA_GOTO, rule->target)))
+ nla_put_u32(skb, FRA_GOTO, rule->target)) ||
+ (rule->tun_id &&
+ nla_put_be64(skb, FRA_TUN_ID, rule->tun_id)))
goto nla_put_failure;
if (rule->suppress_ifgroup != -1) {
diff --git a/net/core/filter.c b/net/core/filter.c
index be3098fb65e4..b4adc961413f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -47,6 +47,8 @@
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
+#include <net/cls_cgroup.h>
+#include <net/dst_metadata.h>
/**
* sk_filter - run a packet through a socket filter
@@ -1122,6 +1124,7 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
*pfp = fp;
return 0;
}
+EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
void bpf_prog_destroy(struct bpf_prog *fp)
{
@@ -1346,7 +1349,7 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
- u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+ bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags);
int offset = (int) r2;
__sum16 sum, *ptr;
@@ -1424,6 +1427,136 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
+/* BPF helper: r1 carries the skb pointer; the result is whatever
+ * task_get_classid() returns for it. r2..r5 are unused.
+ */
+static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (unsigned long) r1;
+
+	return task_get_classid(skb);
+}
+
+static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
+	.func		= bpf_get_cgroup_classid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+
+/* BPF helper: push a VLAN tag onto the skb passed in r1. r2 is the VLAN
+ * protocol; anything other than 802.1Q or 802.1AD is replaced by 802.1Q
+ * before calling skb_vlan_push().
+ */
+static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	__be16 proto = (__force __be16) r2;
+	bool known_proto = proto == htons(ETH_P_8021Q) ||
+			   proto == htons(ETH_P_8021AD);
+
+	if (unlikely(!known_proto))
+		proto = htons(ETH_P_8021Q);
+
+	return skb_vlan_push(skb, proto, vlan_tci);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_push_proto = {
+	.func		= bpf_skb_vlan_push,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
+
+/* BPF helper: forwards the skb passed in r1 to skb_vlan_pop() and returns
+ * its result. r2..r5 are unused.
+ */
+static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	return skb_vlan_pop((struct sk_buff *) (long) r1);
+}
+
+const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
+	.func		= bpf_skb_vlan_pop,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
+
+/* Returns true when @func is one of the VLAN push/pop helpers defined in
+ * this file, false for any other pointer.
+ */
+bool bpf_helper_changes_skb_data(void *func)
+{
+	return func == bpf_skb_vlan_push || func == bpf_skb_vlan_pop;
+}
+
+/* BPF helper: copy tunnel id and IPv4 source address from the skb's tunnel
+ * info into the bpf_tunnel_key that r2 points at. Returns -EINVAL when
+ * @size is not sizeof(struct bpf_tunnel_key), when @flags is non-zero, or
+ * when the skb has no tunnel info; 0 otherwise.
+ */
+static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	struct bpf_tunnel_key *key_out = (struct bpf_tunnel_key *) (long) r2;
+	struct ip_tunnel_info *tun = skb_tunnel_info(skb);
+	bool bad_args = size != sizeof(struct bpf_tunnel_key) || flags || !tun;
+
+	if (unlikely(bad_args))
+		return -EINVAL;
+
+	key_out->remote_ipv4 = be32_to_cpu(tun->key.u.ipv4.src);
+	key_out->tunnel_id = be64_to_cpu(tun->key.tun_id);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
+	.func		= bpf_skb_get_tunnel_key,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_STACK,
+	.arg3_type	= ARG_CONST_STACK_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+static struct metadata_dst __percpu *md_dst;
+
+/* BPF helper: attach this CPU's metadata dst to the skb passed in r1 and
+ * fill its tunnel info (TX mode, tunnel id, IPv4 destination) from the
+ * bpf_tunnel_key that r2 points at. Returns -EINVAL when @size is not
+ * sizeof(struct bpf_tunnel_key) or @flags is non-zero; 0 otherwise.
+ */
+static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	struct bpf_tunnel_key *key_in = (struct bpf_tunnel_key *) (long) r2;
+	struct metadata_dst *md = this_cpu_ptr(md_dst);
+	struct dst_entry *md_as_dst = (struct dst_entry *) md;
+	struct ip_tunnel_info *tun;
+
+	if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags))
+		return -EINVAL;
+
+	/* swap the skb's current dst for a held reference on the per-cpu one */
+	skb_dst_drop(skb);
+	dst_hold(md_as_dst);
+	skb_dst_set(skb, md_as_dst);
+
+	tun = &md->u.tun_info;
+	tun->mode = IP_TUNNEL_INFO_TX;
+	tun->key.tun_id = cpu_to_be64(key_in->tunnel_id);
+	tun->key.u.ipv4.dst = cpu_to_be32(key_in->remote_ipv4);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
+	.func		= bpf_skb_set_tunnel_key,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_PTR_TO_STACK,
+	.arg3_type	= ARG_CONST_STACK_SIZE,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+/* Hand out the set_tunnel_key proto, allocating the per-cpu metadata dst on
+ * first use. Returns NULL if that allocation fails.
+ */
+static const struct bpf_func_proto *bpf_get_skb_set_tunnel_key_proto(void)
+{
+	/* race is not possible, since it's called from
+	 * verifier that is holding verifier mutex
+	 */
+	if (!md_dst)
+		md_dst = metadata_dst_alloc_percpu(0, GFP_KERNEL);
+
+	return md_dst ? &bpf_skb_set_tunnel_key_proto : NULL;
+}
+
static const struct bpf_func_proto *
sk_filter_func_proto(enum bpf_func_id func_id)
{
@@ -1461,6 +1594,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_l4_csum_replace_proto;
case BPF_FUNC_clone_redirect:
return &bpf_clone_redirect_proto;
+ case BPF_FUNC_get_cgroup_classid:
+ return &bpf_get_cgroup_classid_proto;
+ case BPF_FUNC_skb_vlan_push:
+ return &bpf_skb_vlan_push_proto;
+ case BPF_FUNC_skb_vlan_pop:
+ return &bpf_skb_vlan_pop_proto;
+ case BPF_FUNC_skb_get_tunnel_key:
+ return &bpf_skb_get_tunnel_key_proto;
+ case BPF_FUNC_skb_set_tunnel_key:
+ return bpf_get_skb_set_tunnel_key_proto();
default:
return sk_filter_func_proto(func_id);
}
@@ -1569,6 +1712,13 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
offsetof(struct net_device, ifindex));
break;
+ case offsetof(struct __sk_buff, hash):
+ BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, hash));
+ break;
+
case offsetof(struct __sk_buff, mark):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 2a834c6179b9..11e6540fa386 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -590,6 +590,15 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
}
EXPORT_SYMBOL(make_flow_keys_digest);
+/* Store @hash in the skb and flag it as software-computed; additionally
+ * flag it as an L4 hash when the dissected keys carry port information.
+ */
+static inline void __skb_set_sw_hash(struct sk_buff *skb, u32 hash,
+				     struct flow_keys *keys)
+{
+	skb->hash = hash;
+	skb->sw_hash = 1;
+	if (keys->ports.ports)
+		skb->l4_hash = 1;
+}
+
/**
* __skb_get_hash: calculate a flow hash
* @skb: sk_buff to calculate flow hash from
@@ -609,10 +618,8 @@ void __skb_get_hash(struct sk_buff *skb)
hash = ___skb_get_hash(skb, &keys, hashrnd);
if (!hash)
return;
- if (keys.ports.ports)
- skb->l4_hash = 1;
- skb->sw_hash = 1;
- skb->hash = hash;
+
+ __skb_set_sw_hash(skb, hash, &keys);
}
EXPORT_SYMBOL(__skb_get_hash);
@@ -624,6 +631,49 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb)
}
EXPORT_SYMBOL(skb_get_hash_perturb);
+/* Build flow keys from an IPv6 flow description (addresses, ports, GRE key,
+ * flow label, protocol), hash them, record the hash on the skb as a
+ * software hash and return skb->hash.
+ */
+__u32 __skb_get_hash_flowi6(struct sk_buff *skb, struct flowi6 *fl6)
+{
+	struct flow_keys keys;
+	u32 hash;
+
+	memset(&keys, 0, sizeof(keys));
+
+	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+	memcpy(&keys.addrs.v6addrs.src, &fl6->saddr,
+	       sizeof(keys.addrs.v6addrs.src));
+	memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr,
+	       sizeof(keys.addrs.v6addrs.dst));
+
+	keys.basic.ip_proto = fl6->flowi6_proto;
+	keys.ports.src = fl6->fl6_sport;
+	keys.ports.dst = fl6->fl6_dport;
+	keys.keyid.keyid = fl6->fl6_gre_key;
+	keys.tags.flow_label = (__force u32)fl6->flowlabel;
+
+	hash = flow_hash_from_keys(&keys);
+	__skb_set_sw_hash(skb, hash, &keys);
+
+	return skb->hash;
+}
+EXPORT_SYMBOL(__skb_get_hash_flowi6);
+
+/* Build flow keys from an IPv4 flow description (addresses, ports, GRE key,
+ * protocol), hash them, record the hash on the skb as a software hash and
+ * return skb->hash.
+ */
+__u32 __skb_get_hash_flowi4(struct sk_buff *skb, struct flowi4 *fl4)
+{
+	struct flow_keys keys;
+	u32 hash;
+
+	memset(&keys, 0, sizeof(keys));
+
+	keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
+	keys.addrs.v4addrs.src = fl4->saddr;
+	keys.addrs.v4addrs.dst = fl4->daddr;
+
+	keys.basic.ip_proto = fl4->flowi4_proto;
+	keys.ports.src = fl4->fl4_sport;
+	keys.ports.dst = fl4->fl4_dport;
+	keys.keyid.keyid = fl4->fl4_gre_key;
+
+	hash = flow_hash_from_keys(&keys);
+	__skb_set_sw_hash(skb, hash, &keys);
+
+	return skb->hash;
+}
+EXPORT_SYMBOL(__skb_get_hash_flowi4);
+
u32 __skb_get_poff(const struct sk_buff *skb, void *data,
const struct flow_keys *keys, int hlen)
{
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
new file mode 100644
index 000000000000..e924c2e08554
--- /dev/null
+++ b/net/core/lwtunnel.c
@@ -0,0 +1,248 @@
+/*
+ * lwtunnel Infrastructure for light weight tunnels like mpls
+ *
+ * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <linux/capability.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/lwtunnel.h>
+#include <linux/in.h>
+#include <linux/init.h>
+#include <linux/err.h>
+
+#include <net/lwtunnel.h>
+#include <net/rtnetlink.h>
+#include <net/ip6_fib.h>
+
+/* Allocate a zeroed lwtunnel_state followed by @encap_len bytes, using
+ * GFP_ATOMIC. Returns NULL on allocation failure.
+ */
+struct lwtunnel_state *lwtunnel_state_alloc(int encap_len)
+{
+	return kzalloc(sizeof(struct lwtunnel_state) + encap_len, GFP_ATOMIC);
+}
+EXPORT_SYMBOL(lwtunnel_state_alloc);
+
+/* Registered encap ops, indexed by encap type. Slots are claimed and
+ * released with cmpxchg() and read with rcu_dereference() in this file.
+ */
+static const struct lwtunnel_encap_ops __rcu *
+ lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly;
+
+/* Register @ops for encap type @num. Returns -ERANGE when @num is above
+ * LWTUNNEL_ENCAP_MAX, -1 when the slot is already occupied, 0 on success.
+ */
+int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int num)
+{
+	const struct lwtunnel_encap_ops **slot;
+
+	if (num > LWTUNNEL_ENCAP_MAX)
+		return -ERANGE;
+
+	slot = (const struct lwtunnel_encap_ops **)&lwtun_encaps[num];
+
+	/* cmpxchg() returns the previous value: non-NULL means the slot
+	 * was taken and nothing was written.
+	 */
+	if (cmpxchg(slot, NULL, ops))
+		return -1;
+
+	return 0;
+}
+EXPORT_SYMBOL(lwtunnel_encap_add_ops);
+
+/* Unregister @ops from encap type @encap_type. Returns -ERANGE for
+ * LWTUNNEL_ENCAP_NONE or a type above LWTUNNEL_ENCAP_MAX, -1 when the slot
+ * did not hold @ops, 0 on success. synchronize_net() is called before
+ * returning in both of the latter cases.
+ */
+int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops,
+			   unsigned int encap_type)
+{
+	const struct lwtunnel_encap_ops **slot;
+	int ret = 0;
+
+	if (encap_type == LWTUNNEL_ENCAP_NONE ||
+	    encap_type > LWTUNNEL_ENCAP_MAX)
+		return -ERANGE;
+
+	slot = (const struct lwtunnel_encap_ops **)&lwtun_encaps[encap_type];
+	if (cmpxchg(slot, ops, NULL) != ops)
+		ret = -1;
+
+	synchronize_net();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_encap_del_ops);
+
+/* Dispatch to the build_state callback registered for @encap_type.
+ * Returns -EINVAL for LWTUNNEL_ENCAP_NONE or an out-of-range type,
+ * -EOPNOTSUPP when no ops (or no build_state callback) is registered,
+ * otherwise the callback's return value.
+ */
+int lwtunnel_build_state(struct net_device *dev, u16 encap_type,
+			 struct nlattr *encap, struct lwtunnel_state **lws)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int ret = -EOPNOTSUPP;
+
+	if (encap_type == LWTUNNEL_ENCAP_NONE ||
+	    encap_type > LWTUNNEL_ENCAP_MAX)
+		return -EINVAL;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[encap_type]);
+	if (likely(ops && ops->build_state))
+		ret = ops->build_state(dev, encap, lws);
+	rcu_read_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_build_state);
+
+/* Emit the RTA_ENCAP nest and the RTA_ENCAP_TYPE attribute for @lwtstate
+ * into @skb.
+ *
+ * Returns 0 when there is nothing to emit (NULL state, LWTUNNEL_ENCAP_NONE
+ * or an out-of-range type), when both attributes were written, or when no
+ * fill_encap callback is registered for the type (the nest is cancelled in
+ * that case). Returns -EMSGSIZE when the nest cannot be opened, otherwise
+ * the error from the callback or from nla_put_u16().
+ */
+int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *ops;
+	struct nlattr *nest;
+	int ret = -EOPNOTSUPP;
+
+	if (!lwtstate)
+		return 0;
+
+	if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
+		return 0;
+
+	/* nla_nest_start() returns NULL when the skb has no room left;
+	 * bail out here instead of handing NULL to nla_nest_end() below.
+	 */
+	nest = nla_nest_start(skb, RTA_ENCAP);
+	if (!nest)
+		return -EMSGSIZE;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(ops && ops->fill_encap))
+		ret = ops->fill_encap(skb, lwtstate);
+	rcu_read_unlock();
+
+	if (ret)
+		goto nla_put_failure;
+	nla_nest_end(skb, nest);
+	ret = nla_put_u16(skb, RTA_ENCAP_TYPE, lwtstate->type);
+	if (ret)
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+
+	/* a missing callback is not treated as an error by this function */
+	return (ret == -EOPNOTSUPP ? 0 : ret);
+}
+EXPORT_SYMBOL(lwtunnel_fill_encap);
+
+/* Returns nla_total_size() of the size reported by the get_encap_size
+ * callback for @lwtstate's type, or 0 when the state is NULL, the type is
+ * LWTUNNEL_ENCAP_NONE or out of range, or no such callback is registered.
+ */
+int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int size = 0;
+
+	if (!lwtstate ||
+	    lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+	    lwtstate->type > LWTUNNEL_ENCAP_MAX)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+	if (likely(ops && ops->get_encap_size))
+		size = nla_total_size(ops->get_encap_size(lwtstate));
+	rcu_read_unlock();
+
+	return size;
+}
+EXPORT_SYMBOL(lwtunnel_get_encap_size);
+
+/* Compare two encap states. Returns 0 when both are NULL, 1 when exactly
+ * one is NULL or the types differ, 0 when the shared type is
+ * LWTUNNEL_ENCAP_NONE or out of range or has no cmp_encap callback,
+ * otherwise the callback's result.
+ */
+int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+	const struct lwtunnel_encap_ops *ops;
+	int diff = 0;
+
+	/* at least one side missing: equal only if both are missing */
+	if (!a || !b)
+		return a != b;
+
+	if (a->type != b->type)
+		return 1;
+
+	if (a->type == LWTUNNEL_ENCAP_NONE ||
+	    a->type > LWTUNNEL_ENCAP_MAX)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[a->type]);
+	if (likely(ops && ops->cmp_encap))
+		diff = ops->cmp_encap(a, b);
+	rcu_read_unlock();
+
+	return diff;
+}
+EXPORT_SYMBOL(lwtunnel_cmp_encap);
+
+/* Run the output callback registered for the encap type of the skb's dst.
+ *
+ * Frees the skb and returns -EINVAL when the skb has no dst, and frees it
+ * and returns -EOPNOTSUPP when the result is -EOPNOTSUPP (including when no
+ * output callback is registered). Returns 0 without touching the skb when
+ * the type is LWTUNNEL_ENCAP_NONE or out of range. Otherwise returns the
+ * callback's result.
+ *
+ * NOTE(review): dst->lwtstate is dereferenced without a NULL check, and the
+ * invalid-type path returns 0 without freeing the skb - confirm that
+ * callers guarantee/expect this.
+ */
+int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	const struct lwtunnel_encap_ops *ops;
+	struct lwtunnel_state *state;
+	int ret = -EOPNOTSUPP;
+
+	if (!dst) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	state = dst->lwtstate;
+	if (state->type == LWTUNNEL_ENCAP_NONE ||
+	    state->type > LWTUNNEL_ENCAP_MAX)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[state->type]);
+	if (likely(ops && ops->output))
+		ret = ops->output(sk, skb);
+	rcu_read_unlock();
+
+	if (ret == -EOPNOTSUPP)
+		kfree_skb(skb);
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_output);
+
+/* Run the input callback registered for the encap type of the skb's dst.
+ *
+ * Frees the skb and returns -EINVAL when the skb has no dst, and frees it
+ * and returns -EOPNOTSUPP when the result is -EOPNOTSUPP (including when no
+ * input callback is registered). Returns 0 without touching the skb when
+ * the type is LWTUNNEL_ENCAP_NONE or out of range. Otherwise returns the
+ * callback's result.
+ *
+ * NOTE(review): dst->lwtstate is dereferenced without a NULL check, and the
+ * invalid-type path returns 0 without freeing the skb - confirm that
+ * callers guarantee/expect this.
+ */
+int lwtunnel_input(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	const struct lwtunnel_encap_ops *ops;
+	struct lwtunnel_state *state;
+	int ret = -EOPNOTSUPP;
+
+	if (!dst) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	state = dst->lwtstate;
+	if (state->type == LWTUNNEL_ENCAP_NONE ||
+	    state->type > LWTUNNEL_ENCAP_MAX)
+		return 0;
+
+	rcu_read_lock();
+	ops = rcu_dereference(lwtun_encaps[state->type]);
+	if (likely(ops && ops->input))
+		ret = ops->input(skb);
+	rcu_read_unlock();
+
+	if (ret == -EOPNOTSUPP)
+		kfree_skb(skb);
+
+	return ret;
+}
+EXPORT_SYMBOL(lwtunnel_input);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 84195dacb8b6..2b515ba7e94f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -274,8 +274,12 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
(entries >= tbl->gc_thresh2 &&
time_after(now, tbl->last_flush + 5 * HZ))) {
if (!neigh_forced_gc(tbl) &&
- entries >= tbl->gc_thresh3)
+ entries >= tbl->gc_thresh3) {
+ net_info_ratelimited("%s: neighbor table overflow!\n",
+ tbl->id);
+ NEIGH_CACHE_STAT_INC(tbl, table_fulls);
goto out_entries;
+ }
}
n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
@@ -1849,6 +1853,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
ndst.ndts_forced_gc_runs += st->forced_gc_runs;
+ ndst.ndts_table_fulls += st->table_fulls;
}
if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
@@ -2717,12 +2722,12 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
struct neigh_statistics *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
+ seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
return 0;
}
seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
- "%08lx %08lx %08lx %08lx %08lx\n",
+ "%08lx %08lx %08lx %08lx %08lx %08lx\n",
atomic_read(&tbl->entries),
st->allocs,
@@ -2739,7 +2744,8 @@ static int neigh_stat_seq_show(struct seq_file *seq, void *v)
st->periodic_gc_runs,
st->forced_gc_runs,
- st->unres_discards
+ st->unres_discards,
+ st->table_fulls
);
return 0;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 18b34d771ed4..b279077c3089 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -404,6 +404,19 @@ static ssize_t group_store(struct device *dev, struct device_attribute *attr,
NETDEVICE_SHOW(group, fmt_dec);
static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store);
+/* Adapter passed to netdev_store() by proto_down_store(): casts the
+ * unsigned long value to bool and forwards it to dev_change_proto_down().
+ */
+static int change_proto_down(struct net_device *dev, unsigned long proto_down)
+{
+ return dev_change_proto_down(dev, (bool) proto_down);
+}
+
+/* Store handler for the proto_down device attribute: delegates to
+ * netdev_store() with change_proto_down() as the setter and returns its
+ * result.
+ */
+static ssize_t proto_down_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ return netdev_store(dev, attr, buf, len, change_proto_down);
+}
+NETDEVICE_SHOW_RW(proto_down, fmt_dec);
+
static ssize_t phys_port_id_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -501,6 +514,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_phys_port_id.attr,
&dev_attr_phys_port_name.attr,
&dev_attr_phys_switch_id.attr,
+ &dev_attr_proto_down.attr,
NULL,
};
ATTRIBUTE_GROUPS(net_class);
@@ -675,7 +689,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
struct rps_map *old_map, *map;
cpumask_var_t mask;
int err, cpu, i;
- static DEFINE_SPINLOCK(rps_map_lock);
+ static DEFINE_MUTEX(rps_map_mutex);
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -708,18 +722,21 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
map = NULL;
}
- spin_lock(&rps_map_lock);
+ mutex_lock(&rps_map_mutex);
old_map = rcu_dereference_protected(queue->rps_map,
- lockdep_is_held(&rps_map_lock));
+ mutex_is_locked(&rps_map_mutex));
rcu_assign_pointer(queue->rps_map, map);
- spin_unlock(&rps_map_lock);
if (map)
static_key_slow_inc(&rps_needed);
- if (old_map) {
- kfree_rcu(old_map, rcu);
+ if (old_map)
static_key_slow_dec(&rps_needed);
- }
+
+ mutex_unlock(&rps_map_mutex);
+
+ if (old_map)
+ kfree_rcu(old_map, rcu);
+
free_cpumask_var(mask);
return len;
}
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1cbd209192ea..de8d5cc5eb24 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -273,7 +273,6 @@ struct pktgen_dev {
/* runtime counters relating to clone_skb */
- __u64 allocated_skbs;
__u32 clone_count;
int last_ok; /* Was last skb sent?
* Or a failed transmit of some sort?
@@ -2279,7 +2278,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until)
static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev)
{
- pkt_dev->pkt_overhead = 0;
+ pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev);
pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32);
pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev);
pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev);
@@ -2788,6 +2787,7 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev,
} else {
skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT);
}
+ skb_reserve(skb, LL_RESERVED_SPACE(dev));
return skb;
}
@@ -3397,7 +3397,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
return;
}
pkt_dev->last_pkt_size = pkt_dev->skb->len;
- pkt_dev->allocated_skbs++;
pkt_dev->clone_count = 0; /* reset counter */
}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index dc004b1e1f85..788ceed39463 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -896,7 +896,9 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ rtnl_link_get_size(dev) /* IFLA_LINKINFO */
+ rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
+ nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
- + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_SWITCH_ID */
+ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */
+ + nla_total_size(1); /* IFLA_PROTO_DOWN */
+
}
static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -1082,7 +1084,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
(dev->ifalias &&
nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) ||
nla_put_u32(skb, IFLA_CARRIER_CHANGES,
- atomic_read(&dev->carrier_changes)))
+ atomic_read(&dev->carrier_changes)) ||
+ nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down))
goto nla_put_failure;
if (1) {
@@ -1319,6 +1322,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_CARRIER_CHANGES] = { .type = NLA_U32 }, /* ignored */
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
+ [IFLA_PROTO_DOWN] = { .type = NLA_U8 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1861,6 +1865,14 @@ static int do_setlink(const struct sk_buff *skb,
}
err = 0;
+ if (tb[IFLA_PROTO_DOWN]) {
+ err = dev_change_proto_down(dev,
+ nla_get_u8(tb[IFLA_PROTO_DOWN]));
+ if (err)
+ goto errout;
+ status |= DO_SETLINK_NOTIFY;
+ }
+
errout:
if (status & DO_SETLINK_MODIFIED) {
if (status & DO_SETLINK_NOTIFY)
@@ -1951,16 +1963,30 @@ static int rtnl_group_dellink(const struct net *net, int group)
return 0;
}
+/* Delete @dev through its rtnl_link_ops: the dellink callback queues
+ * devices on a local list, which is then passed to
+ * unregister_netdevice_many(). Returns -EOPNOTSUPP when the device has no
+ * link ops or no dellink callback, 0 otherwise.
+ */
+int rtnl_delete_link(struct net_device *dev)
+{
+	const struct rtnl_link_ops *ops = dev->rtnl_link_ops;
+	LIST_HEAD(list_kill);
+
+	if (!(ops && ops->dellink))
+		return -EOPNOTSUPP;
+
+	ops->dellink(dev, &list_kill);
+	unregister_netdevice_many(&list_kill);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(rtnl_delete_link);
+
static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
- const struct rtnl_link_ops *ops;
struct net_device *dev;
struct ifinfomsg *ifm;
char ifname[IFNAMSIZ];
struct nlattr *tb[IFLA_MAX+1];
int err;
- LIST_HEAD(list_kill);
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy);
if (err < 0)
@@ -1982,13 +2008,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!dev)
return -ENODEV;
- ops = dev->rtnl_link_ops;
- if (!ops || !ops->dellink)
- return -EOPNOTSUPP;
-
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- return 0;
+ return rtnl_delete_link(dev);
}
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm)
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 43d3dd62fcc8..42689d5c468c 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -60,11 +60,15 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
struct phy_device *phydev;
unsigned int type;
+ if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+ return false;
+
if (skb_headroom(skb) < ETH_HLEN)
return false;
+
__skb_push(skb, ETH_HLEN);
- type = classify(skb);
+ type = ptp_classify_raw(skb);
__skb_pull(skb, ETH_HLEN);
diff --git a/net/core/utils.c b/net/core/utils.c
index a7732a068043..3dffce953c39 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -301,7 +301,7 @@ out:
EXPORT_SYMBOL(in6_pton);
void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
- __be32 from, __be32 to, int pseudohdr)
+ __be32 from, __be32 to, bool pseudohdr)
{
if (skb->ip_summed != CHECKSUM_PARTIAL) {
csum_replace4(sum, from, to);
@@ -318,7 +318,7 @@ EXPORT_SYMBOL(inet_proto_csum_replace4);
void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
const __be32 *from, const __be32 *to,
- int pseudohdr)
+ bool pseudohdr)
{
__be32 diff[] = {
~from[0], ~from[1], ~from[2], ~from[3],
@@ -336,6 +336,19 @@ void inet_proto_csum_replace16(__sum16 *sum, struct sk_buff *skb,
}
EXPORT_SYMBOL(inet_proto_csum_replace16);
+void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+ __wsum diff, bool pseudohdr)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ skb->csum = ~csum_add(diff, ~skb->csum);
+ } else if (pseudohdr) {
+ *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
+ }
+}
+EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
+
struct __net_random_once_work {
struct work_struct work;
struct static_key *key;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index b445d492c115..053eb2b8e682 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -554,6 +554,31 @@ static int dsa_of_setup_routing_table(struct dsa_platform_data *pd,
return 0;
}
+static int dsa_of_probe_links(struct dsa_platform_data *pd,
+ struct dsa_chip_data *cd,
+ int chip_index, int port_index,
+ struct device_node *port,
+ const char *port_name)
+{
+ struct device_node *link;
+ int link_index;
+ int ret;
+
+ for (link_index = 0;; link_index++) {
+ link = of_parse_phandle(port, "link", link_index);
+ if (!link)
+ break;
+
+ if (!strcmp(port_name, "dsa") && pd->nr_chips > 1) {
+ ret = dsa_of_setup_routing_table(pd, cd, chip_index,
+ port_index, link);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
{
int i;
@@ -573,8 +598,8 @@ static void dsa_of_free_platform_data(struct dsa_platform_data *pd)
static int dsa_of_probe(struct device *dev)
{
struct device_node *np = dev->of_node;
- struct device_node *child, *mdio, *ethernet, *port, *link;
- struct mii_bus *mdio_bus;
+ struct device_node *child, *mdio, *ethernet, *port;
+ struct mii_bus *mdio_bus, *mdio_bus_switch;
struct net_device *ethernet_dev;
struct dsa_platform_data *pd;
struct dsa_chip_data *cd;
@@ -636,6 +661,16 @@ static int dsa_of_probe(struct device *dev)
if (!of_property_read_u32(child, "eeprom-length", &eeprom_len))
cd->eeprom_len = eeprom_len;
+ mdio = of_parse_phandle(child, "mii-bus", 0);
+ if (mdio) {
+ mdio_bus_switch = of_mdio_find_bus(mdio);
+ if (!mdio_bus_switch) {
+ ret = -EPROBE_DEFER;
+ goto out_free_chip;
+ }
+ cd->host_dev = &mdio_bus_switch->dev;
+ }
+
for_each_available_child_of_node(child, port) {
port_reg = of_get_property(port, "reg", NULL);
if (!port_reg)
@@ -658,15 +693,10 @@ static int dsa_of_probe(struct device *dev)
goto out_free_chip;
}
- link = of_parse_phandle(port, "link", 0);
-
- if (!strcmp(port_name, "dsa") && link &&
- pd->nr_chips > 1) {
- ret = dsa_of_setup_routing_table(pd, cd,
- chip_index, port_index, link);
- if (ret)
- goto out_free_chip;
- }
+ ret = dsa_of_probe_links(pd, cd, chip_index,
+ port_index, port, port_name);
+ if (ret)
+ goto out_free_chip;
}
}
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index d5f1f9b862ea..311796c809af 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -13,9 +13,10 @@
#include <linux/phy.h>
#include <linux/netdevice.h>
+#include <linux/netpoll.h>
struct dsa_device_ops {
- netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev);
+ struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev);
int (*rcv)(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev);
};
@@ -26,7 +27,7 @@ struct dsa_slave_priv {
* switch port.
*/
struct net_device *dev;
- netdev_tx_t (*xmit)(struct sk_buff *skb,
+ struct sk_buff * (*xmit)(struct sk_buff *skb,
struct net_device *dev);
/*
@@ -47,6 +48,9 @@ struct dsa_slave_priv {
int old_duplex;
struct net_device *bridge_dev;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ struct netpoll *netpoll;
+#endif
};
/* dsa.c */
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 35c47ddd04f0..cce97385f743 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -18,6 +18,7 @@
#include <net/rtnetlink.h>
#include <net/switchdev.h>
#include <linux/if_bridge.h>
+#include <linux/netpoll.h>
#include "dsa_priv.h"
/* slave mii_bus handling ***************************************************/
@@ -199,103 +200,212 @@ out:
return 0;
}
-static int dsa_slave_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid, u16 nlm_flags)
+static int dsa_bridge_check_vlan_range(struct dsa_switch *ds,
+ const struct net_device *bridge,
+ u16 vid_begin, u16 vid_end)
{
+ struct dsa_slave_priv *p;
+ struct net_device *dev, *vlan_br;
+ DECLARE_BITMAP(members, DSA_MAX_PORTS);
+ DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
+ u16 vid;
+ int member, err;
+
+ if (!ds->drv->vlan_getnext || !vid_begin)
+ return -EOPNOTSUPP;
+
+ vid = vid_begin - 1;
+
+ do {
+ err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
+ if (err)
+ break;
+
+ if (vid > vid_end)
+ break;
+
+ member = find_first_bit(members, DSA_MAX_PORTS);
+ if (member == DSA_MAX_PORTS)
+ continue;
+
+ dev = ds->ports[member];
+ p = netdev_priv(dev);
+ vlan_br = p->bridge_dev;
+ if (vlan_br == bridge)
+ continue;
+
+ netdev_dbg(vlan_br, "hardware VLAN %d already in use\n", vid);
+ return -EOPNOTSUPP;
+ } while (vid < vid_end);
+
+ return err == -ENOENT ? 0 : err;
+}
+
+static int dsa_slave_port_vlan_add(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ struct switchdev_obj_vlan *vlan = &obj->u.vlan;
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- int ret = -EOPNOTSUPP;
+ u16 vid;
+ int err;
- if (ds->drv->fdb_add)
- ret = ds->drv->fdb_add(ds, p->port, addr, vid);
+ switch (obj->trans) {
+ case SWITCHDEV_TRANS_PREPARE:
+ if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set)
+ return -EOPNOTSUPP;
- return ret;
+ /* If the requested port doesn't belong to the same bridge as
+ * the VLAN members, fall back to software VLAN (hopefully).
+ */
+ err = dsa_bridge_check_vlan_range(ds, p->bridge_dev,
+ vlan->vid_begin,
+ vlan->vid_end);
+ if (err)
+ return err;
+ break;
+ case SWITCHDEV_TRANS_COMMIT:
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+ err = ds->drv->port_vlan_add(ds, p->port, vid,
+ vlan->flags &
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ if (!err && vlan->flags & BRIDGE_VLAN_INFO_PVID)
+ err = ds->drv->port_pvid_set(ds, p->port, vid);
+ if (err)
+ return err;
+ }
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
}
-static int dsa_slave_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
- struct net_device *dev,
- const unsigned char *addr, u16 vid)
+static int dsa_slave_port_vlan_del(struct net_device *dev,
+ struct switchdev_obj *obj)
{
+ struct switchdev_obj_vlan *vlan = &obj->u.vlan;
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- int ret = -EOPNOTSUPP;
+ u16 vid;
+ int err;
- if (ds->drv->fdb_del)
- ret = ds->drv->fdb_del(ds, p->port, addr, vid);
+ if (!ds->drv->port_vlan_del)
+ return -EOPNOTSUPP;
- return ret;
+ for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
+ err = ds->drv->port_vlan_del(ds, p->port, vid);
+ if (err)
+ return err;
+ }
+
+ return 0;
}
-static int dsa_slave_fill_info(struct net_device *dev, struct sk_buff *skb,
- const unsigned char *addr, u16 vid,
- bool is_static,
- u32 portid, u32 seq, int type,
- unsigned int flags)
+static int dsa_slave_port_vlan_dump(struct net_device *dev,
+ struct switchdev_obj *obj)
{
- struct nlmsghdr *nlh;
- struct ndmsg *ndm;
+ struct switchdev_obj_vlan *vlan = &obj->u.vlan;
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ DECLARE_BITMAP(members, DSA_MAX_PORTS);
+ DECLARE_BITMAP(untagged, DSA_MAX_PORTS);
+ u16 pvid, vid = 0;
+ int err;
- nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
- if (!nlh)
- return -EMSGSIZE;
+ if (!ds->drv->vlan_getnext || !ds->drv->port_pvid_get)
+ return -EOPNOTSUPP;
- ndm = nlmsg_data(nlh);
- ndm->ndm_family = AF_BRIDGE;
- ndm->ndm_pad1 = 0;
- ndm->ndm_pad2 = 0;
- ndm->ndm_flags = NTF_EXT_LEARNED;
- ndm->ndm_type = 0;
- ndm->ndm_ifindex = dev->ifindex;
- ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
+ err = ds->drv->port_pvid_get(ds, p->port, &pvid);
+ if (err)
+ return err;
- if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr))
- goto nla_put_failure;
+ for (;;) {
+ err = ds->drv->vlan_getnext(ds, &vid, members, untagged);
+ if (err)
+ break;
- if (vid && nla_put_u16(skb, NDA_VLAN, vid))
- goto nla_put_failure;
+ if (!test_bit(p->port, members))
+ continue;
- nlmsg_end(skb, nlh);
- return 0;
+ memset(vlan, 0, sizeof(*vlan));
+ vlan->vid_begin = vlan->vid_end = vid;
+
+ if (vid == pvid)
+ vlan->flags |= BRIDGE_VLAN_INFO_PVID;
+
+ if (test_bit(p->port, untagged))
+ vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
+
+ err = obj->cb(dev, obj);
+ if (err)
+ break;
+ }
+
+ return err == -ENOENT ? 0 : err;
+}
-nla_put_failure:
- nlmsg_cancel(skb, nlh);
- return -EMSGSIZE;
+static int dsa_slave_port_fdb_add(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ struct switchdev_obj_fdb *fdb = &obj->u.fdb;
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+ if (obj->trans == SWITCHDEV_TRANS_PREPARE)
+ ret = ds->drv->port_fdb_add ? 0 : -EOPNOTSUPP;
+ else if (obj->trans == SWITCHDEV_TRANS_COMMIT)
+ ret = ds->drv->port_fdb_add(ds, p->port, fdb->addr, fdb->vid);
+
+ return ret;
}
-/* Dump information about entries, in response to GETNEIGH */
-static int dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
- struct net_device *dev,
- struct net_device *filter_dev, int idx)
+static int dsa_slave_port_fdb_del(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ struct switchdev_obj_fdb *fdb = &obj->u.fdb;
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ int ret = -EOPNOTSUPP;
+
+ if (ds->drv->port_fdb_del)
+ ret = ds->drv->port_fdb_del(ds, p->port, fdb->addr, fdb->vid);
+
+ return ret;
+}
+
+static int dsa_slave_port_fdb_dump(struct net_device *dev,
+ struct switchdev_obj *obj)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
unsigned char addr[ETH_ALEN] = { 0 };
+ u16 vid = 0;
int ret;
- if (!ds->drv->fdb_getnext)
+ if (!ds->drv->port_fdb_getnext)
return -EOPNOTSUPP;
- for (; ; idx++) {
+ for (;;) {
bool is_static;
- ret = ds->drv->fdb_getnext(ds, p->port, addr, &is_static);
+ ret = ds->drv->port_fdb_getnext(ds, p->port, addr, &vid,
+ &is_static);
if (ret < 0)
break;
- if (idx < cb->args[0])
- continue;
+ obj->u.fdb.addr = addr;
+ obj->u.fdb.vid = vid;
+ obj->u.fdb.ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
- ret = dsa_slave_fill_info(dev, skb, addr, 0,
- is_static,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- RTM_NEWNEIGH, NLM_F_MULTI);
+ ret = obj->cb(dev, obj);
if (ret < 0)
break;
}
- return idx;
+ return ret == -ENOENT ? 0 : ret;
}
static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@ -363,6 +473,71 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
return ret;
}
+static int dsa_slave_port_obj_add(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ int err;
+
+ /* For the prepare phase, ensure the full set of changes is feasible in
+ * one go in order to signal a failure properly. If an operation is not
+ * supported, return -EOPNOTSUPP.
+ */
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_PORT_FDB:
+ err = dsa_slave_port_fdb_add(dev, obj);
+ break;
+ case SWITCHDEV_OBJ_PORT_VLAN:
+ err = dsa_slave_port_vlan_add(dev, obj);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static int dsa_slave_port_obj_del(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ int err;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_PORT_FDB:
+ err = dsa_slave_port_fdb_del(dev, obj);
+ break;
+ case SWITCHDEV_OBJ_PORT_VLAN:
+ err = dsa_slave_port_vlan_del(dev, obj);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
+static int dsa_slave_port_obj_dump(struct net_device *dev,
+ struct switchdev_obj *obj)
+{
+ int err;
+
+ switch (obj->id) {
+ case SWITCHDEV_OBJ_PORT_FDB:
+ err = dsa_slave_port_fdb_dump(dev, obj);
+ break;
+ case SWITCHDEV_OBJ_PORT_VLAN:
+ err = dsa_slave_port_vlan_dump(dev, obj);
+ break;
+ default:
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ return err;
+}
+
static int dsa_slave_bridge_port_join(struct net_device *dev,
struct net_device *br)
{
@@ -418,24 +593,53 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
return 0;
}
-static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
+static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p,
+ struct sk_buff *skb)
{
- struct dsa_slave_priv *p = netdev_priv(dev);
-
- return p->xmit(skb, dev);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ if (p->netpoll)
+ netpoll_send_skb(p->netpoll, skb);
+#else
+ BUG();
+#endif
+ return NETDEV_TX_OK;
}
-static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb,
- struct net_device *dev)
+static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
+ struct sk_buff *nskb;
+
+ dev->stats.tx_packets++;
+ dev->stats.tx_bytes += skb->len;
+
+ /* Transmit function may have to reallocate the original SKB */
+ nskb = p->xmit(skb, dev);
+ if (!nskb)
+ return NETDEV_TX_OK;
- skb->dev = p->parent->dst->master_netdev;
- dev_queue_xmit(skb);
+ /* SKBs for netpoll still need to be mangled with the protocol-specific
+ * tag to be successfully transmitted
+ */
+ if (unlikely(netpoll_tx_running(dev)))
+ return dsa_netpoll_send_skb(p, nskb);
+
+ /* Queue the SKB for transmission on the parent interface, but
+ * do not modify its EtherType
+ */
+ nskb->dev = p->parent->dst->master_netdev;
+ dev_queue_xmit(nskb);
return NETDEV_TX_OK;
}
+static struct sk_buff *dsa_slave_notag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ /* Just return the original SKB */
+ return skb;
+}
+
/* ethtool operations *******************************************************/
static int
@@ -665,6 +869,49 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e)
return ret;
}
+#ifdef CONFIG_NET_POLL_CONTROLLER
+static int dsa_slave_netpoll_setup(struct net_device *dev,
+ struct netpoll_info *ni)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct dsa_switch *ds = p->parent;
+ struct net_device *master = ds->dst->master_netdev;
+ struct netpoll *netpoll;
+ int err = 0;
+
+ netpoll = kzalloc(sizeof(*netpoll), GFP_KERNEL);
+ if (!netpoll)
+ return -ENOMEM;
+
+ err = __netpoll_setup(netpoll, master);
+ if (err) {
+ kfree(netpoll);
+ goto out;
+ }
+
+ p->netpoll = netpoll;
+out:
+ return err;
+}
+
+static void dsa_slave_netpoll_cleanup(struct net_device *dev)
+{
+ struct dsa_slave_priv *p = netdev_priv(dev);
+ struct netpoll *netpoll = p->netpoll;
+
+ if (!netpoll)
+ return;
+
+ p->netpoll = NULL;
+
+ __netpoll_free_async(netpoll);
+}
+
+static void dsa_slave_poll_controller(struct net_device *dev)
+{
+}
+#endif
+
static const struct ethtool_ops dsa_slave_ethtool_ops = {
.get_settings = dsa_slave_get_settings,
.set_settings = dsa_slave_set_settings,
@@ -692,16 +939,27 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_change_rx_flags = dsa_slave_change_rx_flags,
.ndo_set_rx_mode = dsa_slave_set_rx_mode,
.ndo_set_mac_address = dsa_slave_set_mac_address,
- .ndo_fdb_add = dsa_slave_fdb_add,
- .ndo_fdb_del = dsa_slave_fdb_del,
- .ndo_fdb_dump = dsa_slave_fdb_dump,
+ .ndo_fdb_add = switchdev_port_fdb_add,
+ .ndo_fdb_del = switchdev_port_fdb_del,
+ .ndo_fdb_dump = switchdev_port_fdb_dump,
.ndo_do_ioctl = dsa_slave_ioctl,
.ndo_get_iflink = dsa_slave_get_iflink,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+ .ndo_netpoll_setup = dsa_slave_netpoll_setup,
+ .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup,
+ .ndo_poll_controller = dsa_slave_poll_controller,
+#endif
+ .ndo_bridge_getlink = switchdev_port_bridge_getlink,
+ .ndo_bridge_setlink = switchdev_port_bridge_setlink,
+ .ndo_bridge_dellink = switchdev_port_bridge_dellink,
};
static const struct switchdev_ops dsa_slave_switchdev_ops = {
.switchdev_port_attr_get = dsa_slave_port_attr_get,
.switchdev_port_attr_set = dsa_slave_port_attr_set,
+ .switchdev_port_obj_add = dsa_slave_port_obj_add,
+ .switchdev_port_obj_del = dsa_slave_port_obj_del,
+ .switchdev_port_obj_dump = dsa_slave_port_obj_dump,
};
static void dsa_slave_adjust_link(struct net_device *dev)
@@ -889,7 +1147,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->features = master->vlan_features;
slave_dev->ethtool_ops = &dsa_slave_ethtool_ops;
eth_hw_addr_inherit(slave_dev, master);
- slave_dev->tx_queue_len = 0;
+ slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c
index 83d3572cdb20..e2aadb73111d 100644
--- a/net/dsa/tag_brcm.c
+++ b/net/dsa/tag_brcm.c
@@ -58,14 +58,11 @@
#define BRCM_EG_TC_MASK 0x7
#define BRCM_EG_PID_MASK 0x1f
-static netdev_tx_t brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
u8 *brcm_tag;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
-
if (skb_cow_head(skb, BRCM_TAG_LEN) < 0)
goto out_free;
@@ -87,17 +84,11 @@ static netdev_tx_t brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev)
brcm_tag[2] = BRCM_IG_DSTMAP2_MASK;
brcm_tag[3] = (1 << p->port) & BRCM_IG_DSTMAP1_MASK;
- /* Queue the SKB for transmission on the parent interface, but
- * do not modify its EtherType
- */
- skb->dev = p->parent->dst->master_netdev;
- dev_queue_xmit(skb);
-
- return NETDEV_TX_OK;
+ return skb;
out_free:
kfree_skb(skb);
- return NETDEV_TX_OK;
+ return NULL;
}
static int brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c
index 2dab27063273..aa780e4ac0bd 100644
--- a/net/dsa/tag_dsa.c
+++ b/net/dsa/tag_dsa.c
@@ -15,14 +15,11 @@
#define DSA_HLEN 4
-static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
u8 *dsa_header;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
-
/*
* Convert the outermost 802.1q tag to a DSA tag for tagged
* packets, or insert a DSA tag between the addresses and
@@ -63,14 +60,11 @@ static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev)
dsa_header[3] = 0x00;
}
- skb->dev = p->parent->dst->master_netdev;
- dev_queue_xmit(skb);
-
- return NETDEV_TX_OK;
+ return skb;
out_free:
kfree_skb(skb);
- return NETDEV_TX_OK;
+ return NULL;
}
static int dsa_rcv(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c
index 9aeda596f7ec..2288c8098c42 100644
--- a/net/dsa/tag_edsa.c
+++ b/net/dsa/tag_edsa.c
@@ -16,14 +16,11 @@
#define DSA_HLEN 4
#define EDSA_HLEN 8
-static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
u8 *edsa_header;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
-
/*
* Convert the outermost 802.1q tag to a DSA tag and prepend
* a DSA ethertype field is the packet is tagged, or insert
@@ -76,14 +73,11 @@ static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev)
edsa_header[7] = 0x00;
}
- skb->dev = p->parent->dst->master_netdev;
- dev_queue_xmit(skb);
-
- return NETDEV_TX_OK;
+ return skb;
out_free:
kfree_skb(skb);
- return NETDEV_TX_OK;
+ return NULL;
}
static int edsa_rcv(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index e268f9db8893..d25efc93d8f1 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -13,16 +13,13 @@
#include <linux/slab.h>
#include "dsa_priv.h"
-static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
+static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct sk_buff *nskb;
int padlen;
u8 *trailer;
- dev->stats.tx_packets++;
- dev->stats.tx_bytes += skb->len;
-
/*
* We have to make sure that the trailer ends up as the very
* last 4 bytes of the packet. This means that we have to pad
@@ -36,7 +33,7 @@ static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC);
if (nskb == NULL) {
kfree_skb(skb);
- return NETDEV_TX_OK;
+ return NULL;
}
skb_reserve(nskb, NET_IP_ALIGN);
@@ -57,10 +54,7 @@ static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev)
trailer[2] = 0x10;
trailer[3] = 0x00;
- nskb->dev = p->parent->dst->master_netdev;
- dev_queue_xmit(nskb);
-
- return NETDEV_TX_OK;
+ return nskb;
}
static int trailer_rcv(struct sk_buff *skb, struct net_device *dev,
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 77e0f0e7a88e..217127c3a3ef 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -114,7 +114,7 @@ int eth_header(struct sk_buff *skb, struct net_device *dev,
EXPORT_SYMBOL(eth_header);
/**
- * eth_get_headlen - determine the the length of header for an ethernet frame
+ * eth_get_headlen - determine the length of header for an ethernet frame
* @data: pointer to start of frame
* @len: total length of frame
*
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 44d27469ae55..35a9788bb3ae 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -392,7 +392,7 @@ void hsr_dev_setup(struct net_device *dev)
dev->header_ops = &hsr_header_ops;
dev->netdev_ops = &hsr_device_ops;
SET_NETDEV_DEVTYPE(dev, &hsr_type);
- dev->tx_queue_len = 0;
+ dev->priv_flags |= IFF_NO_QUEUE;
dev->destructor = hsr_dev_destroy;
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index e50f69da78eb..ea339fa94c27 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -5,6 +5,7 @@
#include <net/ieee802154_netdev.h>
#include <net/inet_frag.h>
+#include <net/6lowpan.h>
struct lowpan_create_arg {
u16 tag;
@@ -37,26 +38,18 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
}
}
-struct lowpan_dev_record {
- struct net_device *ldev;
- struct list_head list;
-};
-
/* private device info */
struct lowpan_dev_info {
struct net_device *real_dev; /* real WPAN device ptr */
- struct mutex dev_list_mtx; /* mutex for list ops */
u16 fragment_tag;
};
static inline struct
lowpan_dev_info *lowpan_dev_info(const struct net_device *dev)
{
- return netdev_priv(dev);
+ return (struct lowpan_dev_info *)lowpan_priv(dev)->priv;
}
-extern struct list_head lowpan_devices;
-
int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type);
void lowpan_net_frag_exit(void);
int lowpan_net_frag_init(void);
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index f20a387a1011..953b1c49f5d1 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -52,8 +52,7 @@
#include "6lowpan_i.h"
-LIST_HEAD(lowpan_devices);
-static int lowpan_open_count;
+static int open_count;
static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
@@ -91,7 +90,7 @@ static void lowpan_setup(struct net_device *dev)
dev->hard_header_len = 2 + 1 + 20 + 14;
dev->needed_tailroom = 2; /* FCS */
dev->mtu = IPV6_MIN_MTU;
- dev->tx_queue_len = 0;
+ dev->priv_flags |= IFF_NO_QUEUE;
dev->flags = IFF_BROADCAST | IFF_MULTICAST;
dev->watchdog_timeo = 0;
@@ -114,7 +113,6 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[])
{
struct net_device *real_dev;
- struct lowpan_dev_record *entry;
int ret;
ASSERT_RTNL();
@@ -133,67 +131,52 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
}
- lowpan_dev_info(dev)->real_dev = real_dev;
- mutex_init(&lowpan_dev_info(dev)->dev_list_mtx);
-
- entry = kzalloc(sizeof(*entry), GFP_KERNEL);
- if (!entry) {
+ if (real_dev->ieee802154_ptr->lowpan_dev) {
dev_put(real_dev);
- lowpan_dev_info(dev)->real_dev = NULL;
- return -ENOMEM;
+ return -EBUSY;
}
- entry->ldev = dev;
-
+ lowpan_dev_info(dev)->real_dev = real_dev;
/* Set the lowpan hardware address to the wpan hardware address. */
memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
- mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
- INIT_LIST_HEAD(&entry->list);
- list_add_tail(&entry->list, &lowpan_devices);
- mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
+ lowpan_netdev_setup(dev, LOWPAN_LLTYPE_IEEE802154);
ret = register_netdevice(dev);
- if (ret >= 0) {
- if (!lowpan_open_count)
- lowpan_rx_init();
- lowpan_open_count++;
+ if (ret < 0) {
+ dev_put(real_dev);
+ return ret;
}
- return ret;
+ real_dev->ieee802154_ptr->lowpan_dev = dev;
+ if (!open_count)
+ lowpan_rx_init();
+
+ open_count++;
+
+ return 0;
}
static void lowpan_dellink(struct net_device *dev, struct list_head *head)
{
struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev);
struct net_device *real_dev = lowpan_dev->real_dev;
- struct lowpan_dev_record *entry, *tmp;
ASSERT_RTNL();
- lowpan_open_count--;
- if (!lowpan_open_count)
- lowpan_rx_exit();
-
- mutex_lock(&lowpan_dev_info(dev)->dev_list_mtx);
- list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) {
- if (entry->ldev == dev) {
- list_del(&entry->list);
- kfree(entry);
- }
- }
- mutex_unlock(&lowpan_dev_info(dev)->dev_list_mtx);
+ open_count--;
- mutex_destroy(&lowpan_dev_info(dev)->dev_list_mtx);
-
- unregister_netdevice_queue(dev, head);
+ if (!open_count)
+ lowpan_rx_exit();
+ real_dev->ieee802154_ptr->lowpan_dev = NULL;
+ unregister_netdevice(dev);
dev_put(real_dev);
}
static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
.kind = "lowpan",
- .priv_size = sizeof(struct lowpan_dev_info),
+ .priv_size = LOWPAN_PRIV_SIZE(sizeof(struct lowpan_dev_info)),
.setup = lowpan_setup,
.newlink = lowpan_newlink,
.dellink = lowpan_dellink,
@@ -214,19 +197,21 @@ static int lowpan_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- LIST_HEAD(del_list);
- struct lowpan_dev_record *entry, *tmp;
if (dev->type != ARPHRD_IEEE802154)
goto out;
- if (event == NETDEV_UNREGISTER) {
- list_for_each_entry_safe(entry, tmp, &lowpan_devices, list) {
- if (lowpan_dev_info(entry->ldev)->real_dev == dev)
- lowpan_dellink(entry->ldev, &del_list);
- }
-
- unregister_netdevice_many(&del_list);
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ /* When a wpan interface is unregistered, also delete
+ * any lowpan interface which belongs to that wpan
+ * interface.
+ */
+ if (dev->ieee802154_ptr && dev->ieee802154_ptr->lowpan_dev)
+ lowpan_dellink(dev->ieee802154_ptr->lowpan_dev, NULL);
+ break;
+ default:
+ break;
}
out:
diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index 4be1d289ab2d..12e10201d263 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -15,36 +15,14 @@
#include "6lowpan_i.h"
-static int lowpan_give_skb_to_devices(struct sk_buff *skb,
- struct net_device *dev)
+static int lowpan_give_skb_to_device(struct sk_buff *skb,
+ struct net_device *dev)
{
- struct lowpan_dev_record *entry;
- struct sk_buff *skb_cp;
- int stat = NET_RX_SUCCESS;
-
+ skb->dev = dev->ieee802154_ptr->lowpan_dev;
skb->protocol = htons(ETH_P_IPV6);
skb->pkt_type = PACKET_HOST;
- rcu_read_lock();
- list_for_each_entry_rcu(entry, &lowpan_devices, list)
- if (lowpan_dev_info(entry->ldev)->real_dev == skb->dev) {
- skb_cp = skb_copy(skb, GFP_ATOMIC);
- if (!skb_cp) {
- kfree_skb(skb);
- rcu_read_unlock();
- return NET_RX_DROP;
- }
-
- skb_cp->dev = entry->ldev;
- stat = netif_rx(skb_cp);
- if (stat == NET_RX_DROP)
- break;
- }
- rcu_read_unlock();
-
- consume_skb(skb);
-
- return stat;
+ return netif_rx(skb);
}
static int
@@ -89,6 +67,10 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
struct ieee802154_hdr hdr;
int ret;
+ if (dev->type != ARPHRD_IEEE802154 ||
+ !dev->ieee802154_ptr->lowpan_dev)
+ goto drop;
+
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb)
goto drop;
@@ -99,9 +81,6 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop_skb;
- if (dev->type != ARPHRD_IEEE802154)
- goto drop_skb;
-
if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
goto drop_skb;
@@ -109,7 +88,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
if (skb->data[0] == LOWPAN_DISPATCH_IPV6) {
/* Pull off the 1-byte of 6lowpan header. */
skb_pull(skb, 1);
- return lowpan_give_skb_to_devices(skb, NULL);
+ return lowpan_give_skb_to_device(skb, dev);
} else {
switch (skb->data[0] & 0xe0) {
case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
@@ -117,7 +96,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
if (ret < 0)
goto drop_skb;
- return lowpan_give_skb_to_devices(skb, NULL);
+ return lowpan_give_skb_to_device(skb, dev);
case LOWPAN_DISPATCH_FRAG1: /* first fragment header */
ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1);
if (ret == 1) {
@@ -125,7 +104,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
if (ret < 0)
goto drop_skb;
- return lowpan_give_skb_to_devices(skb, NULL);
+ return lowpan_give_skb_to_device(skb, dev);
} else if (ret == -1) {
return NET_RX_DROP;
} else {
@@ -138,7 +117,7 @@ static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
if (ret < 0)
goto drop_skb;
- return lowpan_give_skb_to_devices(skb, NULL);
+ return lowpan_give_skb_to_device(skb, dev);
} else if (ret == -1) {
return NET_RX_DROP;
} else {
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index 2597abbf7f4b..f6263fc12340 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -112,7 +112,7 @@ lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr);
if (IS_ERR(frag))
- return -PTR_ERR(frag);
+ return PTR_ERR(frag);
memcpy(skb_put(frag, frag_hdrlen), frag_hdr, frag_hdrlen);
memcpy(skb_put(frag, len), skb_network_header(skb) + offset, len);
@@ -224,7 +224,7 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
} else {
da.mode = IEEE802154_ADDR_LONG;
da.extended_addr = ieee802154_devaddr_from_raw(daddr);
- cb->ackreq = wpan_dev->frame_retries >= 0;
+ cb->ackreq = wpan_dev->ackreq;
}
return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 68f24016860c..1b00a14850cb 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -230,6 +230,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_WPAN_PHY_CAPS] = { .type = NLA_NESTED },
[NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED },
+
+ [NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 },
};
/* message building helper */
@@ -458,6 +460,7 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev,
CMD(set_max_csma_backoffs, SET_MAX_CSMA_BACKOFFS);
CMD(set_max_frame_retries, SET_MAX_FRAME_RETRIES);
CMD(set_lbt_mode, SET_LBT_MODE);
+ CMD(set_ackreq_default, SET_ACKREQ_DEFAULT);
if (rdev->wpan_phy.flags & WPAN_PHY_FLAG_TXPOWER)
CMD(set_tx_power, SET_TX_POWER);
@@ -656,6 +659,10 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt))
goto nla_put_failure;
+ /* ackreq default behaviour */
+ if (nla_put_u8(msg, NL802154_ATTR_ACKREQ_DEFAULT, wpan_dev->ackreq))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -1042,6 +1049,24 @@ static int nl802154_set_lbt_mode(struct sk_buff *skb, struct genl_info *info)
return rdev_set_lbt_mode(rdev, wpan_dev, mode);
}
+static int
+nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+ bool ackreq;
+
+ if (netif_running(dev))
+ return -EBUSY;
+
+ if (!info->attrs[NL802154_ATTR_ACKREQ_DEFAULT])
+ return -EINVAL;
+
+ ackreq = !!nla_get_u8(info->attrs[NL802154_ATTR_ACKREQ_DEFAULT]);
+ return rdev_set_ackreq_default(rdev, wpan_dev, ackreq);
+}
+
#define NL802154_FLAG_NEED_WPAN_PHY 0x01
#define NL802154_FLAG_NEED_NETDEV 0x02
#define NL802154_FLAG_NEED_RTNL 0x04
@@ -1248,6 +1273,14 @@ static const struct genl_ops nl802154_ops[] = {
.internal_flags = NL802154_FLAG_NEED_NETDEV |
NL802154_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL802154_CMD_SET_ACKREQ_DEFAULT,
+ .doit = nl802154_set_ackreq_default,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
};
/* initialisation/exit functions */
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index b2155a123f6c..03b357501cc5 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -24,6 +24,26 @@ rdev_del_virtual_intf_deprecated(struct cfg802154_registered_device *rdev,
}
static inline int
+rdev_suspend(struct cfg802154_registered_device *rdev)
+{
+ int ret;
+ trace_802154_rdev_suspend(&rdev->wpan_phy);
+ ret = rdev->ops->suspend(&rdev->wpan_phy);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int
+rdev_resume(struct cfg802154_registered_device *rdev)
+{
+ int ret;
+ trace_802154_rdev_resume(&rdev->wpan_phy);
+ ret = rdev->ops->resume(&rdev->wpan_phy);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int
rdev_add_virtual_intf(struct cfg802154_registered_device *rdev, char *name,
unsigned char name_assign_type,
enum nl802154_iftype type, __le64 extended_addr)
@@ -175,4 +195,17 @@ rdev_set_lbt_mode(struct cfg802154_registered_device *rdev,
return ret;
}
+static inline int
+rdev_set_ackreq_default(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev, bool ackreq)
+{
+ int ret;
+
+ trace_802154_rdev_set_ackreq_default(&rdev->wpan_phy, wpan_dev,
+ ackreq);
+ ret = rdev->ops->set_ackreq_default(&rdev->wpan_phy, wpan_dev, ackreq);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
#endif /* __CFG802154_RDEV_OPS */
diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c
index 133b4280660c..bd88525b041e 100644
--- a/net/ieee802154/sysfs.c
+++ b/net/ieee802154/sysfs.c
@@ -14,11 +14,13 @@
*/
#include <linux/device.h>
+#include <linux/rtnetlink.h>
#include <net/cfg802154.h>
#include "core.h"
#include "sysfs.h"
+#include "rdev-ops.h"
static inline struct cfg802154_registered_device *
dev_to_rdev(struct device *dev)
@@ -62,10 +64,46 @@ static struct attribute *pmib_attrs[] = {
};
ATTRIBUTE_GROUPS(pmib);
+#ifdef CONFIG_PM_SLEEP
+static int wpan_phy_suspend(struct device *dev)
+{
+ struct cfg802154_registered_device *rdev = dev_to_rdev(dev);
+ int ret = 0;
+
+ if (rdev->ops->suspend) {
+ rtnl_lock();
+ ret = rdev_suspend(rdev);
+ rtnl_unlock();
+ }
+
+ return ret;
+}
+
+static int wpan_phy_resume(struct device *dev)
+{
+ struct cfg802154_registered_device *rdev = dev_to_rdev(dev);
+ int ret = 0;
+
+ if (rdev->ops->resume) {
+ rtnl_lock();
+ ret = rdev_resume(rdev);
+ rtnl_unlock();
+ }
+
+ return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(wpan_phy_pm_ops, wpan_phy_suspend, wpan_phy_resume);
+#define WPAN_PHY_PM_OPS (&wpan_phy_pm_ops)
+#else
+#define WPAN_PHY_PM_OPS NULL
+#endif
+
struct class wpan_phy_class = {
.name = "ieee802154",
.dev_release = wpan_phy_release,
.dev_groups = pmib_groups,
+ .pm = WPAN_PHY_PM_OPS,
};
int wpan_phy_sysfs_init(void)
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 9b5f0eb36696..9a471e41ec73 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -40,6 +40,28 @@
* rdev->ops traces *
*************************************************************/
+DECLARE_EVENT_CLASS(wpan_phy_only_evt,
+ TP_PROTO(struct wpan_phy *wpan_phy),
+ TP_ARGS(wpan_phy),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT, WPAN_PHY_PR_ARG)
+);
+
+DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_suspend,
+ TP_PROTO(struct wpan_phy *wpan_phy),
+ TP_ARGS(wpan_phy)
+);
+
+DEFINE_EVENT(wpan_phy_only_evt, 802154_rdev_resume,
+ TP_PROTO(struct wpan_phy *wpan_phy),
+ TP_ARGS(wpan_phy)
+);
+
TRACE_EVENT(802154_rdev_add_virtual_intf,
TP_PROTO(struct wpan_phy *wpan_phy, char *name,
enum nl802154_iftype type, __le64 extended_addr),
@@ -253,6 +275,25 @@ TRACE_EVENT(802154_rdev_set_lbt_mode,
WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->mode))
);
+TRACE_EVENT(802154_rdev_set_ackreq_default,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ bool ackreq),
+ TP_ARGS(wpan_phy, wpan_dev, ackreq),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ __field(bool, ackreq)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ __entry->ackreq = ackreq;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT
+ ", ackreq default: %s", WPAN_PHY_PR_ARG,
+ WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->ackreq))
+);
+
TRACE_EVENT(802154_rdev_return_int,
TP_PROTO(struct wpan_phy *wpan_phy, int ret),
TP_ARGS(wpan_phy, ret),
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9532ee87151f..675e88cac2b4 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -112,12 +112,14 @@
#include <net/raw.h>
#include <net/icmp.h>
#include <net/inet_common.h>
+#include <net/ip_tunnels.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/secure_seq.h>
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
+#include <net/vrf.h>
/* The inetsw table contains everything that inet_create needs to
@@ -426,6 +428,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net *net = sock_net(sk);
unsigned short snum;
int chk_addr_ret;
+ int tb_id = RT_TABLE_LOCAL;
int err;
/* If the socket has its own bind function then use it. (RAW) */
@@ -447,7 +450,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- chk_addr_ret = inet_addr_type(net, addr->sin_addr.s_addr);
+ tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id;
+ chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
/* Not specified by any standard per-se, however it breaks too
* many applications when removed. It is unfortunate since
@@ -1780,6 +1784,8 @@ static int __init inet_init(void)
dev_add_pack(&ip_packet_type);
+ ip_tunnel_core_init();
+
rc = 0;
out:
return rc;
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 6c8b1fbafce8..30409b75e925 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -233,7 +233,7 @@ static int arp_constructor(struct neighbour *neigh)
return -EINVAL;
}
- neigh->type = inet_addr_type(dev_net(dev), addr);
+ neigh->type = inet_addr_type_dev_table(dev_net(dev), dev, addr);
parms = in_dev->arp_parms;
__neigh_parms_put(neigh->parms);
@@ -291,6 +291,40 @@ static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb)
kfree_skb(skb);
}
+/* Create and send an arp packet. */
+static void arp_send_dst(int type, int ptype, __be32 dest_ip,
+ struct net_device *dev, __be32 src_ip,
+ const unsigned char *dest_hw,
+ const unsigned char *src_hw,
+ const unsigned char *target_hw, struct sk_buff *oskb)
+{
+ struct sk_buff *skb;
+
+ /* arp on this interface. */
+ if (dev->flags & IFF_NOARP)
+ return;
+
+ skb = arp_create(type, ptype, dest_ip, dev, src_ip,
+ dest_hw, src_hw, target_hw);
+ if (!skb)
+ return;
+
+ if (oskb)
+ skb_dst_copy(skb, oskb);
+
+ arp_xmit(skb);
+}
+
+void arp_send(int type, int ptype, __be32 dest_ip,
+ struct net_device *dev, __be32 src_ip,
+ const unsigned char *dest_hw, const unsigned char *src_hw,
+ const unsigned char *target_hw)
+{
+ arp_send_dst(type, ptype, dest_ip, dev, src_ip, dest_hw, src_hw,
+ target_hw, NULL);
+}
+EXPORT_SYMBOL(arp_send);
+
static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
{
__be32 saddr = 0;
@@ -309,7 +343,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
switch (IN_DEV_ARP_ANNOUNCE(in_dev)) {
default:
case 0: /* By default announce any local IP */
- if (skb && inet_addr_type(dev_net(dev),
+ if (skb && inet_addr_type_dev_table(dev_net(dev), dev,
ip_hdr(skb)->saddr) == RTN_LOCAL)
saddr = ip_hdr(skb)->saddr;
break;
@@ -317,7 +351,8 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
if (!skb)
break;
saddr = ip_hdr(skb)->saddr;
- if (inet_addr_type(dev_net(dev), saddr) == RTN_LOCAL) {
+ if (inet_addr_type_dev_table(dev_net(dev), dev,
+ saddr) == RTN_LOCAL) {
/* saddr should be known to target */
if (inet_addr_onlink(in_dev, target, saddr))
break;
@@ -346,8 +381,9 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb)
}
}
- arp_send(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
- dst_hw, dev->dev_addr, NULL);
+ arp_send_dst(ARPOP_REQUEST, ETH_P_ARP, target, dev, saddr,
+ dst_hw, dev->dev_addr, NULL,
+ dev->priv_flags & IFF_XMIT_DST_RELEASE ? NULL : skb);
}
static int arp_ignore(struct in_device *in_dev, __be32 sip, __be32 tip)
@@ -597,32 +633,6 @@ void arp_xmit(struct sk_buff *skb)
EXPORT_SYMBOL(arp_xmit);
/*
- * Create and send an arp packet.
- */
-void arp_send(int type, int ptype, __be32 dest_ip,
- struct net_device *dev, __be32 src_ip,
- const unsigned char *dest_hw, const unsigned char *src_hw,
- const unsigned char *target_hw)
-{
- struct sk_buff *skb;
-
- /*
- * No arp on this interface.
- */
-
- if (dev->flags&IFF_NOARP)
- return;
-
- skb = arp_create(type, ptype, dest_ip, dev, src_ip,
- dest_hw, src_hw, target_hw);
- if (!skb)
- return;
-
- arp_xmit(skb);
-}
-EXPORT_SYMBOL(arp_send);
-
-/*
* Process an arp request.
*/
@@ -742,7 +752,7 @@ static int arp_process(struct sock *sk, struct sk_buff *skb)
/* Special case: IPv4 duplicate address detection packet (RFC2131) */
if (sip == 0) {
if (arp->ar_op == htons(ARPOP_REQUEST) &&
- inet_addr_type(net, tip) == RTN_LOCAL &&
+ inet_addr_type_dev_table(net, dev, tip) == RTN_LOCAL &&
!arp_ignore(in_dev, sip, tip))
arp_send(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha,
dev->dev_addr, sha);
@@ -802,16 +812,18 @@ static int arp_process(struct sock *sk, struct sk_buff *skb)
n = __neigh_lookup(&arp_tbl, &sip, dev, 0);
if (IN_DEV_ARP_ACCEPT(in_dev)) {
+ unsigned int addr_type = inet_addr_type_dev_table(net, dev, sip);
+
/* Unsolicited ARP is not accepted by default.
It is possible, that this option should be enabled for some
devices (strip is candidate)
*/
is_garp = arp->ar_op == htons(ARPOP_REQUEST) && tip == sip &&
- inet_addr_type(net, sip) == RTN_UNICAST;
+ addr_type == RTN_UNICAST;
if (!n &&
((arp->ar_op == htons(ARPOP_REPLY) &&
- inet_addr_type(net, sip) == RTN_UNICAST) || is_garp))
+ addr_type == RTN_UNICAST) || is_garp))
n = __neigh_lookup(&arp_tbl, &sip, dev, 1);
}
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 574fad9cca05..f915abff1350 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -74,7 +74,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
inet->inet_daddr = fl4->daddr;
inet->inet_dport = usin->sin_port;
sk->sk_state = TCP_ESTABLISHED;
- inet_set_txhash(sk);
+ sk_set_txhash(sk);
inet->inet_id = jiffies;
sk_dst_set(sk, &rt->dst);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 6bbc54940eb4..7fa277176c33 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -45,6 +45,7 @@
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
+#include <net/vrf.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -211,12 +212,12 @@ void fib_flush_external(struct net *net)
*/
static inline unsigned int __inet_dev_addr_type(struct net *net,
const struct net_device *dev,
- __be32 addr)
+ __be32 addr, int tb_id)
{
struct flowi4 fl4 = { .daddr = addr };
struct fib_result res;
unsigned int ret = RTN_BROADCAST;
- struct fib_table *local_table;
+ struct fib_table *table;
if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr))
return RTN_BROADCAST;
@@ -225,10 +226,10 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
rcu_read_lock();
- local_table = fib_get_table(net, RT_TABLE_LOCAL);
- if (local_table) {
+ table = fib_get_table(net, tb_id);
+ if (table) {
ret = RTN_UNICAST;
- if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) {
+ if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
if (!dev || dev == res.fi->fib_dev)
ret = res.type;
}
@@ -238,19 +239,40 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
return ret;
}
+unsigned int inet_addr_type_table(struct net *net, __be32 addr, int tb_id)
+{
+ return __inet_dev_addr_type(net, NULL, addr, tb_id);
+}
+EXPORT_SYMBOL(inet_addr_type_table);
+
unsigned int inet_addr_type(struct net *net, __be32 addr)
{
- return __inet_dev_addr_type(net, NULL, addr);
+ return __inet_dev_addr_type(net, NULL, addr, RT_TABLE_LOCAL);
}
EXPORT_SYMBOL(inet_addr_type);
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
__be32 addr)
{
- return __inet_dev_addr_type(net, dev, addr);
+ int rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+
+ return __inet_dev_addr_type(net, dev, addr, rt_table);
}
EXPORT_SYMBOL(inet_dev_addr_type);
+/* inet_addr_type with dev == NULL but using the table from a dev
+ * if one is associated
+ */
+unsigned int inet_addr_type_dev_table(struct net *net,
+ const struct net_device *dev,
+ __be32 addr)
+{
+ int rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+
+ return __inet_dev_addr_type(net, NULL, addr, rt_table);
+}
+EXPORT_SYMBOL(inet_addr_type_dev_table);
+
__be32 fib_compute_spec_dst(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
@@ -280,6 +302,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_scope = scope;
fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
+ fl4.flowi4_tun_key.tun_id = 0;
if (!fib_lookup(net, &fl4, &res, 0))
return FIB_RES_PREFSRC(net, res);
} else {
@@ -308,11 +331,14 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
+ fl4.flowi4_iif = vrf_master_ifindex_rcu(dev);
+ if (!fl4.flowi4_iif)
+ fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
fl4.saddr = dst;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
+ fl4.flowi4_tun_key.tun_id = 0;
no_addr = idev->ifa_list == NULL;
@@ -337,6 +363,9 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (nh->nh_dev == dev) {
dev_match = true;
break;
+ } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+ dev_match = true;
+ break;
}
}
#else
@@ -494,9 +523,12 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
addr = sk_extract_addr(&rt->rt_gateway);
if (rt->rt_gateway.sa_family == AF_INET && addr) {
+ unsigned int addr_type;
+
cfg->fc_gw = addr;
+ addr_type = inet_addr_type_table(net, addr, cfg->fc_table);
if (rt->rt_flags & RTF_GATEWAY &&
- inet_addr_type(net, addr) == RTN_UNICAST)
+ addr_type == RTN_UNICAST)
cfg->fc_scope = RT_SCOPE_UNIVERSE;
}
@@ -591,6 +623,8 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_FLOW] = { .type = NLA_U32 },
+ [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
+ [RTA_ENCAP] = { .type = NLA_NESTED },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -656,6 +690,12 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
case RTA_TABLE:
cfg->fc_table = nla_get_u32(attr);
break;
+ case RTA_ENCAP:
+ cfg->fc_encap = attr;
+ break;
+ case RTA_ENCAP_TYPE:
+ cfg->fc_encap_type = nla_get_u16(attr);
+ break;
}
}
@@ -760,6 +800,7 @@ out:
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
+ int tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev);
struct fib_table *tb;
struct fib_config cfg = {
.fc_protocol = RTPROT_KERNEL,
@@ -774,11 +815,10 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
},
};
- if (type == RTN_UNICAST)
- tb = fib_new_table(net, RT_TABLE_MAIN);
- else
- tb = fib_new_table(net, RT_TABLE_LOCAL);
+ if (!tb_id)
+ tb_id = (type == RTN_UNICAST) ? RT_TABLE_MAIN : RT_TABLE_LOCAL;
+ tb = fib_new_table(net, tb_id);
if (!tb)
return;
@@ -960,11 +1000,14 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
}
if (!(ok & LOCAL_OK)) {
+ unsigned int addr_type;
+
fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
/* Check, that this local address finally disappeared. */
- if (gone &&
- inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) {
+ addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
+ ifa->ifa_local);
+ if (gone && addr_type != RTN_LOCAL) {
/* And the last, but not the least thing.
* We must flush stray FIB entries.
*
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 3a06586b170c..01f1c7dcd329 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -42,6 +42,7 @@
#include <net/ip_fib.h>
#include <net/netlink.h>
#include <net/nexthop.h>
+#include <net/lwtunnel.h>
#include "fib_lookup.h"
@@ -208,6 +209,7 @@ static void free_fib_info_rcu(struct rcu_head *head)
change_nexthops(fi) {
if (nexthop_nh->nh_dev)
dev_put(nexthop_nh->nh_dev);
+ lwtstate_put(nexthop_nh->nh_lwtstate);
free_nh_exceptions(nexthop_nh);
rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output);
rt_fibinfo_free(&nexthop_nh->nh_rth_input);
@@ -266,6 +268,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
#ifdef CONFIG_IP_ROUTE_CLASSID
nh->nh_tclassid != onh->nh_tclassid ||
#endif
+ lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) ||
((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
return -1;
onh++;
@@ -366,6 +369,7 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
if (fi->fib_nhs) {
+ size_t nh_encapsize = 0;
/* Also handles the special case fib_nhs == 1 */
/* each nexthop is packed in an attribute */
@@ -374,8 +378,21 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
/* may contain flow and gateway attribute */
nhsize += 2 * nla_total_size(4);
+ /* grab encap info */
+ for_nexthops(fi) {
+ if (nh->nh_lwtstate) {
+ /* RTA_ENCAP_TYPE */
+ nh_encapsize += lwtunnel_get_encap_size(
+ nh->nh_lwtstate);
+ /* RTA_ENCAP */
+ nh_encapsize += nla_total_size(2);
+ }
+ } endfor_nexthops(fi);
+
/* all nexthops are packed in a nested attribute */
- payload += nla_total_size(fi->fib_nhs * nhsize);
+ payload += nla_total_size((fi->fib_nhs * nhsize) +
+ nh_encapsize);
+
}
return payload;
@@ -421,13 +438,15 @@ static int fib_detect_death(struct fib_info *fi, int order,
if (n) {
state = n->nud_state;
neigh_release(n);
+ } else {
+ return 0;
}
if (state == NUD_REACHABLE)
return 0;
if ((state & NUD_VALID) && order != dflt)
return 0;
if ((state & NUD_VALID) ||
- (*last_idx < 0 && order > dflt)) {
+ (*last_idx < 0 && order > dflt && state != NUD_INCOMPLETE)) {
*last_resort = fi;
*last_idx = order;
}
@@ -452,6 +471,9 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
int remaining, struct fib_config *cfg)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
+ int ret;
+
change_nexthops(fi) {
int attrlen;
@@ -475,18 +497,68 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
if (nexthop_nh->nh_tclassid)
fi->fib_net->ipv4.fib_num_tclassid_users++;
#endif
+ nla = nla_find(attrs, attrlen, RTA_ENCAP);
+ if (nla) {
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+ struct nlattr *nla_entype;
+
+ nla_entype = nla_find(attrs, attrlen,
+ RTA_ENCAP_TYPE);
+ if (!nla_entype)
+ goto err_inval;
+ if (cfg->fc_oif)
+ dev = __dev_get_by_index(net, cfg->fc_oif);
+ ret = lwtunnel_build_state(dev, nla_get_u16(
+ nla_entype),
+ nla, &lwtstate);
+ if (ret)
+ goto errout;
+ nexthop_nh->nh_lwtstate =
+ lwtstate_get(lwtstate);
+ }
}
rtnh = rtnh_next(rtnh, &remaining);
} endfor_nexthops(fi);
return 0;
+
+err_inval:
+ ret = -EINVAL;
+
+errout:
+ return ret;
}
#endif
+static int fib_encap_match(struct net *net, u16 encap_type,
+ struct nlattr *encap,
+ int oif, const struct fib_nh *nh)
+{
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+ int ret, result = 0;
+
+ if (encap_type == LWTUNNEL_ENCAP_NONE)
+ return 0;
+
+ if (oif)
+ dev = __dev_get_by_index(net, oif);
+ ret = lwtunnel_build_state(dev, encap_type,
+ encap, &lwtstate);
+ if (!ret) {
+ result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate);
+ lwtstate_free(lwtstate);
+ }
+
+ return result;
+}
+
int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
{
+ struct net *net = cfg->fc_nlinfo.nl_net;
#ifdef CONFIG_IP_ROUTE_MULTIPATH
struct rtnexthop *rtnh;
int remaining;
@@ -496,6 +568,12 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
return 1;
if (cfg->fc_oif || cfg->fc_gw) {
+ if (cfg->fc_encap) {
+ if (fib_encap_match(net, cfg->fc_encap_type,
+ cfg->fc_encap, cfg->fc_oif,
+ fi->fib_nh))
+ return 1;
+ }
if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
(!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw))
return 0;
@@ -594,16 +672,18 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
struct fib_result res;
if (nh->nh_flags & RTNH_F_ONLINK) {
+ unsigned int addr_type;
if (cfg->fc_scope >= RT_SCOPE_LINK)
return -EINVAL;
- if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST)
- return -EINVAL;
dev = __dev_get_by_index(net, nh->nh_oif);
if (!dev)
return -ENODEV;
if (!(dev->flags & IFF_UP))
return -ENETDOWN;
+ addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw);
+ if (addr_type != RTN_UNICAST)
+ return -EINVAL;
if (!netif_carrier_ok(dev))
nh->nh_flags |= RTNH_F_LINKDOWN;
nh->nh_dev = dev;
@@ -613,6 +693,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
}
rcu_read_lock();
{
+ struct fib_table *tbl = NULL;
struct flowi4 fl4 = {
.daddr = nh->nh_gw,
.flowi4_scope = cfg->fc_scope + 1,
@@ -623,8 +704,24 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
/* It is not necessary, but requires a bit of thinking */
if (fl4.flowi4_scope < RT_SCOPE_LINK)
fl4.flowi4_scope = RT_SCOPE_LINK;
- err = fib_lookup(net, &fl4, &res,
- FIB_LOOKUP_IGNORE_LINKSTATE);
+
+ if (cfg->fc_table)
+ tbl = fib_get_table(net, cfg->fc_table);
+
+ if (tbl)
+ err = fib_table_lookup(tbl, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE |
+ FIB_LOOKUP_NOREF);
+
+ /* on error or if no table given do full lookup. This
+ * is needed for example when nexthops are in the local
+ * table rather than the given table
+ */
+ if (!tbl || err) {
+ err = fib_lookup(net, &fl4, &res,
+ FIB_LOOKUP_IGNORE_LINKSTATE);
+ }
+
if (err) {
rcu_read_unlock();
return err;
@@ -760,6 +857,23 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh)
return nh->nh_saddr;
}
+static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
+{
+ if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
+ fib_prefsrc != cfg->fc_dst) {
+ int tb_id = cfg->fc_table;
+
+ if (tb_id == RT_TABLE_MAIN)
+ tb_id = RT_TABLE_LOCAL;
+
+ if (inet_addr_type_table(cfg->fc_nlinfo.nl_net,
+ fib_prefsrc, tb_id) != RTN_LOCAL) {
+ return false;
+ }
+ }
+ return true;
+}
+
struct fib_info *fib_create_info(struct fib_config *cfg)
{
int err;
@@ -882,6 +996,21 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
} else {
struct fib_nh *nh = fi->fib_nh;
+ if (cfg->fc_encap) {
+ struct lwtunnel_state *lwtstate;
+ struct net_device *dev = NULL;
+
+ if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE)
+ goto err_inval;
+ if (cfg->fc_oif)
+ dev = __dev_get_by_index(net, cfg->fc_oif);
+ err = lwtunnel_build_state(dev, cfg->fc_encap_type,
+ cfg->fc_encap, &lwtstate);
+ if (err)
+ goto failure;
+
+ nh->nh_lwtstate = lwtstate_get(lwtstate);
+ }
nh->nh_oif = cfg->fc_oif;
nh->nh_gw = cfg->fc_gw;
nh->nh_flags = cfg->fc_flags;
@@ -940,12 +1069,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi->fib_flags |= RTNH_F_LINKDOWN;
}
- if (fi->fib_prefsrc) {
- if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
- fi->fib_prefsrc != cfg->fc_dst)
- if (inet_addr_type(net, fi->fib_prefsrc) != RTN_LOCAL)
- goto err_inval;
- }
+ if (fi->fib_prefsrc && !fib_valid_prefsrc(cfg, fi->fib_prefsrc))
+ goto err_inval;
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
@@ -1055,6 +1180,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
goto nla_put_failure;
#endif
+ if (fi->fib_nh->nh_lwtstate)
+ lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate);
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (fi->fib_nhs > 1) {
@@ -1090,6 +1217,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid))
goto nla_put_failure;
#endif
+ if (nh->nh_lwtstate)
+ lwtunnel_fill_encap(skb, nh->nh_lwtstate);
/* length of rtnetlink header + attributes */
rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
} endfor_nexthops(fi);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b0c6258ffb79..5154f81c5326 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1423,8 +1423,11 @@ found:
nh->nh_flags & RTNH_F_LINKDOWN &&
!(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
continue;
- if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
- continue;
+ if (!(flp->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
+ if (flp->flowi4_oif &&
+ flp->flowi4_oif != nh->nh_oif)
+ continue;
+ }
if (!(fib_flags & FIB_LOOKUP_NOREF))
atomic_inc(&fi->fib_clntref);
diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c
index 4a7b5b2a1ce3..d9c552a721fc 100644
--- a/net/ipv4/gre_demux.c
+++ b/net/ipv4/gre_demux.c
@@ -31,7 +31,6 @@
#include <net/xfrm.h>
static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly;
-static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX];
int gre_add_protocol(const struct gre_protocol *proto, u8 version)
{
@@ -61,197 +60,6 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version)
}
EXPORT_SYMBOL_GPL(gre_del_protocol);
-void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
- int hdr_len)
-{
- struct gre_base_hdr *greh;
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- greh = (struct gre_base_hdr *)skb->data;
- greh->flags = tnl_flags_to_gre_flags(tpi->flags);
- greh->protocol = tpi->proto;
-
- if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) {
- __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
-
- if (tpi->flags&TUNNEL_SEQ) {
- *ptr = tpi->seq;
- ptr--;
- }
- if (tpi->flags&TUNNEL_KEY) {
- *ptr = tpi->key;
- ptr--;
- }
- if (tpi->flags&TUNNEL_CSUM &&
- !(skb_shinfo(skb)->gso_type &
- (SKB_GSO_GRE|SKB_GSO_GRE_CSUM))) {
- *ptr = 0;
- *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
- skb->len, 0));
- }
- }
-}
-EXPORT_SYMBOL_GPL(gre_build_header);
-
-static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
- bool *csum_err)
-{
- const struct gre_base_hdr *greh;
- __be32 *options;
- int hdr_len;
-
- if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
- return -EINVAL;
-
- tpi->flags = gre_flags_to_tnl_flags(greh->flags);
- hdr_len = ip_gre_calc_hlen(tpi->flags);
-
- if (!pskb_may_pull(skb, hdr_len))
- return -EINVAL;
-
- greh = (struct gre_base_hdr *)skb_transport_header(skb);
- tpi->proto = greh->protocol;
-
- options = (__be32 *)(greh + 1);
- if (greh->flags & GRE_CSUM) {
- if (skb_checksum_simple_validate(skb)) {
- *csum_err = true;
- return -EINVAL;
- }
-
- skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
- null_compute_pseudo);
-
- options++;
- }
-
- if (greh->flags & GRE_KEY) {
- tpi->key = *options;
- options++;
- } else
- tpi->key = 0;
-
- if (unlikely(greh->flags & GRE_SEQ)) {
- tpi->seq = *options;
- options++;
- } else
- tpi->seq = 0;
-
- /* WCCP version 1 and 2 protocol decoding.
- * - Change protocol to IP
- * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
- */
- if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
- tpi->proto = htons(ETH_P_IP);
- if ((*(u8 *)options & 0xF0) != 0x40) {
- hdr_len += 4;
- if (!pskb_may_pull(skb, hdr_len))
- return -EINVAL;
- }
- }
-
- return iptunnel_pull_header(skb, hdr_len, tpi->proto);
-}
-
-static int gre_cisco_rcv(struct sk_buff *skb)
-{
- struct tnl_ptk_info tpi;
- int i;
- bool csum_err = false;
-
-#ifdef CONFIG_NET_IPGRE_BROADCAST
- if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
- /* Looped back packet, drop it! */
- if (rt_is_output_route(skb_rtable(skb)))
- goto drop;
- }
-#endif
-
- if (parse_gre_header(skb, &tpi, &csum_err) < 0)
- goto drop;
-
- rcu_read_lock();
- for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
- struct gre_cisco_protocol *proto;
- int ret;
-
- proto = rcu_dereference(gre_cisco_proto_list[i]);
- if (!proto)
- continue;
- ret = proto->handler(skb, &tpi);
- if (ret == PACKET_RCVD) {
- rcu_read_unlock();
- return 0;
- }
- }
- rcu_read_unlock();
-
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
-drop:
- kfree_skb(skb);
- return 0;
-}
-
-static void gre_cisco_err(struct sk_buff *skb, u32 info)
-{
- /* All the routers (except for Linux) return only
- * 8 bytes of packet payload. It means, that precise relaying of
- * ICMP in the real Internet is absolutely infeasible.
- *
- * Moreover, Cisco "wise men" put GRE key to the third word
- * in GRE header. It makes impossible maintaining even soft
- * state for keyed
- * GRE tunnels with enabled checksum. Tell them "thank you".
- *
- * Well, I wonder, rfc1812 was written by Cisco employee,
- * what the hell these idiots break standards established
- * by themselves???
- */
-
- const int type = icmp_hdr(skb)->type;
- const int code = icmp_hdr(skb)->code;
- struct tnl_ptk_info tpi;
- bool csum_err = false;
- int i;
-
- if (parse_gre_header(skb, &tpi, &csum_err)) {
- if (!csum_err) /* ignore csum errors. */
- return;
- }
-
- if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- skb->dev->ifindex, 0, IPPROTO_GRE, 0);
- return;
- }
- if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
- IPPROTO_GRE, 0);
- return;
- }
-
- rcu_read_lock();
- for (i = 0; i < GRE_IP_PROTO_MAX; i++) {
- struct gre_cisco_protocol *proto;
-
- proto = rcu_dereference(gre_cisco_proto_list[i]);
- if (!proto)
- continue;
-
- if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD)
- goto out;
-
- }
-out:
- rcu_read_unlock();
-}
-
static int gre_rcv(struct sk_buff *skb)
{
const struct gre_protocol *proto;
@@ -302,60 +110,19 @@ static const struct net_protocol net_gre_protocol = {
.netns_ok = 1,
};
-static const struct gre_protocol ipgre_protocol = {
- .handler = gre_cisco_rcv,
- .err_handler = gre_cisco_err,
-};
-
-int gre_cisco_register(struct gre_cisco_protocol *newp)
-{
- struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
- &gre_cisco_proto_list[newp->priority];
-
- return (cmpxchg(proto, NULL, newp) == NULL) ? 0 : -EBUSY;
-}
-EXPORT_SYMBOL_GPL(gre_cisco_register);
-
-int gre_cisco_unregister(struct gre_cisco_protocol *del_proto)
-{
- struct gre_cisco_protocol **proto = (struct gre_cisco_protocol **)
- &gre_cisco_proto_list[del_proto->priority];
- int ret;
-
- ret = (cmpxchg(proto, del_proto, NULL) == del_proto) ? 0 : -EINVAL;
-
- if (ret)
- return ret;
-
- synchronize_net();
- return 0;
-}
-EXPORT_SYMBOL_GPL(gre_cisco_unregister);
-
static int __init gre_init(void)
{
pr_info("GRE over IPv4 demultiplexor driver\n");
if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) {
pr_err("can't add protocol\n");
- goto err;
- }
-
- if (gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) {
- pr_info("%s: can't add ipgre handler\n", __func__);
- goto err_gre;
+ return -EAGAIN;
}
-
return 0;
-err_gre:
- inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
-err:
- return -EAGAIN;
}
static void __exit gre_exit(void)
{
- gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
inet_del_protocol(&net_gre_protocol, IPPROTO_GRE);
}
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f5203fba6236..f16488efa1c8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -96,6 +96,7 @@
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/ip_fib.h>
+#include <net/vrf.h>
/*
* Build xmit assembly blocks
@@ -425,6 +426,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
+ fl4.flowi4_oif = vrf_master_ifindex(skb->dev) ? : skb->dev->ifindex;
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
@@ -458,6 +460,8 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
+ fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev) ? : skb_in->dev->ifindex;
+
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key(net, fl4);
if (IS_ERR(rt))
@@ -480,7 +484,8 @@ static struct rtable *icmp_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- if (inet_addr_type(net, fl4_dec.saddr) == RTN_LOCAL) {
+ if (inet_addr_type_dev_table(net, skb_in->dev,
+ fl4_dec.saddr) == RTN_LOCAL) {
rt2 = __ip_route_output_key(net, &fl4_dec);
if (IS_ERR(rt2))
err = PTR_ERR(rt2);
@@ -496,6 +501,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
}
/* Ugh! */
orefdst = skb_in->_skb_refdst; /* save old refdst */
+ skb_dst_set(skb_in, NULL);
err = ip_route_input(skb_in, fl4_dec.daddr, fl4_dec.saddr,
RT_TOS(tos), rt2->dst.dev);
@@ -828,7 +834,7 @@ static bool icmp_unreach(struct sk_buff *skb)
*/
if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
- inet_addr_type(net, iph->daddr) == RTN_BROADCAST) {
+ inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
&ip_hdr(skb)->saddr,
icmph->type, icmph->code,
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 0cb9165421d4..89120196a949 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -343,7 +343,6 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL;
- int twrefcnt = 0;
spin_lock(lock);
@@ -371,21 +370,17 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
- twrefcnt = inet_twsk_unhash(tw);
+ sk_nulls_del_node_init_rcu((struct sock *)tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
- if (twrefcnt)
- inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) {
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw);
-
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
}
return 0;
@@ -403,13 +398,12 @@ static u32 inet_sk_port_offset(const struct sock *sk)
inet->inet_dport);
}
-int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
struct inet_ehash_bucket *head;
spinlock_t *lock;
- int twrefcnt = 0;
WARN_ON(!sk_unhashed(sk));
@@ -420,23 +414,22 @@ int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw)
spin_lock(lock);
__sk_nulls_add_node_rcu(sk, list);
- if (tw) {
- WARN_ON(sk->sk_hash != tw->tw_hash);
- twrefcnt = inet_twsk_unhash(tw);
+ if (osk) {
+ WARN_ON(sk->sk_hash != osk->sk_hash);
+ sk_nulls_del_node_init_rcu(osk);
}
spin_unlock(lock);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
- return twrefcnt;
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
-int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
+void __inet_hash(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct inet_listen_hashbucket *ilb;
if (sk->sk_state != TCP_LISTEN)
- return __inet_hash_nolisten(sk, tw);
+ return __inet_hash_nolisten(sk, osk);
WARN_ON(!sk_unhashed(sk));
ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)];
@@ -445,7 +438,6 @@ int __inet_hash(struct sock *sk, struct inet_timewait_sock *tw)
__sk_nulls_add_node_rcu(sk, &ilb->head);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
spin_unlock(&ilb->lock);
- return 0;
}
EXPORT_SYMBOL(__inet_hash);
@@ -492,7 +484,6 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
struct inet_bind_bucket *tb;
int ret;
struct net *net = sock_net(sk);
- int twrefcnt = 1;
if (!snum) {
int i, remaining, low, high, port;
@@ -560,19 +551,14 @@ ok:
inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
- twrefcnt += __inet_hash_nolisten(sk, tw);
+ __inet_hash_nolisten(sk, (struct sock *)tw);
}
if (tw)
- twrefcnt += inet_twsk_bind_unhash(tw, hinfo);
+ inet_twsk_bind_unhash(tw, hinfo);
spin_unlock(&head->lock);
- if (tw) {
- inet_twsk_deschedule(tw);
- while (twrefcnt) {
- twrefcnt--;
- inet_twsk_put(tw);
- }
- }
+ if (tw)
+ inet_twsk_deschedule_put(tw);
ret = 0;
goto out;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 2ffbd16b79e0..ae22cc24fbe8 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -18,28 +18,6 @@
/**
- * inet_twsk_unhash - unhash a timewait socket from established hash
- * @tw: timewait socket
- *
- * unhash a timewait socket from established hash, if hashed.
- * ehash lock must be held by caller.
- * Returns 1 if caller should call inet_twsk_put() after lock release.
- */
-int inet_twsk_unhash(struct inet_timewait_sock *tw)
-{
- if (hlist_nulls_unhashed(&tw->tw_node))
- return 0;
-
- hlist_nulls_del_rcu(&tw->tw_node);
- sk_nulls_node_init(&tw->tw_node);
- /*
- * We cannot call inet_twsk_put() ourself under lock,
- * caller must call it for us.
- */
- return 1;
-}
-
-/**
* inet_twsk_bind_unhash - unhash a timewait socket from bind hash
* @tw: timewait socket
* @hashinfo: hashinfo pointer
@@ -48,35 +26,29 @@ int inet_twsk_unhash(struct inet_timewait_sock *tw)
* bind hash lock must be held by caller.
* Returns 1 if caller should call inet_twsk_put() after lock release.
*/
-int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
+void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
{
struct inet_bind_bucket *tb = tw->tw_tb;
if (!tb)
- return 0;
+ return;
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
- /*
- * We cannot call inet_twsk_put() ourself under lock,
- * caller must call it for us.
- */
- return 1;
+ __sock_put((struct sock *)tw);
}
/* Must be called with locally disabled BHs. */
static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
- struct inet_bind_hashbucket *bhead;
- int refcnt;
- /* Unlink from established hashes. */
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+ struct inet_bind_hashbucket *bhead;
spin_lock(lock);
- refcnt = inet_twsk_unhash(tw);
+ sk_nulls_del_node_init_rcu((struct sock *)tw);
spin_unlock(lock);
/* Disassociate with bind bucket. */
@@ -84,11 +56,9 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
hashinfo->bhash_size)];
spin_lock(&bhead->lock);
- refcnt += inet_twsk_bind_unhash(tw, hashinfo);
+ inet_twsk_bind_unhash(tw, hashinfo);
spin_unlock(&bhead->lock);
- BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
- atomic_sub(refcnt, &tw->tw_refcnt);
atomic_dec(&tw->tw_dr->tw_count);
inet_twsk_put(tw);
}
@@ -235,13 +205,17 @@ EXPORT_SYMBOL_GPL(inet_twsk_alloc);
* tcp_input.c to verify this.
*/
-/* This is for handling early-kills of TIME_WAIT sockets. */
-void inet_twsk_deschedule(struct inet_timewait_sock *tw)
+/* This is for handling early-kills of TIME_WAIT sockets.
+ * Warning : consume reference.
+ * Caller should not access tw anymore.
+ */
+void inet_twsk_deschedule_put(struct inet_timewait_sock *tw)
{
if (del_timer_sync(&tw->tw_timer))
inet_twsk_kill(tw);
+ inet_twsk_put(tw);
}
-EXPORT_SYMBOL(inet_twsk_deschedule);
+EXPORT_SYMBOL(inet_twsk_deschedule_put);
void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
{
@@ -311,9 +285,8 @@ restart:
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule(tw);
+ inet_twsk_deschedule_put(tw);
local_bh_enable();
- inet_twsk_put(tw);
goto restart_rcu;
}
/* If the nulls value we got at the end of this lookup is
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 921138f6c97c..15762e758861 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -48,6 +48,7 @@
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
+#include <net/vrf.h>
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -77,6 +78,7 @@ struct ipq {
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
+ int vif; /* VRF device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -99,6 +101,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct ip4_create_arg {
struct iphdr *iph;
u32 user;
+ int vif;
};
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
@@ -127,7 +130,8 @@ static bool ip4_frag_match(const struct inet_frag_queue *q, const void *a)
qp->saddr == arg->iph->saddr &&
qp->daddr == arg->iph->daddr &&
qp->protocol == arg->iph->protocol &&
- qp->user == arg->user;
+ qp->user == arg->user &&
+ qp->vif == arg->vif;
}
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
@@ -144,6 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
qp->ecn = ip4_frag_ecn(arg->iph->tos);
qp->saddr = arg->iph->saddr;
qp->daddr = arg->iph->daddr;
+ qp->vif = arg->vif;
qp->user = arg->user;
qp->peer = sysctl_ipfrag_max_dist ?
inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
@@ -244,7 +249,8 @@ out:
/* Find the correct entry in the "incomplete datagrams" queue for
* this IP datagram, and create new one, if nothing is found.
*/
-static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
+static struct ipq *ip_find(struct net *net, struct iphdr *iph,
+ u32 user, int vif)
{
struct inet_frag_queue *q;
struct ip4_create_arg arg;
@@ -252,6 +258,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
arg.iph = iph;
arg.user = user;
+ arg.vif = vif;
hash = ipqhashfn(iph->id, iph->saddr, iph->daddr, iph->protocol);
@@ -522,7 +529,6 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
int len;
int ihlen;
int err;
- int sum_truesize;
u8 ecn;
ipq_kill(qp);
@@ -590,32 +596,19 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
add_frag_mem_limit(qp->q.net, clone->truesize);
}
+ skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));
- sum_truesize = head->truesize;
- for (fp = head->next; fp;) {
- bool headstolen;
- int delta;
- struct sk_buff *next = fp->next;
-
- sum_truesize += fp->truesize;
+ for (fp=head->next; fp; fp = fp->next) {
+ head->data_len += fp->len;
+ head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
head->ip_summed = CHECKSUM_NONE;
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
-
- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
- kfree_skb_partial(fp, headstolen);
- } else {
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = fp;
- head->data_len += fp->len;
- head->len += fp->len;
- head->truesize += fp->truesize;
- }
- fp = next;
+ head->truesize += fp->truesize;
}
- sub_frag_mem_limit(qp->q.net, sum_truesize);
+ sub_frag_mem_limit(qp->q.net, head->truesize);
head->next = NULL;
head->dev = dev;
@@ -662,14 +655,15 @@ out_fail:
/* Process an incoming IP datagram fragment. */
int ip_defrag(struct sk_buff *skb, u32 user)
{
+ struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
+ int vif = vrf_master_ifindex_rcu(dev);
+ struct net *net = dev_net(dev);
struct ipq *qp;
- struct net *net;
- net = skb->dev ? dev_net(skb->dev) : dev_net(skb_dst(skb)->dev);
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
/* Lookup (or create) queue header */
- qp = ip_find(net, ip_hdr(skb), user);
+ qp = ip_find(net, ip_hdr(skb), user, vif);
if (qp) {
int ret;
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 5fd706473c73..1bf328182697 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -25,6 +25,7 @@
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
+#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
@@ -47,6 +48,7 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
+#include <net/dst_metadata.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -121,8 +123,127 @@ static int ipgre_tunnel_init(struct net_device *dev);
static int ipgre_net_id __read_mostly;
static int gre_tap_net_id __read_mostly;
-static int ipgre_err(struct sk_buff *skb, u32 info,
- const struct tnl_ptk_info *tpi)
+static int ip_gre_calc_hlen(__be16 o_flags)
+{
+ int addend = 4;
+
+ if (o_flags & TUNNEL_CSUM)
+ addend += 4;
+ if (o_flags & TUNNEL_KEY)
+ addend += 4;
+ if (o_flags & TUNNEL_SEQ)
+ addend += 4;
+ return addend;
+}
+
+static __be16 gre_flags_to_tnl_flags(__be16 flags)
+{
+ __be16 tflags = 0;
+
+ if (flags & GRE_CSUM)
+ tflags |= TUNNEL_CSUM;
+ if (flags & GRE_ROUTING)
+ tflags |= TUNNEL_ROUTING;
+ if (flags & GRE_KEY)
+ tflags |= TUNNEL_KEY;
+ if (flags & GRE_SEQ)
+ tflags |= TUNNEL_SEQ;
+ if (flags & GRE_STRICT)
+ tflags |= TUNNEL_STRICT;
+ if (flags & GRE_REC)
+ tflags |= TUNNEL_REC;
+ if (flags & GRE_VERSION)
+ tflags |= TUNNEL_VERSION;
+
+ return tflags;
+}
+
+static __be16 tnl_flags_to_gre_flags(__be16 tflags)
+{
+ __be16 flags = 0;
+
+ if (tflags & TUNNEL_CSUM)
+ flags |= GRE_CSUM;
+ if (tflags & TUNNEL_ROUTING)
+ flags |= GRE_ROUTING;
+ if (tflags & TUNNEL_KEY)
+ flags |= GRE_KEY;
+ if (tflags & TUNNEL_SEQ)
+ flags |= GRE_SEQ;
+ if (tflags & TUNNEL_STRICT)
+ flags |= GRE_STRICT;
+ if (tflags & TUNNEL_REC)
+ flags |= GRE_REC;
+ if (tflags & TUNNEL_VERSION)
+ flags |= GRE_VERSION;
+
+ return flags;
+}
+
+static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ bool *csum_err)
+{
+ const struct gre_base_hdr *greh;
+ __be32 *options;
+ int hdr_len;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr))))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING)))
+ return -EINVAL;
+
+ tpi->flags = gre_flags_to_tnl_flags(greh->flags);
+ hdr_len = ip_gre_calc_hlen(tpi->flags);
+
+ if (!pskb_may_pull(skb, hdr_len))
+ return -EINVAL;
+
+ greh = (struct gre_base_hdr *)skb_transport_header(skb);
+ tpi->proto = greh->protocol;
+
+ options = (__be32 *)(greh + 1);
+ if (greh->flags & GRE_CSUM) {
+ if (skb_checksum_simple_validate(skb)) {
+ *csum_err = true;
+ return -EINVAL;
+ }
+
+ skb_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ null_compute_pseudo);
+ options++;
+ }
+
+ if (greh->flags & GRE_KEY) {
+ tpi->key = *options;
+ options++;
+ } else {
+ tpi->key = 0;
+ }
+ if (unlikely(greh->flags & GRE_SEQ)) {
+ tpi->seq = *options;
+ options++;
+ } else {
+ tpi->seq = 0;
+ }
+ /* WCCP version 1 and 2 protocol decoding.
+ * - Change protocol to IP
+ * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
+ */
+ if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) {
+ tpi->proto = htons(ETH_P_IP);
+ if ((*(u8 *)options & 0xF0) != 0x40) {
+ hdr_len += 4;
+ if (!pskb_may_pull(skb, hdr_len))
+ return -EINVAL;
+ }
+ }
+ return iptunnel_pull_header(skb, hdr_len, tpi->proto);
+}
+
+static void ipgre_err(struct sk_buff *skb, u32 info,
+ const struct tnl_ptk_info *tpi)
{
/* All the routers (except for Linux) return only
@@ -148,14 +269,14 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
switch (type) {
default:
case ICMP_PARAMETERPROB:
- return PACKET_RCVD;
+ return;
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
case ICMP_PORT_UNREACH:
/* Impossible event. */
- return PACKET_RCVD;
+ return;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -164,9 +285,10 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
break;
}
break;
+
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
- return PACKET_RCVD;
+ return;
break;
case ICMP_REDIRECT:
@@ -183,26 +305,85 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
iph->daddr, iph->saddr, tpi->key);
if (!t)
- return PACKET_REJECT;
+ return;
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
- return PACKET_RCVD;
+ return;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- return PACKET_RCVD;
+ return;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
- return PACKET_RCVD;
+}
+
+static void gre_err(struct sk_buff *skb, u32 info)
+{
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ *
+ * Moreover, Cisco "wise men" put GRE key to the third word
+ * in GRE header. It makes impossible maintaining even soft
+ * state for keyed
+ * GRE tunnels with enabled checksum. Tell them "thank you".
+ *
+ * Well, I wonder, rfc1812 was written by Cisco employee,
+ * what the hell these idiots break standards established
+ * by themselves???
+ */
+
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+
+ if (parse_gre_header(skb, &tpi, &csum_err)) {
+ if (!csum_err) /* ignore csum errors. */
+ return;
+ }
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+ skb->dev->ifindex, 0, IPPROTO_GRE, 0);
+ return;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0,
+ IPPROTO_GRE, 0);
+ return;
+ }
+
+ ipgre_err(skb, info, &tpi);
+}
+
+static __be64 key_to_tunnel_id(__be32 key)
+{
+#ifdef __BIG_ENDIAN
+ return (__force __be64)((__force u32)key);
+#else
+ return (__force __be64)((__force u64)key << 32);
+#endif
+}
+
+/* Returns the least-significant 32 bits of a __be64. */
+static __be32 tunnel_id_to_key(__be64 x)
+{
+#ifdef __BIG_ENDIAN
+ return (__force __be32)x;
+#else
+ return (__force __be32)((__force u64)x >> 32);
+#endif
}
static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
{
struct net *net = dev_net(skb->dev);
+ struct metadata_dst *tun_dst = NULL;
struct ip_tunnel_net *itn;
const struct iphdr *iph;
struct ip_tunnel *tunnel;
@@ -218,40 +399,194 @@ static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
if (tunnel) {
skb_pop_mac_header(skb);
- ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
+ if (tunnel->collect_md) {
+ struct ip_tunnel_info *info;
+
+ tun_dst = metadata_dst_alloc(0, GFP_ATOMIC);
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ info = &tun_dst->u.tun_info;
+ info->key.u.ipv4.src = iph->saddr;
+ info->key.u.ipv4.dst = iph->daddr;
+ info->key.tos = iph->tos;
+ info->key.ttl = iph->ttl;
+
+ info->mode = IP_TUNNEL_INFO_RX;
+ info->key.tun_flags = tpi->flags &
+ (TUNNEL_CSUM | TUNNEL_KEY);
+ info->key.tun_id = key_to_tunnel_id(tpi->key);
+
+ info->key.tp_src = 0;
+ info->key.tp_dst = 0;
+ }
+
+ ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
}
return PACKET_REJECT;
}
+static int gre_rcv(struct sk_buff *skb)
+{
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
+ /* Looped back packet, drop it! */
+ if (rt_is_output_route(skb_rtable(skb)))
+ goto drop;
+ }
+#endif
+
+ if (parse_gre_header(skb, &tpi, &csum_err) < 0)
+ goto drop;
+
+ if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)
+ return 0;
+
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags,
+ __be16 proto, __be32 key, __be32 seq)
+{
+ struct gre_base_hdr *greh;
+
+ skb_push(skb, hdr_len);
+
+ skb_reset_transport_header(skb);
+ greh = (struct gre_base_hdr *)skb->data;
+ greh->flags = tnl_flags_to_gre_flags(flags);
+ greh->protocol = proto;
+
+ if (flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) {
+ __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4);
+
+ if (flags & TUNNEL_SEQ) {
+ *ptr = seq;
+ ptr--;
+ }
+ if (flags & TUNNEL_KEY) {
+ *ptr = key;
+ ptr--;
+ }
+ if (flags & TUNNEL_CSUM &&
+ !(skb_shinfo(skb)->gso_type &
+ (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) {
+ *ptr = 0;
+ *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0,
+ skb->len, 0));
+ }
+ }
+}
+
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct tnl_ptk_info tpi;
- tpi.flags = tunnel->parms.o_flags;
- tpi.proto = proto;
- tpi.key = tunnel->parms.o_key;
if (tunnel->parms.o_flags & TUNNEL_SEQ)
tunnel->o_seqno++;
- tpi.seq = htonl(tunnel->o_seqno);
/* Push GRE header. */
- gre_build_header(skb, &tpi, tunnel->tun_hlen);
-
- skb_set_inner_protocol(skb, tpi.proto);
+ build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ proto, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ skb_set_inner_protocol(skb, proto);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
+static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
+ bool csum)
+{
+ return iptunnel_handle_offloads(skb, csum,
+ csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel_info *tun_info;
+ struct net *net = dev_net(dev);
+ const struct ip_tunnel_key *key;
+ struct flowi4 fl;
+ struct rtable *rt;
+ int min_headroom;
+ int tunnel_hlen;
+ __be16 df, flags;
+ int err;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || tun_info->mode != IP_TUNNEL_INFO_TX))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ memset(&fl, 0, sizeof(fl));
+ fl.daddr = key->u.ipv4.dst;
+ fl.saddr = key->u.ipv4.src;
+ fl.flowi4_tos = RT_TOS(key->tos);
+ fl.flowi4_mark = skb->mark;
+ fl.flowi4_proto = IPPROTO_GRE;
+
+ rt = ip_route_output_key(net, &fl);
+ if (IS_ERR(rt))
+ goto err_free_skb;
+
+ tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
+
+ min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ + tunnel_hlen + sizeof(struct iphdr);
+ if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
+ int head_delta = SKB_DATA_ALIGN(min_headroom -
+ skb_headroom(skb) +
+ 16);
+ err = pskb_expand_head(skb, max_t(int, head_delta, 0),
+ 0, GFP_ATOMIC);
+ if (unlikely(err))
+ goto err_free_rt;
+ }
+
+ /* Push Tunnel header. */
+ skb = gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM));
+ if (IS_ERR(skb)) {
+ skb = NULL;
+ goto err_free_rt;
+ }
+
+ flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB),
+ tunnel_id_to_key(tun_info->key.tun_id), 0);
+
+ df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+ err = iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
+ key->u.ipv4.dst, IPPROTO_GRE,
+ key->tos, key->ttl, df, false);
+ iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+ return;
+
+err_free_rt:
+ ip_rt_put(rt);
+err_free_skb:
+ kfree_skb(skb);
+ dev->stats.tx_dropped++;
+}
+
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
+ if (tunnel->collect_md) {
+ gre_fb_xmit(skb, dev);
+ return NETDEV_TX_OK;
+ }
+
if (dev->header_ops) {
/* Need space for new headers */
if (skb_cow_head(skb, dev->needed_headroom -
@@ -277,7 +612,6 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
goto out;
__gre_xmit(skb, dev, tnl_params, skb->protocol);
-
return NETDEV_TX_OK;
free_skb:
@@ -292,6 +626,11 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ if (tunnel->collect_md) {
+ gre_fb_xmit(skb, dev);
+ return NETDEV_TX_OK;
+ }
+
skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
if (IS_ERR(skb))
goto out;
@@ -300,7 +639,6 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
goto free_skb;
__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
-
return NETDEV_TX_OK;
free_skb:
@@ -530,10 +868,9 @@ static int ipgre_tunnel_init(struct net_device *dev)
return ip_tunnel_init(dev);
}
-static struct gre_cisco_protocol ipgre_protocol = {
- .handler = ipgre_rcv,
- .err_handler = ipgre_err,
- .priority = 0,
+static const struct gre_protocol ipgre_protocol = {
+ .handler = gre_rcv,
+ .err_handler = gre_err,
};
static int __net_init ipgre_init_net(struct net *net)
@@ -596,8 +933,10 @@ out:
return ipgre_tunnel_validate(tb, data);
}
-static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
- struct ip_tunnel_parm *parms)
+static void ipgre_netlink_parms(struct net_device *dev,
+ struct nlattr *data[],
+ struct nlattr *tb[],
+ struct ip_tunnel_parm *parms)
{
memset(parms, 0, sizeof(*parms));
@@ -635,6 +974,12 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
parms->iph.frag_off = htons(IP_DF);
+
+ if (data[IFLA_GRE_COLLECT_METADATA]) {
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ t->collect_md = true;
+ }
}
/* This function returns true when ENCAP attributes are present in the nl msg */
@@ -712,7 +1057,7 @@ static int ipgre_newlink(struct net *src_net, struct net_device *dev,
return err;
}
- ipgre_netlink_parms(data, tb, &p);
+ ipgre_netlink_parms(dev, data, tb, &p);
return ip_tunnel_newlink(dev, tb, &p);
}
@@ -730,7 +1075,7 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
return err;
}
- ipgre_netlink_parms(data, tb, &p);
+ ipgre_netlink_parms(dev, data, tb, &p);
return ip_tunnel_changelink(dev, tb, &p);
}
@@ -765,6 +1110,8 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_COLLECT_METADATA */
+ nla_total_size(0) +
0;
}
@@ -796,6 +1143,11 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (t->collect_md) {
+ if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
@@ -817,6 +1169,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
@@ -849,9 +1202,38 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
.get_link_net = ip_tunnel_get_link_net,
};
+struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
+ u8 name_assign_type)
+{
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net_device *dev;
+ struct ip_tunnel *t;
+ int err;
+
+ memset(&tb, 0, sizeof(tb));
+
+ dev = rtnl_create_link(net, name, name_assign_type,
+ &ipgre_tap_ops, tb);
+ if (IS_ERR(dev))
+ return dev;
+
+ /* Configure flow based GRE device. */
+ t = netdev_priv(dev);
+ t->collect_md = true;
+
+ err = ipgre_newlink(net, dev, tb, NULL);
+ if (err < 0)
+ goto out;
+ return dev;
+out:
+ free_netdev(dev);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
+
static int __net_init ipgre_tap_init_net(struct net *net)
{
- return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
+ return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
static void __net_exit ipgre_tap_exit_net(struct net *net)
@@ -881,7 +1263,7 @@ static int __init ipgre_init(void)
if (err < 0)
goto pnet_tap_faied;
- err = gre_cisco_register(&ipgre_protocol);
+ err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
pr_info("%s: can't add protocol\n", __func__);
goto add_proto_failed;
@@ -900,7 +1282,7 @@ static int __init ipgre_init(void)
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- gre_cisco_unregister(&ipgre_protocol);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
unregister_pernet_device(&ipgre_tap_net_ops);
pnet_tap_faied:
@@ -912,7 +1294,7 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
- gre_cisco_unregister(&ipgre_protocol);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
unregister_pernet_device(&ipgre_tap_net_ops);
unregister_pernet_device(&ipgre_net_ops);
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 2db4c8773c1b..f4fc8a77aaa7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -146,6 +146,7 @@
#include <net/xfrm.h>
#include <linux/mroute.h>
#include <linux/netlink.h>
+#include <net/dst_metadata.h>
/*
* Process Router Attention IP option (RFC 2113)
@@ -331,7 +332,7 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
* Initialise the virtual path cache for the packet. It describes
* how the packet travels inside Linux networking.
*/
- if (!skb_dst(skb)) {
+ if (!skb_valid_dst(skb)) {
int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
iph->tos, skb->dev);
if (unlikely(err)) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6bf89a6312bc..0138fada0951 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1542,6 +1542,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
struct net *net = sock_net(sk);
struct sk_buff *nskb;
int err;
+ int oif;
if (__ip_options_echo(&replyopts.opt.opt, skb, sopt))
return;
@@ -1559,7 +1560,11 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
daddr = replyopts.opt.opt.faddr;
}
- flowi4_init_output(&fl4, arg->bound_dev_if,
+ oif = arg->bound_dev_if;
+ if (!oif && netif_index_is_vrf(net, skb->skb_iif))
+ oif = skb->skb_iif;
+
+ flowi4_init_output(&fl4, oif,
IP4_REPLY_MARK(net, skb->mark),
RT_TOS(arg->tos),
RT_SCOPE_UNIVERSE, ip_hdr(skb)->protocol,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 626d9e56a6bd..cbb51f3fac06 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -230,10 +230,13 @@ skip_key_lookup:
if (cand)
return cand;
+ t = rcu_dereference(itn->collect_md_tun);
+ if (t)
+ return t;
+
if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
return netdev_priv(itn->fb_tunnel_dev);
-
return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
@@ -261,11 +264,15 @@ static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
struct hlist_head *head = ip_bucket(itn, &t->parms);
+ if (t->collect_md)
+ rcu_assign_pointer(itn->collect_md_tun, t);
hlist_add_head_rcu(&t->hash_node, head);
}
-static void ip_tunnel_del(struct ip_tunnel *t)
+static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
+ if (t->collect_md)
+ rcu_assign_pointer(itn->collect_md_tun, NULL);
hlist_del_init_rcu(&t->hash_node);
}
@@ -419,7 +426,8 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
}
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi, bool log_ecn_error)
+ const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
+ bool log_ecn_error)
{
struct pcpu_sw_netstats *tstats;
const struct iphdr *iph = ip_hdr(skb);
@@ -478,6 +486,9 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
skb->dev = tunnel->dev;
}
+ if (tun_dst)
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
+
gro_cells_receive(&tunnel->gro_cells, skb);
return 0;
@@ -806,7 +817,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel_parm *p,
bool set_mtu)
{
- ip_tunnel_del(t);
+ ip_tunnel_del(itn, t);
t->parms.iph.saddr = p->iph.saddr;
t->parms.iph.daddr = p->iph.daddr;
t->parms.i_key = p->i_key;
@@ -967,7 +978,7 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
if (itn->fb_tunnel_dev != dev) {
- ip_tunnel_del(netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
unregister_netdevice_queue(dev, head);
}
}
@@ -1072,8 +1083,13 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
nt = netdev_priv(dev);
itn = net_generic(net, nt->ip_tnl_net_id);
- if (ip_tunnel_find(itn, p, dev->type))
- return -EEXIST;
+ if (nt->collect_md) {
+ if (rtnl_dereference(itn->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip_tunnel_find(itn, p, dev->type))
+ return -EEXIST;
+ }
nt->net = net;
nt->parms = *p;
@@ -1089,7 +1105,6 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
dev->mtu = mtu;
ip_tunnel_add(itn, nt);
-
out:
return err;
}
@@ -1163,6 +1178,10 @@ int ip_tunnel_init(struct net_device *dev)
iph->version = 4;
iph->ihl = 5;
+ if (tunnel->collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
@@ -1176,7 +1195,7 @@ void ip_tunnel_uninit(struct net_device *dev)
itn = net_generic(net, tunnel->ip_tnl_net_id);
/* fb_tunnel_dev will be unregisted in net-exit call. */
if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
ip_tunnel_dst_reset_all(tunnel);
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6a51a71a6c67..289b6c26ce37 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -32,6 +32,7 @@
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
+#include <linux/static_key.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -190,3 +191,232 @@ struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
+
+/* Netlink policy for the LWTUNNEL_IP_* attributes.  ip_tun_build_state()
+ * passes it to nla_parse_nested() when parsing an encap request.
+ */
+static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
+ [LWTUNNEL_IP_ID] = { .type = NLA_U64 },
+ [LWTUNNEL_IP_DST] = { .type = NLA_U32 },
+ [LWTUNNEL_IP_SRC] = { .type = NLA_U32 },
+ [LWTUNNEL_IP_TTL] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_TOS] = { .type = NLA_U8 },
+ [LWTUNNEL_IP_SPORT] = { .type = NLA_U16 },
+ [LWTUNNEL_IP_DPORT] = { .type = NLA_U16 },
+ [LWTUNNEL_IP_FLAGS] = { .type = NLA_U16 },
+};
+
+/* Parse the nested LWTUNNEL_IP_* attributes in @attr and build a new
+ * LWTUNNEL_ENCAP_IP state describing the IPv4 tunnel key.
+ *
+ * On success the new state is stored in *@ts and 0 is returned.  Returns the
+ * nla_parse_nested() error on malformed input, or -ENOMEM if the state could
+ * not be allocated.  @dev is not used.
+ */
+static int ip_tun_build_state(struct net_device *dev, struct nlattr *attr,
+			      struct lwtunnel_state **ts)
+{
+	struct nlattr *attrs[LWTUNNEL_IP_MAX + 1];
+	struct lwtunnel_state *lwt;
+	struct ip_tunnel_info *info;
+	int rc;
+
+	rc = nla_parse_nested(attrs, LWTUNNEL_IP_MAX, attr, ip_tun_policy);
+	if (rc < 0)
+		return rc;
+
+	lwt = lwtunnel_state_alloc(sizeof(*info));
+	if (!lwt)
+		return -ENOMEM;
+	lwt->type = LWTUNNEL_ENCAP_IP;
+
+	info = lwt_tun_info(lwt);
+
+	/* Fixed part: transmit-side info without tunnel options. */
+	info->mode = IP_TUNNEL_INFO_TX;
+	info->options = NULL;
+	info->options_len = 0;
+
+	/* Every attribute is optional; copy only those that were supplied. */
+	if (attrs[LWTUNNEL_IP_ID])
+		info->key.tun_id = nla_get_u64(attrs[LWTUNNEL_IP_ID]);
+	if (attrs[LWTUNNEL_IP_DST])
+		info->key.u.ipv4.dst = nla_get_be32(attrs[LWTUNNEL_IP_DST]);
+	if (attrs[LWTUNNEL_IP_SRC])
+		info->key.u.ipv4.src = nla_get_be32(attrs[LWTUNNEL_IP_SRC]);
+	if (attrs[LWTUNNEL_IP_TTL])
+		info->key.ttl = nla_get_u8(attrs[LWTUNNEL_IP_TTL]);
+	if (attrs[LWTUNNEL_IP_TOS])
+		info->key.tos = nla_get_u8(attrs[LWTUNNEL_IP_TOS]);
+	if (attrs[LWTUNNEL_IP_SPORT])
+		info->key.tp_src = nla_get_be16(attrs[LWTUNNEL_IP_SPORT]);
+	if (attrs[LWTUNNEL_IP_DPORT])
+		info->key.tp_dst = nla_get_be16(attrs[LWTUNNEL_IP_DPORT]);
+	if (attrs[LWTUNNEL_IP_FLAGS])
+		info->key.tun_flags = nla_get_u16(attrs[LWTUNNEL_IP_FLAGS]);
+
+	*ts = lwt;
+	return 0;
+}
+
+/* Dump the IPv4 tunnel key held in @lwtstate into @skb as LWTUNNEL_IP_*
+ * attributes, in the order ID, DST, SRC, TOS, TTL, SPORT, DPORT, FLAGS.
+ *
+ * Returns 0 on success or -ENOMEM as soon as one attribute cannot be added.
+ */
+static int ip_tun_fill_encap_info(struct sk_buff *skb,
+				  struct lwtunnel_state *lwtstate)
+{
+	struct ip_tunnel_info *info = lwt_tun_info(lwtstate);
+
+	if (nla_put_u64(skb, LWTUNNEL_IP_ID, info->key.tun_id))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, LWTUNNEL_IP_DST, info->key.u.ipv4.dst))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, LWTUNNEL_IP_SRC, info->key.u.ipv4.src))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, LWTUNNEL_IP_TOS, info->key.tos))
+		goto nla_put_failure;
+	if (nla_put_u8(skb, LWTUNNEL_IP_TTL, info->key.ttl))
+		goto nla_put_failure;
+	if (nla_put_u16(skb, LWTUNNEL_IP_SPORT, info->key.tp_src))
+		goto nla_put_failure;
+	if (nla_put_u16(skb, LWTUNNEL_IP_DPORT, info->key.tp_dst))
+		goto nla_put_failure;
+	if (nla_put_u16(skb, LWTUNNEL_IP_FLAGS, info->key.tun_flags))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -ENOMEM;
+}
+
+/* Netlink space needed by ip_tun_fill_encap_info(): one attribute per
+ * LWTUNNEL_IP_* field.  @lwtstate is not consulted; the size is constant.
+ */
+static int ip_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+	int size = 0;
+
+	size += nla_total_size(8);	/* LWTUNNEL_IP_ID */
+	size += nla_total_size(4);	/* LWTUNNEL_IP_DST */
+	size += nla_total_size(4);	/* LWTUNNEL_IP_SRC */
+	size += nla_total_size(1);	/* LWTUNNEL_IP_TOS */
+	size += nla_total_size(1);	/* LWTUNNEL_IP_TTL */
+	size += nla_total_size(2);	/* LWTUNNEL_IP_SPORT */
+	size += nla_total_size(2);	/* LWTUNNEL_IP_DPORT */
+	size += nla_total_size(2);	/* LWTUNNEL_IP_FLAGS */
+
+	return size;
+}
+
+/* Compare two encap states by a bytewise memcmp() of their whole
+ * struct ip_tunnel_info; returns 0 when they are identical.
+ */
+static int ip_tun_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+	const struct ip_tunnel_info *lhs = lwt_tun_info(a);
+	const struct ip_tunnel_info *rhs = lwt_tun_info(b);
+
+	return memcmp(lhs, rhs, sizeof(struct ip_tunnel_info));
+}
+
+/* Encap callbacks registered for LWTUNNEL_ENCAP_IP in ip_tunnel_core_init(). */
+static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
+ .build_state = ip_tun_build_state,
+ .fill_encap = ip_tun_fill_encap_info,
+ .get_encap_size = ip_tun_encap_nlsize,
+ .cmp_encap = ip_tun_cmp_encap,
+};
+
+/* Netlink policy for the LWTUNNEL_IP6_* attributes.  ip6_tun_build_state()
+ * passes it to nla_parse_nested().  The two address attributes are
+ * constrained by length (sizeof(struct in6_addr)) rather than by type.
+ */
+static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
+ [LWTUNNEL_IP6_ID] = { .type = NLA_U64 },
+ [LWTUNNEL_IP6_DST] = { .len = sizeof(struct in6_addr) },
+ [LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) },
+ [LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 },
+ [LWTUNNEL_IP6_TC] = { .type = NLA_U8 },
+ [LWTUNNEL_IP6_SPORT] = { .type = NLA_U16 },
+ [LWTUNNEL_IP6_DPORT] = { .type = NLA_U16 },
+ [LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 },
+};
+
+/* Parse the nested LWTUNNEL_IP6_* attributes in @attr and build a new
+ * LWTUNNEL_ENCAP_IP6 state describing the IPv6 tunnel key.  The hop limit is
+ * stored in key.ttl and the traffic class in key.tos.
+ *
+ * On success the new state is stored in *@ts and 0 is returned.  Returns the
+ * nla_parse_nested() error on malformed input, or -ENOMEM if the state could
+ * not be allocated.  @dev is not used.
+ */
+static int ip6_tun_build_state(struct net_device *dev, struct nlattr *attr,
+			       struct lwtunnel_state **ts)
+{
+	struct nlattr *attrs[LWTUNNEL_IP6_MAX + 1];
+	struct lwtunnel_state *lwt;
+	struct ip_tunnel_info *info;
+	int rc;
+
+	rc = nla_parse_nested(attrs, LWTUNNEL_IP6_MAX, attr, ip6_tun_policy);
+	if (rc < 0)
+		return rc;
+
+	lwt = lwtunnel_state_alloc(sizeof(*info));
+	if (!lwt)
+		return -ENOMEM;
+	lwt->type = LWTUNNEL_ENCAP_IP6;
+
+	info = lwt_tun_info(lwt);
+
+	/* Fixed part: transmit-side info without tunnel options. */
+	info->mode = IP_TUNNEL_INFO_TX;
+	info->options = NULL;
+	info->options_len = 0;
+
+	/* Every attribute is optional; copy only those that were supplied. */
+	if (attrs[LWTUNNEL_IP6_ID])
+		info->key.tun_id = nla_get_u64(attrs[LWTUNNEL_IP6_ID]);
+	if (attrs[LWTUNNEL_IP6_DST])
+		info->key.u.ipv6.dst = nla_get_in6_addr(attrs[LWTUNNEL_IP6_DST]);
+	if (attrs[LWTUNNEL_IP6_SRC])
+		info->key.u.ipv6.src = nla_get_in6_addr(attrs[LWTUNNEL_IP6_SRC]);
+	if (attrs[LWTUNNEL_IP6_HOPLIMIT])
+		info->key.ttl = nla_get_u8(attrs[LWTUNNEL_IP6_HOPLIMIT]);
+	if (attrs[LWTUNNEL_IP6_TC])
+		info->key.tos = nla_get_u8(attrs[LWTUNNEL_IP6_TC]);
+	if (attrs[LWTUNNEL_IP6_SPORT])
+		info->key.tp_src = nla_get_be16(attrs[LWTUNNEL_IP6_SPORT]);
+	if (attrs[LWTUNNEL_IP6_DPORT])
+		info->key.tp_dst = nla_get_be16(attrs[LWTUNNEL_IP6_DPORT]);
+	if (attrs[LWTUNNEL_IP6_FLAGS])
+		info->key.tun_flags = nla_get_u16(attrs[LWTUNNEL_IP6_FLAGS]);
+
+	*ts = lwt;
+	return 0;
+}
+
+/* Dump the IPv6 tunnel key held in @lwtstate into @skb as LWTUNNEL_IP6_*
+ * attributes.
+ *
+ * The mapping must match ip6_tun_build_state(), which stores the hop limit
+ * in key.ttl and the traffic class in key.tos; otherwise a dumped route
+ * reports the two values swapped.
+ *
+ * Returns 0 on success or -ENOMEM if any attribute could not be added.
+ */
+static int ip6_tun_fill_encap_info(struct sk_buff *skb,
+				   struct lwtunnel_state *lwtstate)
+{
+	struct ip_tunnel_info *tun_info = lwt_tun_info(lwtstate);
+
+	if (nla_put_u64(skb, LWTUNNEL_IP6_ID, tun_info->key.tun_id) ||
+	    nla_put_in6_addr(skb, LWTUNNEL_IP6_DST, &tun_info->key.u.ipv6.dst) ||
+	    nla_put_in6_addr(skb, LWTUNNEL_IP6_SRC, &tun_info->key.u.ipv6.src) ||
+	    nla_put_u8(skb, LWTUNNEL_IP6_HOPLIMIT, tun_info->key.ttl) ||
+	    nla_put_u8(skb, LWTUNNEL_IP6_TC, tun_info->key.tos) ||
+	    nla_put_u16(skb, LWTUNNEL_IP6_SPORT, tun_info->key.tp_src) ||
+	    nla_put_u16(skb, LWTUNNEL_IP6_DPORT, tun_info->key.tp_dst) ||
+	    nla_put_u16(skb, LWTUNNEL_IP6_FLAGS, tun_info->key.tun_flags))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Netlink space needed by ip6_tun_fill_encap_info(): one attribute per
+ * LWTUNNEL_IP6_* field.  @lwtstate is not consulted; the size is constant.
+ */
+static int ip6_tun_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+	int size = 0;
+
+	size += nla_total_size(8);	/* LWTUNNEL_IP6_ID */
+	size += nla_total_size(16);	/* LWTUNNEL_IP6_DST */
+	size += nla_total_size(16);	/* LWTUNNEL_IP6_SRC */
+	size += nla_total_size(1);	/* LWTUNNEL_IP6_HOPLIMIT */
+	size += nla_total_size(1);	/* LWTUNNEL_IP6_TC */
+	size += nla_total_size(2);	/* LWTUNNEL_IP6_SPORT */
+	size += nla_total_size(2);	/* LWTUNNEL_IP6_DPORT */
+	size += nla_total_size(2);	/* LWTUNNEL_IP6_FLAGS */
+
+	return size;
+}
+
+/* Encap callbacks registered for LWTUNNEL_ENCAP_IP6 in ip_tunnel_core_init().
+ * The comparison callback is shared with the IPv4 ops (ip_tun_cmp_encap).
+ */
+static const struct lwtunnel_encap_ops ip6_tun_lwt_ops = {
+ .build_state = ip6_tun_build_state,
+ .fill_encap = ip6_tun_fill_encap_info,
+ .get_encap_size = ip6_tun_encap_nlsize,
+ .cmp_encap = ip_tun_cmp_encap,
+};
+
+/* Register the IPv4 and IPv6 tunnel encap ops with the lwtunnel core.
+ * NOTE(review): the results of lwtunnel_encap_add_ops() are not checked
+ * here - confirm that a registration failure may be ignored.
+ */
+void __init ip_tunnel_core_init(void)
+{
+ lwtunnel_encap_add_ops(&ip_tun_lwt_ops, LWTUNNEL_ENCAP_IP);
+ lwtunnel_encap_add_ops(&ip6_tun_lwt_ops, LWTUNNEL_ENCAP_IP6);
+}
+
+/* Static key, initially false, counting users that need tunnel metadata.
+ * It is raised and lowered through the two helpers below.
+ */
+struct static_key ip_tunnel_metadata_cnt = STATIC_KEY_INIT_FALSE;
+EXPORT_SYMBOL(ip_tunnel_metadata_cnt);
+
+/* Take one reference on ip_tunnel_metadata_cnt. */
+void ip_tunnel_need_metadata(void)
+{
+ static_key_slow_inc(&ip_tunnel_metadata_cnt);
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_need_metadata);
+
+/* Drop one reference on ip_tunnel_metadata_cnt; presumably paired with an
+ * earlier ip_tunnel_need_metadata() call - confirm against callers.
+ */
+void ip_tunnel_unneed_metadata(void)
+{
+ static_key_slow_dec(&ip_tunnel_metadata_cnt);
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_unneed_metadata);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 8e7328c6a390..ed4ef09c2136 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -94,7 +94,7 @@
/* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */
#define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */
#define CONF_SEND_RETRIES 6 /* Send six requests per open */
-#define CONF_INTER_TIMEOUT (HZ/2) /* Inter-device timeout: 1/2 second */
+#define CONF_INTER_TIMEOUT (HZ) /* Inter-device timeout: 1 second */
#define CONF_BASE_TIMEOUT (HZ*2) /* Initial timeout: 2 seconds */
#define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */
#define CONF_TIMEOUT_MULT *7/4 /* Rate of timeout growth */
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 254238daf58b..f34c31defafe 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -198,7 +198,7 @@ static int ipip_rcv(struct sk_buff *skb)
goto drop;
if (iptunnel_pull_header(skb, 0, tpi.proto))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
return -1;
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 2199a5db25e6..690d27d3f2f9 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -58,6 +58,12 @@ config NFT_REJECT_IPV4
default NFT_REJECT
tristate
+config NFT_DUP_IPV4
+ tristate "IPv4 nf_tables packet duplication support"
+ select NF_DUP_IPV4
+ help
+ This module enables IPv4 packet duplication support for nf_tables.
+
endif # NF_TABLES_IPV4
config NF_TABLES_ARP
@@ -67,6 +73,12 @@ config NF_TABLES_ARP
endif # NF_TABLES
+config NF_DUP_IPV4
+ tristate "Netfilter IPv4 packet duplication to alternate destination"
+ help
+ This option enables the nf_dup_ipv4 core, which duplicates an IPv4
+ packet to be rerouted to another destination.
+
config NF_LOG_ARP
tristate "ARP packet logging"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 7fe6c703528f..87b073da14c9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -41,6 +41,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NFT_MASQ_IPV4) += nft_masq_ipv4.o
obj-$(CONFIG_NFT_REDIR_IPV4) += nft_redir_ipv4.o
+obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
@@ -70,3 +71,5 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
+
+obj-$(CONFIG_NF_DUP_IPV4) += nf_dup_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 92305a1a021a..c416cb355cb0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset)
return (struct arpt_entry *)(base + offset);
}
-static inline __pure
+static inline
struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
table_base = private->entries;
jumpstack = (struct arpt_entry **)private->jumpstack[cpu];
+ /* No TEE support for arptables, so no need to switch to alternate
+ * stack. All targets that reenter must return absolute verdicts.
+ */
e = get_entry(table_base, private->hook_entry[hook]);
acpar.in = state->in;
@@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
if (table_base + v
!= arpt_next_entry(e)) {
-
- if (stackidx >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
jumpstack[stackidx++] = e;
}
@@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
continue;
}
- /* Targets which reenter must return
- * abs. verdicts
- */
acpar.target = t->u.kernel.target;
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
@@ -372,10 +367,13 @@ static inline bool unconditional(const struct arpt_arp *arp)
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
*/
-static int mark_source_chains(const struct xt_table_info *newinfo,
+static int mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -391,6 +389,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -445,6 +444,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -459,6 +460,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
return 0;
}
+ if (entry0 + newpos != arpt_next_entry(e) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -475,6 +480,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -664,9 +670,6 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
break;
++i;
- if (strcmp(arpt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
@@ -1439,9 +1442,6 @@ static int translate_compat_table(const char *name,
break;
}
++i;
- if (strcmp(arpt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 6c72fbb7b49e..787f99ed55e2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -276,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb,
}
#endif
-static inline __pure
+static inline
struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb,
const char *indev, *outdev;
const void *table_base;
struct ipt_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
ip = ip_hdr(skb);
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
@@ -331,13 +332,21 @@ ipt_do_table(struct sk_buff *skb,
smp_read_barrier_depends();
table_base = private->entries;
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
- pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
- table->name, hook, origptr,
+ pr_debug("Entering %s(hook %u), UF %p\n",
+ table->name, hook,
get_entry(table_base, private->underflow[hook]));
do {
@@ -383,28 +392,24 @@ ipt_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr) {
+ if (stackidx == 0) {
e = get_entry(table_base,
private->underflow[hook]);
pr_debug("Underflow (this is normal) "
"to %p\n", e);
} else {
- e = jumpstack[--*stackptr];
+ e = jumpstack[--stackidx];
pr_debug("Pulled %p out from pos %u\n",
- e, *stackptr);
+ e, stackidx);
e = ipt_next_entry(e);
}
continue;
}
if (table_base + v != ipt_next_entry(e) &&
!(e->ip.flags & IPT_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
pr_debug("Pushed %p into pos %u\n",
- e, *stackptr - 1);
+ e, stackidx - 1);
}
e = get_entry(table_base, v);
@@ -423,9 +428,8 @@ ipt_do_table(struct sk_buff *skb,
/* Verdict */
break;
} while (!acpar.hotdrop);
- pr_debug("Exiting %s; resetting sp from %u to %u\n",
- __func__, *stackptr, origptr);
- *stackptr = origptr;
+ pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
+
xt_write_recseq_end(addend);
local_bh_enable();
@@ -439,11 +443,15 @@ ipt_do_table(struct sk_buff *skb,
}
/* Figures out from what hook each rule can be called: returns 0 if
- there are loops. Puts hook bitmask in comefrom. */
+ * there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -457,6 +465,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -518,6 +527,9 @@ mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ WARN_ON_ONCE(calldepth == 0);
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -531,9 +543,14 @@ mark_source_chains(const struct xt_table_info *newinfo,
newpos);
return 0;
}
+ if (entry0 + newpos != ipt_next_entry(e) &&
+ !(e->ip.flags & IPT_F_GOTO) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
- duprintf("Jump rule %u -> %u\n",
- pos, newpos);
+ duprintf("Jump rule %u -> %u, calldepth %d\n",
+ pos, newpos, calldepth);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -547,6 +564,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -826,9 +844,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
return ret;
++i;
- if (strcmp(ipt_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1744,9 +1759,6 @@ translate_compat_table(struct net *net,
if (ret != 0)
break;
++i;
- if (strcmp(ipt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 4bf3dc49ad1e..270765236f5e 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -72,7 +72,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
tcph->cwr = einfo->proto.tcp.cwr;
inet_proto_csum_replace2(&tcph->check, skb,
- oldval, ((__be16 *)tcph)[6], 0);
+ oldval, ((__be16 *)tcph)[6], false);
return true;
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 30ad9554b5e9..8a2caaf3940b 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -280,7 +280,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -EINVAL;
}
- h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 80d5554b9a88..cdde3ec496e9 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -134,9 +134,11 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_l4proto *innerproto;
const struct nf_conntrack_tuple_hash *h;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_zone tmp;
NF_CT_ASSERT(skb->nfct == NULL);
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
/* Are they talking about one of our connections? */
if (!nf_ct_get_tuplepr(skb,
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index c88b7d434718..9306ec4fab41 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -43,22 +43,22 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
- u16 zone = NF_CT_DEFAULT_ZONE;
-
+ u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- if (skb->nfct)
- zone = nf_ct_zone((struct nf_conn *)skb->nfct);
-#endif
+ if (skb->nfct) {
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
- return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+ }
#endif
+ if (nf_bridge_in_prerouting(skb))
+ return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
+
if (hooknum == NF_INET_PRE_ROUTING)
- return IP_DEFRAG_CONNTRACK_IN + zone;
+ return IP_DEFRAG_CONNTRACK_IN + zone_id;
else
- return IP_DEFRAG_CONNTRACK_OUT + zone;
+ return IP_DEFRAG_CONNTRACK_OUT + zone_id;
}
static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
new file mode 100644
index 000000000000..b5bb37564b0e
--- /dev/null
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -0,0 +1,120 @@
+/*
+ * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
+ * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
+ *
+ * Extracted from xt_TEE.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/* Choose the network namespace used to route the duplicate of @skb.
+ * With CONFIG_NET_NS the namespace of skb->dev is preferred, then that of
+ * the device behind the skb's dst entry; otherwise, and as the final
+ * fallback, &init_net is returned.
+ */
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+	const struct dst_entry *route;
+
+	if (skb->dev)
+		return dev_net(skb->dev);
+
+	route = skb_dst(skb);
+	if (route && route->dev)
+		return dev_net(route->dev);
+#endif
+	return &init_net;
+}
+
+/* Look up an output route to gateway @gw and attach it to @skb.
+ *
+ * @oif restricts the lookup to one output interface; -1 leaves flowi4_oif
+ * at zero.  On success the skb's previous dst is dropped and replaced, and
+ * skb->dev / skb->protocol are updated to match the new route.
+ *
+ * Returns true on success, false if ip_route_output_key() failed (the skb
+ * is then left untouched).
+ */
+static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
+			      int oif)
+{
+	const struct iphdr *hdr = ip_hdr(skb);
+	struct net *ns = pick_net(skb);
+	struct flowi4 key;
+	struct rtable *route;
+
+	memset(&key, 0, sizeof(key));
+	key.daddr = gw->s_addr;
+	key.flowi4_tos = RT_TOS(hdr->tos);
+	key.flowi4_scope = RT_SCOPE_UNIVERSE;
+	key.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
+	if (oif != -1)
+		key.flowi4_oif = oif;
+
+	route = ip_route_output_key(ns, &key);
+	if (IS_ERR(route))
+		return false;
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, &route->dst);
+	skb->dev = route->dst.dev;
+	skb->protocol = htons(ETH_P_IP);
+
+	return true;
+}
+
+/* Send a copy of @skb to gateway @gw, optionally via interface @oif
+ * (-1 for none), leaving the original skb untouched.
+ *
+ * Nothing is done when the per-cpu nf_skb_duplicated flag is already set;
+ * the flag is raised around ip_local_out() of the copy, so a copy is not
+ * duplicated again.  The copy is freed if no route to @gw can be found.
+ */
+void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct in_addr *gw, int oif)
+{
+ struct iphdr *iph;
+
+ if (this_cpu_read(nf_skb_duplicated))
+ return;
+ /*
+ * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+ * the original skb, which should continue on its way as if nothing has
+ * happened. The copy should be independently delivered to the gateway.
+ */
+ skb = pskb_copy(skb, GFP_ATOMIC);
+ if (skb == NULL)
+ return;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ /* Avoid counting cloned packets towards the original connection. */
+ nf_conntrack_put(skb->nfct);
+ skb->nfct = &nf_ct_untracked_get()->ct_general;
+ skb->nfctinfo = IP_CT_NEW;
+ nf_conntrack_get(skb->nfct);
+#endif
+ /*
+ * If we are in PREROUTING/INPUT, the checksum must be recalculated
+ * since the length could have changed as a result of defragmentation.
+ *
+ * We also decrease the TTL to mitigate potential loops between two
+ * hosts.
+ *
+ * Set %IP_DF so that the original source is notified of a potentially
+ * decreased MTU on the clone route. IPv6 does this too.
+ */
+ iph = ip_hdr(skb);
+ iph->frag_off |= htons(IP_DF);
+ if (hooknum == NF_INET_PRE_ROUTING ||
+ hooknum == NF_INET_LOCAL_IN)
+ --iph->ttl;
+ ip_send_check(iph);
+
+ if (nf_dup_ipv4_route(skb, gw, oif)) {
+ __this_cpu_write(nf_skb_duplicated, true);
+ ip_local_out(skb);
+ __this_cpu_write(nf_skb_duplicated, false);
+ } else {
+ kfree_skb(skb);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_dup_ipv4);
+
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("nf_dup_ipv4: Duplicate IPv4 packet");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index e59cc05c09e9..22f4579b0c2a 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -120,7 +120,7 @@ static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
oldip = iph->daddr;
newip = t->dst.u3.ip;
}
- inet_proto_csum_replace4(check, skb, oldip, newip, 1);
+ inet_proto_csum_replace4(check, skb, oldip, newip, true);
}
static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
@@ -151,7 +151,7 @@ static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
}
} else
inet_proto_csum_replace2(check, skb,
- htons(oldlen), htons(datalen), 1);
+ htons(oldlen), htons(datalen), true);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 4557b4ab8342..7b98baa13ede 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -67,7 +67,7 @@ icmp_manip_pkt(struct sk_buff *skb,
hdr = (struct icmphdr *)(skb->data + hdroff);
inet_proto_csum_replace2(&hdr->checksum, skb,
- hdr->un.echo.id, tuple->src.u.icmp.id, 0);
+ hdr->un.echo.id, tuple->src.u.icmp.id, false);
hdr->un.echo.id = tuple->src.u.icmp.id;
return true;
}
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
new file mode 100644
index 000000000000..25419fbddcb6
--- /dev/null
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+
+/* Private data of the IPv4 "dup" expression: the source registers holding
+ * the gateway address and the output device.  sreg_dev is only assigned by
+ * nft_dup_ipv4_init() when NFTA_DUP_SREG_DEV is supplied.
+ */
+struct nft_dup_ipv4 {
+ enum nft_registers sreg_addr:8;
+ enum nft_registers sreg_dev:8;
+};
+
+/* Evaluate the "dup" expression: duplicate the packet towards the gateway
+ * address found in register sreg_addr.
+ *
+ * The output device register is optional.  nft_dup_ipv4_init() only sets
+ * sreg_dev when NFTA_DUP_SREG_DEV was given, so an unset (zero) sreg_dev
+ * must not be used as a register index; pass -1 instead, which
+ * nf_dup_ipv4_route() treats as "no output interface".
+ * NOTE(review): relies on the expression private area being zeroed at
+ * allocation and on 0 never being a valid load register - confirm.
+ */
+static void nft_dup_ipv4_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+	struct in_addr gw = {
+		.s_addr = regs->data[priv->sreg_addr],
+	};
+	int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
+
+	nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif);
+}
+
+/* Set up the "dup" expression from netlink attributes @tb.
+ *
+ * NFTA_DUP_SREG_ADDR is mandatory and must name a register that can be
+ * loaded as a struct in_addr; NFTA_DUP_SREG_DEV is optional and must be
+ * loadable as an int.
+ *
+ * Returns 0 on success, -EINVAL if the address attribute is missing, or
+ * the error from nft_validate_register_load().
+ */
+static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+	int ret;
+
+	if (!tb[NFTA_DUP_SREG_ADDR])
+		return -EINVAL;
+
+	priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
+	ret = nft_validate_register_load(priv->sreg_addr,
+					 sizeof(struct in_addr));
+	if (ret < 0)
+		return ret;
+
+	/* No device register requested: nothing more to validate. */
+	if (!tb[NFTA_DUP_SREG_DEV])
+		return 0;
+
+	priv->sreg_dev = nft_parse_register(tb[NFTA_DUP_SREG_DEV]);
+	return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+/* Dump the expression's registers into @skb.
+ *
+ * NFTA_DUP_SREG_DEV is optional at init time, so it is emitted only when a
+ * device register was actually configured.  Dumping an unset register
+ * would make a dump/restore round trip add a device attribute the user
+ * never supplied.
+ * NOTE(review): treats sreg_dev == 0 as "not configured" - confirm that the
+ * private area is zeroed and that 0 is never a valid load register.
+ *
+ * Returns 0 on success, -1 if an attribute could not be added.
+ */
+static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_dup_ipv4 *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+		goto nla_put_failure;
+
+	if (priv->sreg_dev &&
+	    nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+/* Forward declaration: the ops below point back at the expression type. */
+static struct nft_expr_type nft_dup_ipv4_type;
+/* Callbacks implementing the IPv4 "dup" expression. */
+static const struct nft_expr_ops nft_dup_ipv4_ops = {
+ .type = &nft_dup_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv4)),
+ .eval = nft_dup_ipv4_eval,
+ .init = nft_dup_ipv4_init,
+ .dump = nft_dup_ipv4_dump,
+};
+
+/* Netlink policy: both attributes carry a 32-bit register number. */
+static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = {
+ [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 },
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 },
+};
+
+/* Expression type "dup" for the NFPROTO_IPV4 family. */
+static struct nft_expr_type nft_dup_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "dup",
+ .ops = &nft_dup_ipv4_ops,
+ .policy = nft_dup_ipv4_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+/* Module entry point: register the "dup" expression type and return the
+ * result of nft_register_expr().
+ */
+static int __init nft_dup_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_dup_ipv4_type);
+}
+
+/* Module exit point: unregister the "dup" expression type. */
+static void __exit nft_dup_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_dup_ipv4_type);
+}
+
+module_init(nft_dup_ipv4_module_init);
+module_exit(nft_dup_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "dup");
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 05ff44b758df..e89094ab5ddb 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -363,7 +363,8 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
scoped);
rcu_read_unlock();
- if (!(isk->freebind || isk->transparent || has_addr ||
+ if (!(net->ipv6.sysctl.ip_nonlocal_bind ||
+ isk->freebind || isk->transparent || has_addr ||
addr_type == IPV6_ADDR_ANY))
return -EADDRNOTAVAIL;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index da5d483e236a..3abd9d7a3adf 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -300,6 +300,8 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE),
SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE),
SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
+ SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
+ SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e681b852ced1..f3087aaa6dd8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -91,6 +91,7 @@
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
@@ -102,6 +103,7 @@
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
+#include <net/lwtunnel.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
@@ -109,6 +111,8 @@
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>
+#include <net/ip_tunnels.h>
+#include <net/vrf.h>
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -1403,6 +1407,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
#ifdef CONFIG_IP_ROUTE_CLASSID
rt->dst.tclassid = nh->nh_tclassid;
#endif
+ rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
if (unlikely(fnhe))
cached = rt_bind_exception(rt, fnhe, daddr);
else if (!(rt->dst.flags & DST_NOCACHE))
@@ -1546,7 +1551,6 @@ static int __mkroute_input(struct sk_buff *skb,
struct rtable *rth;
int err;
struct in_device *out_dev;
- unsigned int flags = 0;
bool do_cache;
u32 itag = 0;
@@ -1610,7 +1614,7 @@ static int __mkroute_input(struct sk_buff *skb,
}
rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
- rth->rt_flags = flags;
+ rth->rt_flags = 0;
rth->rt_type = res->type;
rth->rt_is_input = 1;
rth->rt_iif = 0;
@@ -1624,6 +1628,14 @@ static int __mkroute_input(struct sk_buff *skb,
rth->dst.output = ip_output;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
+ if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
+ rth->dst.lwtstate->orig_output = rth->dst.output;
+ rth->dst.output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
+ rth->dst.lwtstate->orig_input = rth->dst.input;
+ rth->dst.input = lwtunnel_input;
+ }
skb_dst_set(skb, &rth->dst);
out:
err = 0;
@@ -1662,6 +1674,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct ip_tunnel_info *tun_info;
struct flowi4 fl4;
unsigned int flags = 0;
u32 itag = 0;
@@ -1679,6 +1692,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
by fib_lookup.
*/
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX)
+ fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id;
+ else
+ fl4.flowi4_tun_key.tun_id = 0;
+ skb_dst_drop(skb);
+
if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
goto martian_source;
@@ -1710,7 +1730,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* Now we are ready to route packet.
*/
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = dev->ifindex;
+ fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? : dev->ifindex;
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -1792,6 +1812,7 @@ local_input:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+
RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
@@ -1981,7 +2002,6 @@ add:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
-
RT_CACHE_STAT_INC(out_slow_tot);
if (flags & RTCF_LOCAL)
@@ -2004,6 +2024,8 @@ add:
}
rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
+ if (lwtunnel_output_redirect(rth->dst.lwtstate))
+ rth->dst.output = lwtunnel_output;
return rth;
}
@@ -2110,6 +2132,11 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_HOST);
}
+ if (netif_is_vrf(dev_out) &&
+ !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
+ rth = vrf_dev_get_rth(dev_out);
+ goto out;
+ }
}
if (!fl4->daddr) {
@@ -2261,7 +2288,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
rt->rt_uses_gateway = ort->rt_uses_gateway;
INIT_LIST_HEAD(&rt->rt_uncached);
-
dst_free(new);
}
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index c037644eafb7..fd1405d37c14 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -146,7 +146,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
bictcp_update(ca, tp->snd_cwnd);
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 8c6fd3d5e40f..167b6a3e1b98 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -264,7 +264,7 @@ static void tcp_cdg_cong_avoid(struct sock *sk, u32 ack, u32 acked)
u32 prior_snd_cwnd;
u32 incr;
- if (tp->snd_cwnd < tp->snd_ssthresh && hystart_detect)
+ if (tcp_in_slow_start(tp) && hystart_detect)
tcp_cdg_hystart_update(sk);
if (after(ack, ca->rtt_seq) && ca->rtt.v64) {
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 84be008c945c..a2ed23c595cf 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -365,10 +365,8 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
*/
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
- u32 cwnd = tp->snd_cwnd + acked;
+ u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
- if (cwnd > tp->snd_ssthresh)
- cwnd = tp->snd_ssthresh + 1;
acked -= cwnd - tp->snd_cwnd;
tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
@@ -413,7 +411,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
/* In "safe" area, increase. */
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
acked = tcp_slow_start(tp, acked);
if (!acked)
return;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 06d3d665a9fd..28011fb1f4a2 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -320,7 +320,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
if (hystart && after(ack, ca->end_seq))
bictcp_hystart_reset(sk);
acked = tcp_slow_start(tp, acked);
@@ -439,7 +439,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
ca->delay_min = delay;
/* hystart triggers when cwnd is larger than some threshold */
- if (hystart && tp->snd_cwnd <= tp->snd_ssthresh &&
+ if (hystart && tcp_in_slow_start(tp) &&
tp->snd_cwnd >= hystart_low_window)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c
index 882c08aae2f5..db7842495a64 100644
--- a/net/ipv4/tcp_highspeed.c
+++ b/net/ipv4/tcp_highspeed.c
@@ -116,7 +116,7 @@ static void hstcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
/* Update AIMD parameters.
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 58469fff6c18..82f0d9ed60f5 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -236,7 +236,7 @@ static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
/* In dangerous area, increase slowly.
diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c
index f963b274f2b0..083831e359df 100644
--- a/net/ipv4/tcp_hybla.c
+++ b/net/ipv4/tcp_hybla.c
@@ -112,7 +112,7 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 acked)
rho_fractions = ca->rho_3ls - (ca->rho << 3);
- if (tp->snd_cwnd < tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
/*
* slow start
* INC = 2^RHO - 1
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index f71002e4db0b..2ab9bbb6faff 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -268,7 +268,7 @@ static void tcp_illinois_cong_avoid(struct sock *sk, u32 ack, u32 acked)
return;
/* In slow start */
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 728f5b3d3c64..4e4d6bcd0ca9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -109,6 +109,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
#define FLAG_DATA_SACKED 0x20 /* New SACK. */
#define FLAG_ECE 0x40 /* ECE in this ACK */
+#define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */
#define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/
#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
@@ -196,11 +197,13 @@ static void tcp_enter_quickack_mode(struct sock *sk)
* and the session is not interactive.
*/
-static inline bool tcp_in_quickack_mode(const struct sock *sk)
+static bool tcp_in_quickack_mode(struct sock *sk)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct dst_entry *dst = __sk_dst_get(sk);
- return icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong;
+ return (dst && dst_metric(dst, RTAX_QUICKACK)) ||
+ (icsk->icsk_ack.quick && !icsk->icsk_ack.pingpong);
}
static void tcp_ecn_queue_cwr(struct tcp_sock *tp)
@@ -1037,7 +1040,7 @@ static bool tcp_is_sackblock_valid(struct tcp_sock *tp, bool is_dsack,
* highest SACK block). Also calculate the lowest snd_nxt among the remaining
* retransmitted skbs to avoid some costly processing per ACKs.
*/
-static void tcp_mark_lost_retrans(struct sock *sk)
+static void tcp_mark_lost_retrans(struct sock *sk, int *flag)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -1078,7 +1081,7 @@ static void tcp_mark_lost_retrans(struct sock *sk)
if (after(received_upto, ack_seq)) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
tp->retrans_out -= tcp_skb_pcount(skb);
-
+ *flag |= FLAG_LOST_RETRANS;
tcp_skb_mark_lost_uncond_verify(tp, skb);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT);
} else {
@@ -1818,7 +1821,7 @@ advance_sp:
((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker))
tcp_update_reordering(sk, tp->fackets_out - state->reord, 0);
- tcp_mark_lost_retrans(sk);
+ tcp_mark_lost_retrans(sk, &state->flag);
tcp_verify_left_out(tp);
out:
@@ -2474,15 +2477,14 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo)
return false;
}
-/* The cwnd reduction in CWR and Recovery use the PRR algorithm
- * https://datatracker.ietf.org/doc/draft-ietf-tcpm-proportional-rate-reduction/
+/* The cwnd reduction in CWR and Recovery uses the PRR algorithm in RFC 6937.
* It computes the number of packets to send (sndcnt) based on packets newly
* delivered:
* 1) If the packets in flight is larger than ssthresh, PRR spreads the
* cwnd reductions across a full RTT.
- * 2) If packets in flight is lower than ssthresh (such as due to excess
- * losses and/or application stalls), do not perform any further cwnd
- * reductions, but instead slow start up to ssthresh.
+ * 2) Otherwise PRR uses packet conservation to send as much as delivered.
+ * But when the retransmits are acked without further losses, PRR
+ * slow starts cwnd up to ssthresh to speed up the recovery.
*/
static void tcp_init_cwnd_reduction(struct sock *sk)
{
@@ -2499,7 +2501,7 @@ static void tcp_init_cwnd_reduction(struct sock *sk)
}
static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
- int fast_rexmit)
+ int fast_rexmit, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
int sndcnt = 0;
@@ -2508,16 +2510,18 @@ static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked,
(tp->packets_out - tp->sacked_out);
tp->prr_delivered += newly_acked_sacked;
- if (tcp_packets_in_flight(tp) > tp->snd_ssthresh) {
+ if (delta < 0) {
u64 dividend = (u64)tp->snd_ssthresh * tp->prr_delivered +
tp->prior_cwnd - 1;
sndcnt = div_u64(dividend, tp->prior_cwnd) - tp->prr_out;
- } else {
+ } else if ((flag & FLAG_RETRANS_DATA_ACKED) &&
+ !(flag & FLAG_LOST_RETRANS)) {
sndcnt = min_t(int, delta,
max_t(int, tp->prr_delivered - tp->prr_out,
newly_acked_sacked) + 1);
+ } else {
+ sndcnt = min(delta, newly_acked_sacked);
}
-
sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0));
tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt;
}
@@ -2578,7 +2582,7 @@ static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked)
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_try_keep_open(sk);
} else {
- tcp_cwnd_reduction(sk, prior_unsacked, 0);
+ tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
}
}
@@ -2588,6 +2592,7 @@ static void tcp_mtup_probe_failed(struct sock *sk)
icsk->icsk_mtup.search_high = icsk->icsk_mtup.probe_size - 1;
icsk->icsk_mtup.probe_size = 0;
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPFAIL);
}
static void tcp_mtup_probe_success(struct sock *sk)
@@ -2607,6 +2612,7 @@ static void tcp_mtup_probe_success(struct sock *sk)
icsk->icsk_mtup.search_low = icsk->icsk_mtup.probe_size;
icsk->icsk_mtup.probe_size = 0;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMTUPSUCCESS);
}
/* Do a simple retransmit without using the backoff mechanisms in
@@ -2675,7 +2681,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->prior_ssthresh = 0;
tcp_init_undo(tp);
- if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
+ if (!tcp_in_cwnd_reduction(sk)) {
if (!ece_ack)
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tcp_init_cwnd_reduction(sk);
@@ -2735,7 +2741,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack)
/* Undo during fast recovery after partial ACK. */
static bool tcp_try_undo_partial(struct sock *sk, const int acked,
- const int prior_unsacked)
+ const int prior_unsacked, int flag)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -2751,7 +2757,7 @@ static bool tcp_try_undo_partial(struct sock *sk, const int acked,
* mark more packets lost or retransmit more.
*/
if (tp->retrans_out) {
- tcp_cwnd_reduction(sk, prior_unsacked, 0);
+ tcp_cwnd_reduction(sk, prior_unsacked, 0, flag);
return true;
}
@@ -2838,7 +2844,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (tcp_is_reno(tp) && is_dupack)
tcp_add_reno_sack(sk);
} else {
- if (tcp_try_undo_partial(sk, acked, prior_unsacked))
+ if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag))
return;
/* Partial ACK arrived. Force fast retransmit. */
do_lost = tcp_is_reno(tp) ||
@@ -2851,9 +2857,10 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
break;
case TCP_CA_Loss:
tcp_process_loss(sk, flag, is_dupack);
- if (icsk->icsk_ca_state != TCP_CA_Open)
+ if (icsk->icsk_ca_state != TCP_CA_Open &&
+ !(flag & FLAG_LOST_RETRANS))
return;
- /* Fall through to processing in Open state. */
+ /* Change state if cwnd is undone or retransmits are lost */
default:
if (tcp_is_reno(tp)) {
if (flag & FLAG_SND_UNA_ADVANCED)
@@ -2888,7 +2895,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked,
if (do_lost)
tcp_update_scoreboard(sk, fast_rexmit);
- tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit);
+ tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag);
tcp_xmit_retransmit_queue(sk);
}
@@ -3562,10 +3569,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
&sack_state);
acked -= tp->packets_out;
- /* Advance cwnd if state allows */
- if (tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, acked);
-
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, prior_unsacked,
@@ -3574,6 +3577,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (tp->tlp_high_seq)
tcp_process_tlp_ack(sk, ack, flag);
+ /* Advance cwnd if state allows */
+ if (tcp_may_raise_cwnd(sk, flag))
+ tcp_cong_avoid(sk, ack, acked);
+
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) {
struct dst_entry *dst = __sk_dst_get(sk);
if (dst)
@@ -3947,7 +3954,6 @@ void tcp_reset(struct sock *sk)
static void tcp_fin(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- const struct dst_entry *dst;
inet_csk_schedule_ack(sk);
@@ -3959,9 +3965,7 @@ static void tcp_fin(struct sock *sk)
case TCP_ESTABLISHED:
/* Move to CLOSE_WAIT */
tcp_set_state(sk, TCP_CLOSE_WAIT);
- dst = __sk_dst_get(sk);
- if (!dst || !dst_metric(dst, RTAX_QUICKACK))
- inet_csk(sk)->icsk_ack.pingpong = 1;
+ inet_csk(sk)->icsk_ack.pingpong = 1;
break;
case TCP_CLOSE_WAIT:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ea2e1c5d395..93898e093d4e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -222,7 +222,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (err)
goto failure;
- inet_set_txhash(sk);
+ sk_set_txhash(sk);
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
@@ -1277,7 +1277,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- inet_set_txhash(newsk);
+ sk_set_txhash(newsk);
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
@@ -1683,8 +1683,7 @@ do_time_wait:
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk));
- inet_twsk_put(inet_twsk(sk));
+ inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
goto process;
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index a51d63a43e33..b3d64f61d922 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -461,7 +461,7 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
tp->snd_cwnd);
}
- } else if (tp->snd_cwnd > tp->snd_ssthresh &&
+ } else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4bc00cb79e60..6d8795b066ac 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -147,8 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
if (!th->fin ||
TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst:
- inet_twsk_deschedule(tw);
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
return TCP_TW_RST;
}
@@ -198,8 +197,7 @@ kill_with_rst:
*/
if (sysctl_tcp_rfc1337 == 0) {
kill:
- inet_twsk_deschedule(tw);
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
}
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b1c218df2c85..444ab5beecbd 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -163,7 +163,6 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
{
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
- const struct dst_entry *dst = __sk_dst_get(sk);
if (sysctl_tcp_slow_start_after_idle &&
(!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
@@ -174,9 +173,8 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
/* If it is a reply for ato after last received
* packet, enter pingpong mode.
*/
- if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato &&
- (!dst || !dst_metric(dst, RTAX_QUICKACK)))
- icsk->icsk_ack.pingpong = 1;
+ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ icsk->icsk_ack.pingpong = 1;
}
/* Account for an ACK we sent. */
@@ -1776,7 +1774,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
goto send_now;
- if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_CWR)))
+ if (icsk->icsk_ca_state >= TCP_CA_Recovery)
goto send_now;
/* Avoid bursty behavior by allowing defer
@@ -2151,7 +2149,7 @@ repair:
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
- return (push_one == 2) || (!tp->packets_out && tcp_send_head(sk));
+ return !tp->packets_out && tcp_send_head(sk);
}
bool tcp_schedule_loss_probe(struct sock *sk)
@@ -2228,7 +2226,7 @@ static bool skb_still_in_host_queue(const struct sock *sk,
return false;
}
-/* When probe timeout (PTO) fires, send a new segment if one exists, else
+/* When probe timeout (PTO) fires, try send a new segment if possible, else
* retransmit the last segment.
*/
void tcp_send_loss_probe(struct sock *sk)
@@ -2237,11 +2235,19 @@ void tcp_send_loss_probe(struct sock *sk)
struct sk_buff *skb;
int pcount;
int mss = tcp_current_mss(sk);
- int err = -1;
- if (tcp_send_head(sk)) {
- err = tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
- goto rearm_timer;
+ skb = tcp_send_head(sk);
+ if (skb) {
+ if (tcp_snd_wnd_test(tp, skb, mss)) {
+ pcount = tp->packets_out;
+ tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC);
+ if (tp->packets_out > pcount)
+ goto probe_sent;
+ goto rearm_timer;
+ }
+ skb = tcp_write_queue_prev(sk, skb);
+ } else {
+ skb = tcp_write_queue_tail(sk);
}
/* At most one outstanding TLP retransmission. */
@@ -2249,7 +2255,6 @@ void tcp_send_loss_probe(struct sock *sk)
goto rearm_timer;
/* Retransmit last segment. */
- skb = tcp_write_queue_tail(sk);
if (WARN_ON(!skb))
goto rearm_timer;
@@ -2264,26 +2269,24 @@ void tcp_send_loss_probe(struct sock *sk)
if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss,
GFP_ATOMIC)))
goto rearm_timer;
- skb = tcp_write_queue_tail(sk);
+ skb = tcp_write_queue_next(sk, skb);
}
if (WARN_ON(!skb || !tcp_skb_pcount(skb)))
goto rearm_timer;
- err = __tcp_retransmit_skb(sk, skb);
+ if (__tcp_retransmit_skb(sk, skb))
+ goto rearm_timer;
/* Record snd_nxt for loss detection. */
- if (likely(!err))
- tp->tlp_high_seq = tp->snd_nxt;
+ tp->tlp_high_seq = tp->snd_nxt;
+probe_sent:
+ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBES);
+ /* Reset s.t. tcp_rearm_rto will restart timer from now */
+ inet_csk(sk)->icsk_pending = 0;
rearm_timer:
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
- inet_csk(sk)->icsk_rto,
- TCP_RTO_MAX);
-
- if (likely(!err))
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPLOSSPROBES);
+ tcp_rearm_rto(sk);
}
/* Push out any pending frames which were held back due to
diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c
index 333bcb2415ff..bf5ea9e9bbc1 100644
--- a/net/ipv4/tcp_scalable.c
+++ b/net/ipv4/tcp_scalable.c
@@ -22,7 +22,7 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked)
if (!tcp_is_cwnd_limited(sk))
return;
- if (tp->snd_cwnd <= tp->snd_ssthresh)
+ if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
else
tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT),
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 5b752f58a900..7149ebc820c7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -649,4 +649,3 @@ void tcp_init_xmit_timers(struct sock *sk)
inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer,
&tcp_keepalive_timer);
}
-EXPORT_SYMBOL(tcp_init_xmit_timers);
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index a6cea1d5e20d..13951c4087d4 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -225,7 +225,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
*/
diff = tp->snd_cwnd * (rtt-vegas->baseRTT) / vegas->baseRTT;
- if (diff > gamma && tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (diff > gamma && tcp_in_slow_start(tp)) {
/* Going too fast. Time to slow down
* and switch to congestion avoidance.
*/
@@ -240,7 +240,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tp->snd_cwnd = min(tp->snd_cwnd, (u32)target_cwnd+1);
tp->snd_ssthresh = tcp_vegas_ssthresh(tp);
- } else if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ } else if (tcp_in_slow_start(tp)) {
/* Slow start. */
tcp_slow_start(tp, acked);
} else {
@@ -281,7 +281,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
vegas->minRTT = 0x7fffffff;
}
/* Use normal slow start */
- else if (tp->snd_cwnd <= tp->snd_ssthresh)
+ else if (tcp_in_slow_start(tp))
tcp_slow_start(tp, acked);
}
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index 112151eeee45..0d094b995cd9 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -150,7 +150,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
veno->diff = (tp->snd_cwnd << V_PARAM_SHIFT) - target_cwnd;
- if (tp->snd_cwnd <= tp->snd_ssthresh) {
+ if (tcp_in_slow_start(tp)) {
/* Slow start. */
tcp_slow_start(tp, acked);
} else {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1b8c5ba7d5f7..c0a15e7f359f 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1013,11 +1013,31 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
if (!rt) {
struct net *net = sock_net(sk);
+ __u8 flow_flags = inet_sk_flowi_flags(sk);
fl4 = &fl4_stack;
+
+ /* unconnected socket. If output device is enslaved to a VRF
+ * device lookup source address from VRF table. This mimics
+ * behavior of ip_route_connect{_init}.
+ */
+ if (netif_index_is_vrf(net, ipc.oif)) {
+ flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
+ RT_SCOPE_UNIVERSE, sk->sk_protocol,
+ (flow_flags | FLOWI_FLAG_VRFSRC),
+ faddr, saddr, dport,
+ inet->inet_sport);
+
+ rt = ip_route_output_flow(net, fl4, sk);
+ if (!IS_ERR(rt)) {
+ saddr = fl4->saddr;
+ ip_rt_put(rt);
+ }
+ }
+
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
- inet_sk_flowi_flags(sk),
+ flow_flags,
faddr, saddr, dport, inet->inet_sport);
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index bff69746e05f..55b3c0f4dde5 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -19,7 +19,7 @@
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
- int tos,
+ int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
@@ -28,6 +28,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
memset(fl4, 0, sizeof(*fl4));
fl4->daddr = daddr->a4;
fl4->flowi4_tos = tos;
+ fl4->flowi4_oif = oif;
if (saddr)
fl4->saddr = saddr->a4;
@@ -38,22 +39,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4,
return ERR_CAST(rt);
}
-static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos,
+static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
struct flowi4 fl4;
- return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);
+ return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr);
}
-static int xfrm4_get_saddr(struct net *net,
+static int xfrm4_get_saddr(struct net *net, int oif,
xfrm_address_t *saddr, xfrm_address_t *daddr)
{
struct dst_entry *dst;
struct flowi4 fl4;
- dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);
+ dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr);
if (IS_ERR(dst))
return -EHOSTUNREACH;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 438a73aa777c..983bb999738c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -5,16 +5,15 @@
# IPv6 as module will cause a CRASH if you try to unload it
menuconfig IPV6
tristate "The IPv6 protocol"
- default m
+ default y
---help---
- This is complemental support for the IP version 6.
- You will still be able to do traditional IPv4 networking as well.
+ Support for IP version 6 (IPv6).
For general information about IPv6, see
<https://en.wikipedia.org/wiki/IPv6>.
- For Linux IPv6 development information, see <http://www.linux-ipv6.org>.
- For specific information about IPv6 under Linux, read the HOWTO at
- <http://www.bieringer.de/linux/IPv6/>.
+ For specific information about IPv6 under Linux, see
+ Documentation/networking/ipv6.txt and read the HOWTO at
+ <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
To compile this protocol support as a module, choose M here: the
module will be called ipv6.
@@ -93,6 +92,25 @@ config IPV6_MIP6
If unsure, say N.
+config IPV6_ILA
+ tristate "IPv6: Identifier Locator Addressing (ILA)"
+ select LWTUNNEL
+ ---help---
+ Support for IPv6 Identifier Locator Addressing (ILA).
+
+ ILA is a mechanism to do network virtualization without
+ encapsulation. The basic concept of ILA is that we split an
+ IPv6 address into a 64 bit locator and 64 bit identifier. The
+ identifier is the identity of an entity in communication
+ ("who") and the locator expresses the location of the
+ entity ("where").
+
+ ILA can be configured using the "encap ila" option with
+ "ip -6 route" command. ILA is described in
+ https://tools.ietf.org/html/draft-herbert-nvo3-ila-00.
+
+ If unsure, say N.
+
config INET6_XFRM_TUNNEL
tristate
select INET6_TUNNEL
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index 0f3f1999719a..2c900c7b7eb1 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_IPV6_MIP6) += mip6.o
+obj-$(CONFIG_IPV6_ILA) += ila.o
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_VTI) += ip6_vti.o
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 21c2c818df3b..0f08d3b9e238 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -195,6 +195,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_from_local = 0,
+ .accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -211,7 +212,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.accept_ra_mtu = 1,
.stable_secret = {
.initialized = false,
- }
+ },
+ .use_oif_addrs_only = 0,
+ .ignore_routes_with_linkdown = 0,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -236,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.max_addresses = IPV6_MAX_ADDRESSES,
.accept_ra_defrtr = 1,
.accept_ra_from_local = 0,
+ .accept_ra_min_hop_limit= 1,
.accept_ra_pinfo = 1,
#ifdef CONFIG_IPV6_ROUTER_PREF
.accept_ra_rtr_pref = 1,
@@ -253,6 +257,8 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.stable_secret = {
.initialized = false,
},
+ .use_oif_addrs_only = 0,
+ .ignore_routes_with_linkdown = 0,
};
/* Check if a valid qdisc is available */
@@ -468,6 +474,9 @@ static int inet6_netconf_msgsize_devconf(int type)
if (type == -1 || type == NETCONFA_PROXY_NEIGH)
size += nla_total_size(4);
+ if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
+ size += nla_total_size(4);
+
return size;
}
@@ -504,6 +513,11 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0)
goto nla_put_failure;
+ if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
+ nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+ devconf->ignore_routes_with_linkdown) < 0)
+ goto nla_put_failure;
+
nlmsg_end(skb, nlh);
return 0;
@@ -540,6 +554,7 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
[NETCONFA_IFINDEX] = { .len = sizeof(int) },
[NETCONFA_FORWARDING] = { .len = sizeof(int) },
[NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) },
+ [NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
};
static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
@@ -762,6 +777,63 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf)
rt6_purge_dflt_routers(net);
return 1;
}
+
+/* Write @newf into ignore_routes_with_linkdown of every IPv6-enabled device
+ * in @net, sending a netconf notification for each device whose setting
+ * flipped between zero and non-zero.
+ */
+static void addrconf_linkdown_change(struct net *net, __s32 newf)
+{
+	struct net_device *dev;
+
+	for_each_netdev(net, dev) {
+		struct inet6_dev *idev = __in6_dev_get(dev);
+		int was_on, now_on;
+
+		if (!idev)
+			continue;
+
+		was_on = !!idev->cnf.ignore_routes_with_linkdown;
+		now_on = !!newf;
+		idev->cnf.ignore_routes_with_linkdown = newf;
+
+		/* Only an on<->off transition is worth announcing. */
+		if (was_on == now_on)
+			continue;
+
+		inet6_netconf_notify_devconf(dev_net(dev),
+					     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+					     dev->ifindex,
+					     &idev->cnf);
+	}
+}
+
+/* Store @newf through @p while holding the rtnl lock and propagate it.
+ *
+ * - @p is the "default" entry: notify if it toggled, return 0.
+ * - @p is the "all" entry: copy the value to the default entry and to every
+ *   device, notify if it toggled, return 1.
+ * - any other @p: only the store is performed, return 1.
+ *
+ * If the rtnl lock cannot be taken the syscall is restarted.
+ */
+static int addrconf_fixup_linkdown(struct ctl_table *table, int *p, int newf)
+{
+	struct net *net;
+	int toggled;
+	int ret = 1;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	net = table->extra2;
+	toggled = (!newf) != (!*p);
+	*p = newf;
+
+	if (p == &net->ipv6.devconf_dflt->ignore_routes_with_linkdown) {
+		if (toggled)
+			inet6_netconf_notify_devconf(net,
+						     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+						     NETCONFA_IFINDEX_DEFAULT,
+						     net->ipv6.devconf_dflt);
+		ret = 0;
+	} else if (p == &net->ipv6.devconf_all->ignore_routes_with_linkdown) {
+		net->ipv6.devconf_dflt->ignore_routes_with_linkdown = newf;
+		addrconf_linkdown_change(net, newf);
+		if (toggled)
+			inet6_netconf_notify_devconf(net,
+						     NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+						     NETCONFA_IFINDEX_ALL,
+						     net->ipv6.devconf_all);
+	}
+
+	rtnl_unlock();
+	return ret;
+}
+
#endif
/* Nobody refers to this ifaddr, destroy it */
@@ -1358,15 +1430,96 @@ out:
return ret;
}
+static int __ipv6_dev_get_saddr(struct net *net,
+ struct ipv6_saddr_dst *dst,
+ struct inet6_dev *idev,
+ struct ipv6_saddr_score *scores,
+ int hiscore_idx)
+{ /* Walk idev->addr_list under idev->lock and compare each usable
+   * address, rule by rule, with the best candidate found so far.
+   * scores[] has two slots: scores[hiscore_idx] is the current best,
+   * the other slot is scratch for the address being examined.
+   * Returns the index of the slot holding the best candidate once
+   * this device has been processed (it flips each time a new best
+   * is found).
+   */
+ struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx];
+
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(score->ifa, &idev->addr_list, if_list) { /* score->ifa doubles as the list cursor */
+ int i;
+
+ /*
+ * - Tentative Address (RFC2462 section 5.4)
+ * - A tentative address is not considered
+ * "assigned to an interface" in the traditional
+ * sense, unless it is also flagged as optimistic.
+ * - Candidate Source Address (section 4)
+ * - In any case, anycast addresses, multicast
+ * addresses, and the unspecified address MUST
+ * NOT be included in a candidate set.
+ */
+ if ((score->ifa->flags & IFA_F_TENTATIVE) &&
+ (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
+ continue;
+
+ score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+
+ if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
+ score->addr_type & IPV6_ADDR_MULTICAST)) {
+ net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
+ idev->dev->name);
+ continue;
+ }
+
+ score->rule = -1; /* reset the scratch slot before scoring this address */
+ bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
+
+ for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) { /* first rule on which the two differ decides */
+ int minihiscore, miniscore;
+
+ minihiscore = ipv6_get_saddr_eval(net, hiscore, dst, i);
+ miniscore = ipv6_get_saddr_eval(net, score, dst, i);
+
+ if (minihiscore > miniscore) {
+ if (i == IPV6_SADDR_RULE_SCOPE &&
+ score->scopedist > 0) {
+ /*
+ * special case:
+ * each remaining entry
+ * has too small (not enough)
+ * scope, because ifa entries
+ * are sorted by their scope
+ * values.
+ * So stop scanning this device
+ * altogether instead of moving
+ * on to the next address.
+ */
+ goto out;
+ }
+ break; /* current best wins; move on to the next address */
+ } else if (minihiscore < miniscore) {
+ if (hiscore->ifa)
+ in6_ifa_put(hiscore->ifa); /* put the displaced best */
+
+ in6_ifa_hold(score->ifa); /* hold the new best */
+
+ swap(hiscore, score);
+ hiscore_idx = 1 - hiscore_idx;
+
+ /* restore our iterator: after the swap, score
+  * points at the slot that used to be the best,
+  * whose ->ifa is the displaced address. The
+  * list walk advances through score->ifa, so
+  * point it back at the element just examined
+  * (now hiscore->ifa).
+  */
+ score->ifa = hiscore->ifa;
+
+ break;
+ }
+ }
+ }
+out:
+ read_unlock_bh(&idev->lock);
+ return hiscore_idx;
+}
+
int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
const struct in6_addr *daddr, unsigned int prefs,
struct in6_addr *saddr)
{
- struct ipv6_saddr_score scores[2],
- *score = &scores[0], *hiscore = &scores[1];
+ struct ipv6_saddr_score scores[2], *hiscore;
struct ipv6_saddr_dst dst;
+ struct inet6_dev *idev;
struct net_device *dev;
int dst_type;
+ bool use_oif_addr = false;
+ int hiscore_idx = 0;
dst_type = __ipv6_addr_type(daddr);
dst.addr = daddr;
@@ -1375,105 +1528,50 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev,
dst.label = ipv6_addr_label(net, daddr, dst_type, dst.ifindex);
dst.prefs = prefs;
- hiscore->rule = -1;
- hiscore->ifa = NULL;
+ scores[hiscore_idx].rule = -1;
+ scores[hiscore_idx].ifa = NULL;
rcu_read_lock();
- for_each_netdev_rcu(net, dev) {
- struct inet6_dev *idev;
-
- /* Candidate Source Address (section 4)
- * - multicast and link-local destination address,
- * the set of candidate source address MUST only
- * include addresses assigned to interfaces
- * belonging to the same link as the outgoing
- * interface.
- * (- For site-local destination addresses, the
- * set of candidate source addresses MUST only
- * include addresses assigned to interfaces
- * belonging to the same site as the outgoing
- * interface.)
- */
- if (((dst_type & IPV6_ADDR_MULTICAST) ||
- dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL) &&
- dst.ifindex && dev->ifindex != dst.ifindex)
- continue;
-
- idev = __in6_dev_get(dev);
- if (!idev)
- continue;
-
- read_lock_bh(&idev->lock);
- list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
- int i;
-
- /*
- * - Tentative Address (RFC2462 section 5.4)
- * - A tentative address is not considered
- * "assigned to an interface" in the traditional
- * sense, unless it is also flagged as optimistic.
- * - Candidate Source Address (section 4)
- * - In any case, anycast addresses, multicast
- * addresses, and the unspecified address MUST
- * NOT be included in a candidate set.
- */
- if ((score->ifa->flags & IFA_F_TENTATIVE) &&
- (!(score->ifa->flags & IFA_F_OPTIMISTIC)))
- continue;
-
- score->addr_type = __ipv6_addr_type(&score->ifa->addr);
+ /* Candidate Source Address (section 4)
+ * - multicast and link-local destination address,
+ * the set of candidate source address MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same link as the outgoing
+ * interface.
+ * (- For site-local destination addresses, the
+ * set of candidate source addresses MUST only
+ * include addresses assigned to interfaces
+ * belonging to the same site as the outgoing
+ * interface.)
+ * - "It is RECOMMENDED that the candidate source addresses
+ * be the set of unicast addresses assigned to the
+ * interface that will be used to send to the destination
+ * (the 'outgoing' interface)." (RFC 6724)
+ */
+ if (dst_dev) {
+ idev = __in6_dev_get(dst_dev);
+ if ((dst_type & IPV6_ADDR_MULTICAST) ||
+ dst.scope <= IPV6_ADDR_SCOPE_LINKLOCAL ||
+ (idev && idev->cnf.use_oif_addrs_only)) {
+ use_oif_addr = true;
+ }
+ }
- if (unlikely(score->addr_type == IPV6_ADDR_ANY ||
- score->addr_type & IPV6_ADDR_MULTICAST)) {
- net_dbg_ratelimited("ADDRCONF: unspecified / multicast address assigned as unicast address on %s",
- dev->name);
+ if (use_oif_addr) {
+ if (idev)
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
+ } else {
+ for_each_netdev_rcu(net, dev) {
+ idev = __in6_dev_get(dev);
+ if (!idev)
continue;
- }
-
- score->rule = -1;
- bitmap_zero(score->scorebits, IPV6_SADDR_RULE_MAX);
-
- for (i = 0; i < IPV6_SADDR_RULE_MAX; i++) {
- int minihiscore, miniscore;
-
- minihiscore = ipv6_get_saddr_eval(net, hiscore, &dst, i);
- miniscore = ipv6_get_saddr_eval(net, score, &dst, i);
-
- if (minihiscore > miniscore) {
- if (i == IPV6_SADDR_RULE_SCOPE &&
- score->scopedist > 0) {
- /*
- * special case:
- * each remaining entry
- * has too small (not enough)
- * scope, because ifa entries
- * are sorted by their scope
- * values.
- */
- goto try_nextdev;
- }
- break;
- } else if (minihiscore < miniscore) {
- if (hiscore->ifa)
- in6_ifa_put(hiscore->ifa);
-
- in6_ifa_hold(score->ifa);
-
- swap(hiscore, score);
-
- /* restore our iterator */
- score->ifa = hiscore->ifa;
-
- break;
- }
- }
+ hiscore_idx = __ipv6_dev_get_saddr(net, &dst, idev, scores, hiscore_idx);
}
-try_nextdev:
- read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
+ hiscore = &scores[hiscore_idx];
if (!hiscore->ifa)
return -EADDRNOTAVAIL;
@@ -3558,7 +3656,7 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
+ ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL);
out:
in6_ifa_put(ifp);
rtnl_unlock();
@@ -4560,6 +4658,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor;
array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;
array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;
+ array[DEVCONF_ACCEPT_RA_MIN_HOP_LIMIT] = cnf->accept_ra_min_hop_limit;
array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo;
#ifdef CONFIG_IPV6_ROUTER_PREF
array[DEVCONF_ACCEPT_RA_RTR_PREF] = cnf->accept_ra_rtr_pref;
@@ -4585,7 +4684,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
+ array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown;
/* we omit DEVCONF_STABLE_SECRET for now */
+ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only;
}
static inline size_t inet6_ifla6_size(void)
@@ -4605,6 +4706,7 @@ static inline size_t inet6_if_nlmsg_size(void)
+ nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
+ nla_total_size(4) /* IFLA_MTU */
+ nla_total_size(4) /* IFLA_LINK */
+ + nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(inet6_ifla6_size()); /* IFLA_PROTINFO */
}
@@ -4861,7 +4963,9 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) ||
nla_put_u32(skb, IFLA_MTU, dev->mtu) ||
(dev->ifindex != dev_get_iflink(dev) &&
- nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))))
+ nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) ||
+ nla_put_u8(skb, IFLA_OPERSTATE,
+ netif_running(dev) ? dev->operstate : IF_OPER_DOWN))
goto nla_put_failure;
protoinfo = nla_nest_start(skb, IFLA_PROTINFO);
if (!protoinfo)
@@ -5306,6 +5410,34 @@ out:
return err;
}
+static
+int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
+ int write,
+ void __user *buffer,
+ size_t *lenp,
+ loff_t *ppos)
+{
+ int *valp = ctl->data;
+ int val = *valp;
+ loff_t pos = *ppos;
+ struct ctl_table lctl;
+ int ret;
+
+ /* sysctl handler for ignore_routes_with_linkdown.
+ *
+ * ctl->data points to idev->cnf.ignore_routes_with_linkdown
+ * we should not modify it until we get the rtnl lock.
+ * So proc_dointvec() is run on a copy of the table entry whose
+ * data pointer is the local 'val'; on a write the parsed value is
+ * then stored and propagated by addrconf_fixup_linkdown(), which
+ * takes the rtnl lock itself.
+ */
+ lctl = *ctl;
+ lctl.data = &val;
+
+ ret = proc_dointvec(&lctl, write, buffer, lenp, ppos);
+
+ if (write)
+ ret = addrconf_fixup_linkdown(ctl, valp, val); /* NOTE(review): replaces proc_dointvec()'s return value */
+ if (ret)
+ *ppos = pos; /* non-zero result: put the file position back to its value on entry */
+ return ret;
+}
+}
+
static struct addrconf_sysctl_table
{
struct ctl_table_header *sysctl_header;
@@ -5456,6 +5588,13 @@ static struct addrconf_sysctl_table
.proc_handler = proc_dointvec,
},
{
+ .procname = "accept_ra_min_hop_limit",
+ .data = &ipv6_devconf.accept_ra_min_hop_limit,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
.procname = "accept_ra_pinfo",
.data = &ipv6_devconf.accept_ra_pinfo,
.maxlen = sizeof(int),
@@ -5585,6 +5724,20 @@ static struct addrconf_sysctl_table
.proc_handler = addrconf_sysctl_stable_secret,
},
{
+ .procname = "use_oif_addrs_only",
+ .data = &ipv6_devconf.use_oif_addrs_only,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ignore_routes_with_linkdown",
+ .data = &ipv6_devconf.ignore_routes_with_linkdown,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown,
+ },
+ {
/* sentinel */
}
},
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index ca09bf49ac68..bfa941fc1165 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -107,7 +107,16 @@ int inet6addr_notifier_call_chain(unsigned long val, void *v)
}
EXPORT_SYMBOL(inet6addr_notifier_call_chain);
-const struct ipv6_stub *ipv6_stub __read_mostly;
+/* Placeholder ipv6_dst_lookup hook used by the default ipv6_stub: every
+ * argument is ignored and the lookup always fails with -EAFNOSUPPORT.
+ */
+static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
+					struct dst_entry **u2, struct flowi6 *u3)
+{
+	return -EAFNOSUPPORT;
+}
+
+const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
+ .ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+};
EXPORT_SYMBOL_GPL(ipv6_stub);
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 7de52b65173f..44bb66bde0e2 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -197,6 +197,7 @@ lookup_protocol:
np->mcast_hops = IPV6_DEFAULT_MCASTHOPS;
np->mc_loop = 1;
np->pmtudisc = IPV6_PMTUDISC_WANT;
+ np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk));
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
/* Init the ipv4 part of the socket since we can have sockets
@@ -342,7 +343,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
*/
v4addr = LOOPBACK4_IPV6;
if (!(addr_type & IPV6_ADDR_MULTICAST)) {
- if (!(inet->freebind || inet->transparent) &&
+ if (!net->ipv6.sysctl.ip_nonlocal_bind &&
+ !(inet->freebind || inet->transparent) &&
!ipv6_chk_addr(net, &addr->sin6_addr,
dev, 0)) {
err = -EADDRNOTAVAIL;
@@ -679,8 +681,8 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb,
const struct ipv6_pinfo *np = inet6_sk(sk);
if (np->rxopt.all) {
- if ((opt->hop && (np->rxopt.bits.hopopts ||
- np->rxopt.bits.ohopopts)) ||
+ if (((opt->flags & IP6SKB_HOPBYHOP) &&
+ (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
(ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&
np->rxopt.bits.rxflow) ||
(opt->srcrt && (np->rxopt.bits.srcrt ||
@@ -766,10 +768,10 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.bindv6only = 0;
net->ipv6.sysctl.icmpv6_time = 1*HZ;
net->ipv6.sysctl.flowlabel_consistency = 1;
- net->ipv6.sysctl.auto_flowlabels = 0;
+ net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS;
net->ipv6.sysctl.idgen_retries = 3;
net->ipv6.sysctl.idgen_delay = 1 * HZ;
- net->ipv6.sysctl.flowlabel_state_ranges = 1;
+ net->ipv6.sysctl.flowlabel_state_ranges = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
err = ipv6_init_mibs(net);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index b10a88986a98..9aadd57808a5 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -199,7 +199,7 @@ ipv4_connected:
NULL);
sk->sk_state = TCP_ESTABLISHED;
- ip6_set_txhash(sk);
+ sk_set_txhash(sk);
out:
fl6_sock_release(flowlabel);
return err;
@@ -568,8 +568,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
}
/* HbH is allowed only once */
- if (np->rxopt.bits.hopopts && opt->hop) {
- u8 *ptr = nh + opt->hop;
+ if (np->rxopt.bits.hopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+ u8 *ptr = nh + sizeof(struct ipv6hdr);
put_cmsg(msg, SOL_IPV6, IPV6_HOPOPTS, (ptr[1]+1)<<3, ptr);
}
@@ -630,8 +630,8 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
int hlim = ipv6_hdr(skb)->hop_limit;
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);
}
- if (np->rxopt.bits.ohopopts && opt->hop) {
- u8 *ptr = nh + opt->hop;
+ if (np->rxopt.bits.ohopopts && (opt->flags & IP6SKB_HOPBYHOP)) {
+ u8 *ptr = nh + sizeof(struct ipv6hdr);
put_cmsg(msg, SOL_IPV6, IPV6_2292HOPOPTS, (ptr[1]+1)<<3, ptr);
}
if (np->rxopt.bits.odstopts && opt->dst0) {
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index a7bbbe45570b..ce203b0402be 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -632,7 +632,7 @@ int ipv6_parse_hopopts(struct sk_buff *skb)
return -1;
}
- opt->hop = sizeof(struct ipv6hdr);
+ opt->flags |= IP6SKB_HOPBYHOP;
if (ip6_parse_tlv(tlvprochopopt_lst, skb)) {
skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3;
opt = IP6CB(skb);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 713d7434c911..6c2b2132c8d3 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -329,7 +329,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
struct flowi6 fl2;
int err;
- err = ip6_dst_lookup(sk, &dst, fl6);
+ err = ip6_dst_lookup(net, sk, &dst, fl6);
if (err)
return ERR_PTR(err);
@@ -361,7 +361,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
if (err)
goto relookup_failed;
- err = ip6_dst_lookup(sk, &dst2, &fl2);
+ err = ip6_dst_lookup(net, sk, &dst2, &fl2);
if (err)
goto relookup_failed;
@@ -591,7 +591,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
- err = ip6_dst_lookup(sk, &dst, &fl6);
+ err = ip6_dst_lookup(net, sk, &dst, &fl6);
if (err)
goto out;
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c
new file mode 100644
index 000000000000..f011c3d5ca40
--- /dev/null
+++ b/net/ipv6/ila.c
@@ -0,0 +1,210 @@
+#include <linux/errno.h>
+#include <linux/ip.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/types.h>
+#include <net/checksum.h>
+#include <net/ip.h>
+#include <net/ip6_fib.h>
+#include <net/lwtunnel.h>
+#include <net/protocol.h>
+#include <uapi/linux/ila.h>
+
+/* Per-route ILA state kept in the lwtunnel_state data area. */
+struct ila_params {
+	__be64 locator;	/* written over the first 64 bits of the destination address */
+};
+
+/* View the data area of @lwstate as the ILA parameters stored there. */
+static inline struct ila_params *
+ila_params_lwtunnel(struct lwtunnel_state *lwstate)
+{
+	void *payload = lwstate->data;
+
+	return payload;
+}
+
+/* Checksum delta for replacing the 8 bytes at @from with the 8 bytes at
+ * @to: the complemented old words are summed together with the new ones.
+ */
+static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
+{
+	__be32 words[4];
+
+	words[0] = ~from[0];
+	words[1] = ~from[1];
+	words[2] = to[0];
+	words[3] = to[1];
+
+	return csum_partial(words, sizeof(words), 0);
+}
+
+/* Checksum delta between the first 8 bytes of the packet's current
+ * destination address and the locator in @p.
+ */
+static inline __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p)
+{
+	__be32 *old_locator = (__be32 *)&ip6h->daddr;
+	__be32 *new_locator = (__be32 *)&p->locator;
+
+	return compute_csum_diff8(old_locator, new_locator);
+}
+
+/* Overwrite the first 64 bits of the IPv6 destination address with
+ * p->locator. Before doing so, the TCP, UDP or ICMPv6 checksum that
+ * directly follows the IPv6 header is adjusted by the difference between
+ * the old and new locator; other next-header values are left untouched.
+ *
+ * pskb_may_pull() may reallocate the skb head, so a header pointer taken
+ * before the call must not be dereferenced afterwards. The IPv6 header is
+ * therefore looked up again after every pull instead of reusing the
+ * pointer taken on entry.
+ */
+static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
+{
+	__wsum diff;
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	size_t nhoff = sizeof(struct ipv6hdr);
+
+	/* First update checksum */
+	switch (ip6h->nexthdr) {
+	case NEXTHDR_TCP:
+		if (likely(pskb_may_pull(skb, nhoff + sizeof(struct tcphdr)))) {
+			struct tcphdr *th = (struct tcphdr *)
+					(skb_network_header(skb) + nhoff);
+
+			diff = get_csum_diff(ipv6_hdr(skb), p);
+			inet_proto_csum_replace_by_diff(&th->check, skb,
+							diff, true);
+		}
+		break;
+	case NEXTHDR_UDP:
+		if (likely(pskb_may_pull(skb, nhoff + sizeof(struct udphdr)))) {
+			struct udphdr *uh = (struct udphdr *)
+					(skb_network_header(skb) + nhoff);
+
+			/* A zero UDP checksum is only adjusted when the
+			 * checksum is still to be filled in (CHECKSUM_PARTIAL).
+			 */
+			if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+				diff = get_csum_diff(ipv6_hdr(skb), p);
+				inet_proto_csum_replace_by_diff(&uh->check, skb,
+								diff, true);
+				if (!uh->check)
+					uh->check = CSUM_MANGLED_0;
+			}
+		}
+		break;
+	case NEXTHDR_ICMP:
+		if (likely(pskb_may_pull(skb,
+					 nhoff + sizeof(struct icmp6hdr)))) {
+			struct icmp6hdr *ih = (struct icmp6hdr *)
+					(skb_network_header(skb) + nhoff);
+
+			diff = get_csum_diff(ipv6_hdr(skb), p);
+			inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+							diff, true);
+		}
+		break;
+	}
+
+	/* Now change destination address (header reloaded, see above) */
+	ip6h = ipv6_hdr(skb);
+	*(__be64 *)&ip6h->daddr = p->locator;
+}
+
+/* lwtunnel output hook: rewrite the locator of an IPv6 packet and hand it
+ * to the route's original output function. Packets of any other protocol
+ * are freed and -EINVAL is returned.
+ */
+static int ila_output(struct sock *sk, struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+
+	return dst->lwtstate->orig_output(sk, skb);
+}
+
+/* lwtunnel input hook: rewrite the locator of an IPv6 packet and hand it
+ * to the route's original input function. Packets of any other protocol
+ * are freed and -EINVAL is returned.
+ */
+static int ila_input(struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		kfree_skb(skb);
+		return -EINVAL;
+	}
+
+	update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
+
+	return dst->lwtstate->orig_input(skb);
+}
+
+/* Netlink policy for ILA encap attributes; read-only, hence const. */
+static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = {
+	[ILA_ATTR_LOCATOR] = { .type = NLA_U64, },
+};
+
+/* Build the lwtunnel state for an ILA route from the nested attributes in
+ * @nla and return it through @ts.
+ *
+ * Returns 0 on success, the nla_parse_nested() error if parsing fails,
+ * -EINVAL if ILA_ATTR_LOCATOR is absent, or -ENOMEM if the state cannot be
+ * allocated.
+ */
+static int ila_build_state(struct net_device *dev, struct nlattr *nla,
+			   struct lwtunnel_state **ts)
+{
+	struct nlattr *attrs[ILA_ATTR_MAX + 1];
+	struct lwtunnel_state *state;
+	struct ila_params *params;
+	int err;
+
+	err = nla_parse_nested(attrs, ILA_ATTR_MAX, nla, ila_nl_policy);
+	if (err < 0)
+		return err;
+
+	if (!attrs[ILA_ATTR_LOCATOR])
+		return -EINVAL;
+
+	state = lwtunnel_state_alloc(sizeof(*params));
+	if (!state)
+		return -ENOMEM;
+
+	state->len = sizeof(*params);
+	state->type = LWTUNNEL_ENCAP_ILA;
+	/* Both directions are redirected through the ILA hooks. */
+	state->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
+			LWTUNNEL_STATE_INPUT_REDIRECT;
+
+	/* The attribute value is stored as-is, without byte swapping. */
+	params = ila_params_lwtunnel(state);
+	params->locator = (__force __be64)nla_get_u64(attrs[ILA_ATTR_LOCATOR]);
+
+	*ts = state;
+
+	return 0;
+}
+
+/* Dump the ILA state into @skb as a single ILA_ATTR_LOCATOR attribute.
+ * Returns 0 on success or -EMSGSIZE if the attribute could not be added.
+ */
+static int ila_fill_encap_info(struct sk_buff *skb,
+			       struct lwtunnel_state *lwtstate)
+{
+	struct ila_params *params = ila_params_lwtunnel(lwtstate);
+	u64 locator = (__force u64)params->locator;
+
+	return nla_put_u64(skb, ILA_ATTR_LOCATOR, locator) ? -EMSGSIZE : 0;
+}
+
+static int ila_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+	/* Must match ila_fill_encap_info(): one u64 ILA_ATTR_LOCATOR */
+	return nla_total_size(sizeof(u64));
+}
+
+/* Compare two ILA states: 0 when their locators are equal, 1 otherwise. */
+static int ila_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+	__be64 loc_a = ila_params_lwtunnel(a)->locator;
+	__be64 loc_b = ila_params_lwtunnel(b)->locator;
+
+	return loc_a != loc_b;
+}
+
+/* lwtunnel callbacks registered for LWTUNNEL_ENCAP_ILA. */
+static const struct lwtunnel_encap_ops ila_encap_ops = {
+	/* state construction, dump and comparison */
+	.build_state	= ila_build_state,
+	.fill_encap	= ila_fill_encap_info,
+	.get_encap_size	= ila_encap_nlsize,
+	.cmp_encap	= ila_encap_cmp,
+	/* packet path */
+	.input		= ila_input,
+	.output		= ila_output,
+};
+
+/* Module entry point: register the ILA encap ops with lwtunnel. */
+static int __init ila_init(void)
+{
+	int err = lwtunnel_encap_add_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
+
+	return err;
+}
+
+/* Module exit point: unregister the ILA encap ops from lwtunnel. */
+static void __exit ila_fini(void)
+{
+	lwtunnel_encap_del_ops(&ila_encap_ops, LWTUNNEL_ENCAP_ILA);
+}
+
+module_init(ila_init);
+module_exit(ila_fini);
+MODULE_AUTHOR("Tom Herbert <tom@herbertland.com>");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b4fd96de97e6..6ac8dad0138a 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -207,7 +207,6 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
struct sock *sk2;
const struct hlist_nulls_node *node;
struct inet_timewait_sock *tw = NULL;
- int twrefcnt = 0;
spin_lock(lock);
@@ -234,21 +233,17 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
WARN_ON(!sk_unhashed(sk));
__sk_nulls_add_node_rcu(sk, &head->chain);
if (tw) {
- twrefcnt = inet_twsk_unhash(tw);
+ sk_nulls_del_node_init_rcu((struct sock *)tw);
NET_INC_STATS_BH(net, LINUX_MIB_TIMEWAITRECYCLED);
}
spin_unlock(lock);
- if (twrefcnt)
- inet_twsk_put(tw);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
if (twp) {
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw);
-
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
}
return 0;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 548c6237b1e7..418d9823692b 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -32,6 +32,7 @@
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
+#include <net/lwtunnel.h>
#include <net/ip6_fib.h>
#include <net/ip6_route.h>
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index a38d3ac0f18f..34f121812a14 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -728,7 +728,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
*/
ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
- ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
ipv6h->hop_limit = tunnel->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
@@ -1182,7 +1182,8 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
ip6_flow_hdr(ipv6h, 0,
ip6_make_flowlabel(dev_net(dev), skb,
- t->fl.u.ip6.flowlabel, false));
+ t->fl.u.ip6.flowlabel, true,
+ &t->fl.u.ip6));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = NEXTHDR_GRE;
ipv6h->saddr = t->parms.laddr;
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 57990c929cd8..adba03ac7ce9 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -45,6 +45,7 @@
#include <net/addrconf.h>
#include <net/xfrm.h>
#include <net/inet_ecn.h>
+#include <net/dst_metadata.h>
int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
{
@@ -55,7 +56,7 @@ int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
if (ipprot && ipprot->early_demux)
ipprot->early_demux(skb);
}
- if (!skb_dst(skb))
+ if (!skb_valid_dst(skb))
ip6_route_input(skb);
return dst_input(skb);
@@ -98,7 +99,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
* arrived via the sending interface (ethX), because of the
* nature of scoping architecture. --yoshfuji
*/
- IP6CB(skb)->iif = skb_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
+ IP6CB(skb)->iif = skb_valid_dst(skb) ? ip6_dst_idev(skb_dst(skb))->dev->ifindex : dev->ifindex;
if (unlikely(!pskb_may_pull(skb, sizeof(*hdr))))
goto err;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d5f7716662db..26ea47930740 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -207,7 +207,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
hlimit = ip6_dst_hoplimit(dst);
ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel));
+ np->autoflowlabel, fl6));
hdr->payload_len = htons(seg_len);
hdr->nexthdr = proto;
@@ -881,10 +881,9 @@ out:
return dst;
}
-static int ip6_dst_lookup_tail(struct sock *sk,
+static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6)
{
- struct net *net = sock_net(sk);
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
struct neighbour *n;
struct rt6_info *rt;
@@ -994,10 +993,11 @@ out_err_release:
*
* It returns zero on success, or a standard errno code on error.
*/
-int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
+int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
+ struct flowi6 *fl6)
{
*dst = NULL;
- return ip6_dst_lookup_tail(sk, dst, fl6);
+ return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
@@ -1018,11 +1018,13 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
struct dst_entry *dst = NULL;
int err;
- err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
fl6->daddr = *final_dst;
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = dst->dev->ifindex;
return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
@@ -1050,7 +1052,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
dst = ip6_sk_dst_check(sk, dst, fl6);
- err = ip6_dst_lookup_tail(sk, &dst, fl6);
+ err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
if (err)
return ERR_PTR(err);
if (final_dst)
@@ -1647,7 +1649,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
ip6_flow_hdr(hdr, v6_cork->tclass,
ip6_make_flowlabel(net, skb, fl6->flowlabel,
- np->autoflowlabel));
+ np->autoflowlabel, fl6));
hdr->hop_limit = v6_cork->hop_limit;
hdr->nexthdr = proto;
hdr->saddr = fl6->saddr;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2e67b660118b..b0ab420612bc 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1095,7 +1095,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
skb_reset_network_header(skb);
ipv6h = ipv6_hdr(skb);
ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield),
- ip6_make_flowlabel(net, skb, fl6->flowlabel, false));
+ ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6));
ipv6h->hop_limit = t->parms.hop_limit;
ipv6h->nexthdr = proto;
ipv6h->saddr = fl6->saddr;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c53331cfed95..13d3c2beb93e 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -553,7 +553,8 @@ static void ndisc_send_unsol_na(struct net_device *dev)
void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
const struct in6_addr *solicit,
- const struct in6_addr *daddr, const struct in6_addr *saddr)
+ const struct in6_addr *daddr, const struct in6_addr *saddr,
+ struct sk_buff *oskb)
{
struct sk_buff *skb;
struct in6_addr addr_buf;
@@ -589,6 +590,9 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
dev->dev_addr);
+ if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
+ skb_dst_copy(skb, oskb);
+
ndisc_send_skb(skb, daddr, saddr);
}
@@ -675,12 +679,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, neigh, target, target, saddr);
+ ndisc_send_ns(dev, neigh, target, target, saddr, skb);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
+ ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb);
}
}
@@ -1225,18 +1229,16 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt)
rt6_set_expires(rt, jiffies + (HZ * lifetime));
- if (ra_msg->icmph.icmp6_hop_limit) {
- /* Only set hop_limit on the interface if it is higher than
- * the current hop_limit.
- */
- if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) {
+ if (in6_dev->cnf.accept_ra_min_hop_limit < 256 &&
+ ra_msg->icmph.icmp6_hop_limit) {
+ if (in6_dev->cnf.accept_ra_min_hop_limit <= ra_msg->icmph.icmp6_hop_limit) {
in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
+ if (rt)
+ dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
+ ra_msg->icmph.icmp6_hop_limit);
} else {
- ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n");
+ ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n");
}
- if (rt)
- dst_metric_set(&rt->dst, RTAX_HOPLIMIT,
- ra_msg->icmph.icmp6_hop_limit);
}
skip_defrtr:
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index b552cf0d6198..96833e4b3193 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -47,9 +47,21 @@ config NFT_REJECT_IPV6
default NFT_REJECT
tristate
+config NFT_DUP_IPV6
+ tristate "IPv6 nf_tables packet duplication support"
+ select NF_DUP_IPV6
+ help
+ This module enables IPv6 packet duplication support for nf_tables.
+
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_DUP_IPV6
+ tristate "Netfilter IPv6 packet duplication to alternate destination"
+ help
+ This option enables the nf_dup_ipv6 core, which duplicates an IPv6
+ packet to be rerouted to another destination.
+
config NF_REJECT_IPV6
tristate "IPv6 packet rejection"
default m if NETFILTER_ADVANCED=n
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c36e0a5490de..b4f7d0b4e2af 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -30,6 +30,8 @@ obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
# reject
obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
+obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
+
# nf_tables
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
@@ -37,6 +39,7 @@ obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
+obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 3c35ced39b42..4e21f80228be 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -305,7 +305,7 @@ static void trace_packet(const struct sk_buff *skb,
}
#endif
-static inline __pure struct ip6t_entry *
+static inline struct ip6t_entry *
ip6t_next_entry(const struct ip6t_entry *entry)
{
return (void *)entry + entry->next_offset;
@@ -324,12 +324,13 @@ ip6t_do_table(struct sk_buff *skb,
const char *indev, *outdev;
const void *table_base;
struct ip6t_entry *e, **jumpstack;
- unsigned int *stackptr, origptr, cpu;
+ unsigned int stackidx, cpu;
const struct xt_table_info *private;
struct xt_action_param acpar;
unsigned int addend;
/* Initialization */
+ stackidx = 0;
indev = state->in ? state->in->name : nulldevname;
outdev = state->out ? state->out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -357,8 +358,16 @@ ip6t_do_table(struct sk_buff *skb,
cpu = smp_processor_id();
table_base = private->entries;
jumpstack = (struct ip6t_entry **)private->jumpstack[cpu];
- stackptr = per_cpu_ptr(private->stackptr, cpu);
- origptr = *stackptr;
+
+ /* Switch to alternate jumpstack if we're being invoked via TEE.
+ * TEE issues XT_CONTINUE verdict on original skb so we must not
+ * clobber the jumpstack.
+ *
+ * For recursion via REJECT or SYNPROXY the stack will be clobbered
+ * but it is no problem since absolute verdict is issued by these.
+ */
+ if (static_key_false(&xt_tee_enabled))
+ jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
e = get_entry(table_base, private->hook_entry[hook]);
@@ -406,20 +415,16 @@ ip6t_do_table(struct sk_buff *skb,
verdict = (unsigned int)(-v) - 1;
break;
}
- if (*stackptr <= origptr)
+ if (stackidx == 0)
e = get_entry(table_base,
private->underflow[hook]);
else
- e = ip6t_next_entry(jumpstack[--*stackptr]);
+ e = ip6t_next_entry(jumpstack[--stackidx]);
continue;
}
if (table_base + v != ip6t_next_entry(e) &&
!(e->ipv6.flags & IP6T_F_GOTO)) {
- if (*stackptr >= private->stacksize) {
- verdict = NF_DROP;
- break;
- }
- jumpstack[(*stackptr)++] = e;
+ jumpstack[stackidx++] = e;
}
e = get_entry(table_base, v);
@@ -437,8 +442,6 @@ ip6t_do_table(struct sk_buff *skb,
break;
} while (!acpar.hotdrop);
- *stackptr = origptr;
-
xt_write_recseq_end(addend);
local_bh_enable();
@@ -452,11 +455,15 @@ ip6t_do_table(struct sk_buff *skb,
}
/* Figures out from what hook each rule can be called: returns 0 if
- there are loops. Puts hook bitmask in comefrom. */
+ * there are loops. Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
+ unsigned int calldepth, max_calldepth = 0;
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
@@ -470,6 +477,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* Set initial back pointer. */
e->counters.pcnt = pos;
+ calldepth = 0;
for (;;) {
const struct xt_standard_target *t
@@ -531,6 +539,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
(entry0 + pos + size);
e->counters.pcnt = pos;
pos += size;
+ if (calldepth > 0)
+ --calldepth;
} else {
int newpos = t->verdict;
@@ -544,6 +554,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
newpos);
return 0;
}
+ if (entry0 + newpos != ip6t_next_entry(e) &&
+ !(e->ipv6.flags & IP6T_F_GOTO) &&
+ ++calldepth > max_calldepth)
+ max_calldepth = calldepth;
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -560,6 +575,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
next:
duprintf("Finished chain %u\n", hook);
}
+ newinfo->stacksize = max_calldepth;
return 1;
}
@@ -839,9 +855,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (ret != 0)
return ret;
++i;
- if (strcmp(ip6t_get_target(iter)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (i != repl->num_entries) {
@@ -1754,9 +1767,6 @@ translate_compat_table(struct net *net,
if (ret != 0)
break;
++i;
- if (strcmp(ip6t_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
}
if (ret) {
/*
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 12331efd49cf..567367a75172 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -35,14 +35,12 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
MODULE_LICENSE("GPL");
-
static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_reject_info *reject = par->targinfo;
struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
- pr_debug("%s: medium point\n", __func__);
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
@@ -65,9 +63,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
case IP6T_TCP_RESET:
nf_send_reset6(net, skb, par->hooknum);
break;
- default:
- net_info_ratelimited("case %u not handled yet\n", reject->with);
- break;
}
return NF_DROP;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 4ba0c34c627b..7302900c321a 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -251,7 +251,7 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
if (*len < 0 || (unsigned int) *len < sizeof(sin6))
return -EINVAL;
- h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (!h) {
pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
&tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 90388d606483..0e6fae103d33 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -150,7 +150,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple intuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_l4proto *inproto;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ struct nf_conntrack_zone tmp;
NF_CT_ASSERT(skb->nfct == NULL);
@@ -177,7 +177,8 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(net, zone, &intuple);
+ h = nf_conntrack_find_get(net, nf_ct_zone_tmpl(tmpl, skb, &tmp),
+ &intuple);
if (!h) {
pr_debug("icmpv6_error: no match\n");
return -NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index a45db0b4785c..6d9c0b3d5b8c 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -33,23 +33,22 @@
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
struct sk_buff *skb)
{
- u16 zone = NF_CT_DEFAULT_ZONE;
-
+ u16 zone_id = NF_CT_DEFAULT_ZONE_ID;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
- if (skb->nfct)
- zone = nf_ct_zone((struct nf_conn *)skb->nfct);
-#endif
+ if (skb->nfct) {
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- if (skb->nf_bridge &&
- skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
- return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), CTINFO2DIR(ctinfo));
+ }
#endif
+ if (nf_bridge_in_prerouting(skb))
+ return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone_id;
+
if (hooknum == NF_INET_PRE_ROUTING)
- return IP6_DEFRAG_CONNTRACK_IN + zone;
+ return IP6_DEFRAG_CONNTRACK_IN + zone_id;
else
- return IP6_DEFRAG_CONNTRACK_OUT + zone;
-
+ return IP6_DEFRAG_CONNTRACK_OUT + zone_id;
}
static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
new file mode 100644
index 000000000000..d8ab654080b4
--- /dev/null
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -0,0 +1,96 @@
+/*
+ * (C) 2007 by Sebastian Claßen <sebastian.classen@freenet.ag>
+ * (C) 2007-2010 by Jan Engelhardt <jengelh@medozas.de>
+ *
+ * Extracted from xt_TEE.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 or later, as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/skbuff.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+/*
+ * Choose the network namespace used for the route lookup of a duplicated
+ * packet: the namespace of skb->dev when a device is attached, otherwise
+ * the namespace of the device behind the skb's dst entry, otherwise
+ * &init_net.  Without CONFIG_NET_NS, &init_net is returned unconditionally.
+ */
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+	const struct dst_entry *route;
+
+	if (skb->dev)
+		return dev_net(skb->dev);
+
+	route = skb_dst(skb);
+	if (route && route->dev)
+		return dev_net(route->dev);
+#endif
+	return &init_net;
+}
+
+/*
+ * Re-route @skb towards the gateway @gw.
+ *
+ * A flow key is built from the gateway address, the optional output
+ * interface (@oif, where -1 means "not specified") and the flow label bits
+ * of the packet's IPv6 header, and handed to ip6_route_output().
+ *
+ * Returns true when the returned dst carries no error; @skb then owns that
+ * dst, skb->dev is the dst's device and skb->protocol is ETH_P_IPV6.
+ * Returns false, with the dst released and @skb left as it was, otherwise.
+ */
+static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
+			      int oif)
+{
+	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct net *netns = pick_net(skb);
+	struct dst_entry *route;
+	struct flowi6 key;
+
+	memset(&key, 0, sizeof(key));
+	key.daddr = *gw;
+	if (oif != -1)
+		key.flowi6_oif = oif;
+	/*
+	 * NOTE(review): the label is assembled as a plain integer from the
+	 * header bytes; confirm this matches the byte order flowlabel expects.
+	 */
+	key.flowlabel = ((ip6h->flow_lbl[0] & 0xF) << 16) |
+			(ip6h->flow_lbl[1] << 8) | ip6h->flow_lbl[2];
+
+	route = ip6_route_output(netns, NULL, &key);
+	if (route->error) {
+		dst_release(route);
+		return false;
+	}
+
+	/* Swap the packet's old route for the one towards the gateway. */
+	skb_dst_drop(skb);
+	skb_dst_set(skb, route);
+	skb->dev = route->dev;
+	skb->protocol = htons(ETH_P_IPV6);
+	return true;
+}
+
+/*
+ * nf_dup_ipv6 - send a copy of an IPv6 packet to another destination
+ * @skb:     packet to duplicate; all work is done on a pskb_copy() of it
+ * @hooknum: netfilter hook the packet is currently traversing
+ * @gw:      gateway address the copy is routed to
+ * @oif:     output interface index for the route lookup, -1 for none
+ *
+ * Returns without doing anything when the per-cpu nf_skb_duplicated flag is
+ * already set or when the copy cannot be allocated.  The flag is raised
+ * only around ip6_local_out() for the copy.  If no route to @gw is found
+ * the copy is freed.
+ */
+void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
+		 const struct in6_addr *gw, int oif)
+{
+	struct sk_buff *dup;
+
+	if (this_cpu_read(nf_skb_duplicated))
+		return;
+
+	dup = pskb_copy(skb, GFP_ATOMIC);
+	if (!dup)
+		return;
+
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+	/* Point the copy at the untracked conntrack entry instead. */
+	nf_conntrack_put(dup->nfct);
+	dup->nfct = &nf_ct_untracked_get()->ct_general;
+	dup->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(dup->nfct);
+#endif
+
+	/* On the two input-side hooks the copy's hop limit is decremented. */
+	if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN)
+		--ipv6_hdr(dup)->hop_limit;
+
+	if (!nf_dup_ipv6_route(dup, gw, oif)) {
+		kfree_skb(dup);
+		return;
+	}
+
+	__this_cpu_write(nf_skb_duplicated, true);
+	ip6_local_out(dup);
+	__this_cpu_write(nf_skb_duplicated, false);
+}
+EXPORT_SYMBOL_GPL(nf_dup_ipv6);
+
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("nf_dup_ipv6: IPv6 packet duplication");
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index e76900e0aa92..70fbaed49edb 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -124,7 +124,7 @@ static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
newip = &t->dst.u3.in6;
}
inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
- newip->s6_addr32, 1);
+ newip->s6_addr32, true);
}
static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
@@ -155,7 +155,7 @@ static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
}
} else
inet_proto_csum_replace2(check, skb,
- htons(oldlen), htons(datalen), 1);
+ htons(oldlen), htons(datalen), true);
}
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
index 2205e8eeeacf..57593b00c5b4 100644
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
@@ -73,7 +73,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
hdr->icmp6_identifier,
- tuple->src.u.icmp.id, 0);
+ tuple->src.u.icmp.id, false);
hdr->icmp6_identifier = tuple->src.u.icmp.id;
}
return true;
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
new file mode 100644
index 000000000000..0eaa4f65fdea
--- /dev/null
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2015 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
+
+struct nft_dup_ipv6 { /* private data of one IPv6 "dup" expression */
+ enum nft_registers sreg_addr:8; /* register the gateway in6_addr is loaded from */
+ enum nft_registers sreg_dev:8; /* register the output ifindex is loaded from; init only sets it when NFTA_DUP_SREG_DEV is present */
+};
+
+/*
+ * Evaluate the "dup" expression: duplicate the current packet towards the
+ * gateway address held in register sreg_addr, optionally constrained to the
+ * output interface whose index is held in register sreg_dev.
+ */
+static void nft_dup_ipv6_eval(const struct nft_expr *expr,
+			      struct nft_regs *regs,
+			      const struct nft_pktinfo *pkt)
+{
+	struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+	struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
+	/*
+	 * NFTA_DUP_SREG_DEV is optional and ->init() does not touch sreg_dev
+	 * when it is absent.  Treat 0 as "no device register" and pass -1,
+	 * which nf_dup_ipv6() takes as "no output interface", rather than
+	 * reading an unrelated register as the ifindex.
+	 * NOTE(review): assumes the private area starts out zeroed - confirm.
+	 */
+	int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;
+
+	nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif);
+}
+
+/*
+ * Parse the netlink attributes of a new "dup" expression.
+ *
+ * NFTA_DUP_SREG_ADDR is mandatory and must name a register that can be
+ * loaded as a struct in6_addr.  NFTA_DUP_SREG_DEV is optional and, when
+ * present, must name a register that can be loaded as an int.
+ *
+ * Returns 0 on success, -EINVAL when the address register is missing, or
+ * the error reported by nft_validate_register_load().
+ */
+static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
+			     const struct nft_expr *expr,
+			     const struct nlattr * const tb[])
+{
+	struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+	const struct nlattr *dev_attr = tb[NFTA_DUP_SREG_DEV];
+	int ret;
+
+	if (!tb[NFTA_DUP_SREG_ADDR])
+		return -EINVAL;
+
+	priv->sreg_addr = nft_parse_register(tb[NFTA_DUP_SREG_ADDR]);
+	ret = nft_validate_register_load(priv->sreg_addr,
+					 sizeof(struct in6_addr));
+	if (ret < 0)
+		return ret;
+
+	/* Without a device attribute there is nothing left to validate. */
+	if (!dev_attr)
+		return 0;
+
+	priv->sreg_dev = nft_parse_register(dev_attr);
+	return nft_validate_register_load(priv->sreg_dev, sizeof(int));
+}
+
+/*
+ * Dump the expression's configuration into the netlink message @skb.
+ *
+ * Returns 0 on success, -1 when an attribute could not be added.
+ */
+static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
+
+	if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+		goto nla_put_failure;
+
+	/*
+	 * The device register is optional: only report it when one was
+	 * configured (non-zero), so the dump mirrors what was passed to
+	 * ->init() instead of advertising a register that was never set.
+	 */
+	if (priv->sreg_dev &&
+	    nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_dup_ipv6_type; /* forward declaration: the ops table refers to the type defined later in this file */
+static const struct nft_expr_ops nft_dup_ipv6_ops = { /* callbacks of the IPv6 "dup" expression */
+ .type = &nft_dup_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dup_ipv6)), /* sized from the private struct nft_dup_ipv6 */
+ .eval = nft_dup_ipv6_eval,
+ .init = nft_dup_ipv6_init,
+ .dump = nft_dup_ipv6_dump,
+};
+
+static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = { /* netlink policy: both attributes are 32-bit values */
+ [NFTA_DUP_SREG_ADDR] = { .type = NLA_U32 }, /* parsed by init into sreg_addr */
+ [NFTA_DUP_SREG_DEV] = { .type = NLA_U32 }, /* parsed by init into sreg_dev, if present */
+};
+
+static struct nft_expr_type nft_dup_ipv6_type __read_mostly = { /* expression type handed to nft_register_expr() at module init */
+ .family = NFPROTO_IPV6,
+ .name = "dup",
+ .ops = &nft_dup_ipv6_ops,
+ .policy = nft_dup_ipv6_policy,
+ .maxattr = NFTA_DUP_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_dup_ipv6_module_init(void) /* module entry point */
+{
+ return nft_register_expr(&nft_dup_ipv6_type); /* the registration result is the module init result */
+}
+
+static void __exit nft_dup_ipv6_module_exit(void) /* module exit point */
+{
+ nft_unregister_expr(&nft_dup_ipv6_type); /* undoes the registration done at module init */
+}
+
+module_init(nft_dup_ipv6_module_init);
+module_exit(nft_dup_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "dup");
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ca4700cb26c4..fdbada1569a3 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -295,7 +295,8 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* unspecified and mapped address have a v4 equivalent.
*/
v4addr = LOOPBACK4_IPV6;
- if (!(addr_type & IPV6_ADDR_MULTICAST)) {
+ if (!(addr_type & IPV6_ADDR_MULTICAST) &&
+ !sock_net(sk)->ipv6.sysctl.ip_nonlocal_bind) {
err = -EADDRNOTAVAIL;
if (!ipv6_chk_addr(sock_net(sk), &addr->sin6_addr,
dev, 0)) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d15586490cec..e476f01add87 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -54,10 +54,13 @@
#include <net/tcp.h>
#include <linux/rtnetlink.h>
#include <net/dst.h>
+#include <net/dst_metadata.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <net/nexthop.h>
+#include <net/lwtunnel.h>
+#include <net/ip_tunnels.h>
#include <asm/uaccess.h>
@@ -535,13 +538,14 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
dev_put(work->dev);
kfree(work);
}
static void rt6_probe(struct rt6_info *rt)
{
+ struct __rt6_probe_work *work;
struct neighbour *neigh;
/*
* Okay, this does not seem to be appropriate
@@ -556,34 +560,33 @@ static void rt6_probe(struct rt6_info *rt)
rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (neigh) {
- write_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
goto out;
- }
-
- if (!neigh ||
- time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct __rt6_probe_work *work;
+ work = NULL;
+ write_lock(&neigh->lock);
+ if (!(neigh->nud_state & NUD_VALID) &&
+ time_after(jiffies,
+ neigh->updated +
+ rt->rt6i_idev->cnf.rtr_probe_interval)) {
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ if (work)
+ __neigh_set_probe_once(neigh);
+ }
+ write_unlock(&neigh->lock);
+ } else {
work = kmalloc(sizeof(*work), GFP_ATOMIC);
+ }
- if (neigh && work)
- __neigh_set_probe_once(neigh);
-
- if (neigh)
- write_unlock(&neigh->lock);
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
- if (work) {
- INIT_WORK(&work->work, rt6_probe_deferred);
- work->target = rt->rt6i_gateway;
- dev_hold(rt->dst.dev);
- work->dev = rt->dst.dev;
- schedule_work(&work->work);
- }
- } else {
out:
- write_unlock(&neigh->lock);
- }
rcu_read_unlock_bh();
}
#else
@@ -662,6 +665,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
{
int m;
bool match_do_rr = false;
+ struct inet6_dev *idev = rt->rt6i_idev;
+ struct net_device *dev = rt->dst.dev;
+
+ if (dev && !netif_carrier_ok(dev) &&
+ idev->cnf.ignore_routes_with_linkdown)
+ goto out;
if (rt6_check_expired(rt))
goto out;
@@ -1154,6 +1163,7 @@ void ip6_route_input(struct sk_buff *skb)
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
int flags = RT6_LOOKUP_F_HAS_SADDR;
+ struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.daddr = iph->daddr,
@@ -1163,6 +1173,10 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_proto = iph->nexthdr,
};
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info && tun_info->mode == IP_TUNNEL_INFO_RX)
+ fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+ skb_dst_drop(skb);
skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
}
@@ -1801,6 +1815,24 @@ int ip6_route_add(struct fib6_config *cfg)
rt->dst.output = ip6_output;
+ if (cfg->fc_encap) {
+ struct lwtunnel_state *lwtstate;
+
+ err = lwtunnel_build_state(dev, cfg->fc_encap_type,
+ cfg->fc_encap, &lwtstate);
+ if (err)
+ goto out;
+ rt->dst.lwtstate = lwtstate_get(lwtstate);
+ if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_output = rt->dst.output;
+ rt->dst.output = lwtunnel_output;
+ }
+ if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
+ rt->dst.lwtstate->orig_input = rt->dst.input;
+ rt->dst.input = lwtunnel_input;
+ }
+ }
+
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
rt->rt6i_dst.plen = cfg->fc_dst_len;
if (rt->rt6i_dst.plen == 128)
@@ -2180,6 +2212,7 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort)
#endif
rt->rt6i_prefsrc = ort->rt6i_prefsrc;
rt->rt6i_table = ort->rt6i_table;
+ rt->dst.lwtstate = lwtstate_get(ort->dst.lwtstate);
}
#ifdef CONFIG_IPV6_ROUTE_INFO
@@ -2628,6 +2661,8 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_METRICS] = { .type = NLA_NESTED },
[RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) },
[RTA_PREF] = { .type = NLA_U8 },
+ [RTA_ENCAP_TYPE] = { .type = NLA_U16 },
+ [RTA_ENCAP] = { .type = NLA_NESTED },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2722,6 +2757,12 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_flags |= RTF_PREF(pref);
}
+ if (tb[RTA_ENCAP])
+ cfg->fc_encap = tb[RTA_ENCAP];
+
+ if (tb[RTA_ENCAP_TYPE])
+ cfg->fc_encap_type = nla_get_u16(tb[RTA_ENCAP_TYPE]);
+
err = 0;
errout:
return err;
@@ -2754,6 +2795,10 @@ beginning:
r_cfg.fc_gateway = nla_get_in6_addr(nla);
r_cfg.fc_flags |= RTF_GATEWAY;
}
+ r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP);
+ nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE);
+ if (nla)
+ r_cfg.fc_encap_type = nla_get_u16(nla);
}
err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
if (err) {
@@ -2816,7 +2861,7 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
return ip6_route_add(&cfg);
}
-static inline size_t rt6_nlmsg_size(void)
+static inline size_t rt6_nlmsg_size(struct rt6_info *rt)
{
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
@@ -2830,7 +2875,8 @@ static inline size_t rt6_nlmsg_size(void)
+ RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
+ nla_total_size(sizeof(struct rta_cacheinfo))
+ nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
- + nla_total_size(1); /* RTA_PREF */
+ + nla_total_size(1) /* RTA_PREF */
+ + lwtunnel_get_encap_size(rt->dst.lwtstate);
}
static int rt6_fill_node(struct net *net,
@@ -2891,6 +2937,11 @@ static int rt6_fill_node(struct net *net,
else
rtm->rtm_type = RTN_UNICAST;
rtm->rtm_flags = 0;
+ if (!netif_carrier_ok(rt->dst.dev)) {
+ rtm->rtm_flags |= RTNH_F_LINKDOWN;
+ if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
+ rtm->rtm_flags |= RTNH_F_DEAD;
+ }
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->rt6i_protocol;
if (rt->rt6i_flags & RTF_DYNAMIC)
@@ -2978,6 +3029,8 @@ static int rt6_fill_node(struct net *net,
if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
goto nla_put_failure;
+ lwtunnel_fill_encap(skb, rt->dst.lwtstate);
+
nlmsg_end(skb, nlh);
return 0;
@@ -3104,7 +3157,7 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
err = -ENOBUFS;
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
+ skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
if (!skb)
goto errout;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index ac35a28599be..94428fd85b2f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -742,7 +742,7 @@ static int ipip_rcv(struct sk_buff *skb)
goto drop;
if (iptunnel_pull_header(skb, 0, tpi.proto))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, &tpi, NULL, log_ecn_error);
}
return 1;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 4e705add4f18..45243bbe5253 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -17,6 +17,9 @@
#include <net/inet_frag.h>
static int one = 1;
+static int auto_flowlabels_min;
+static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+
static struct ctl_table ipv6_table_template[] = {
{
@@ -45,7 +48,9 @@ static struct ctl_table ipv6_table_template[] = {
.data = &init_net.ipv6.sysctl.auto_flowlabels,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &auto_flowlabels_min,
+ .extra2 = &auto_flowlabels_max
},
{
.procname = "fwmark_reflect",
@@ -75,6 +80,13 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "ip_nonlocal_bind",
+ .data = &init_net.ipv6.sysctl.ip_nonlocal_bind,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
{ }
};
@@ -117,6 +129,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries;
ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay;
ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges;
+ ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind;
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7a6cea5e4274..97d9314ea361 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -276,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err)
goto late_failure;
- ip6_set_txhash(sk);
+ sk_set_txhash(sk);
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
@@ -1090,7 +1090,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
- ip6_set_txhash(newsk);
+ sk_set_txhash(newsk);
/* Now IPv6 options...
@@ -1481,8 +1481,7 @@ do_time_wait:
ntohs(th->dest), tcp_v6_iif(skb));
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw);
- inet_twsk_put(tw);
+ inet_twsk_deschedule_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
goto process;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index e51fc3eee6db..0aba654f5b91 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1496,7 +1496,8 @@ int __net_init udp6_proc_init(struct net *net)
return udp_proc_register(net, &udp6_seq_afinfo);
}
-void udp6_proc_exit(struct net *net) {
+void udp6_proc_exit(struct net *net)
+{
udp_proc_unregister(net, &udp6_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 901ef6f8addc..f7fbdbabe50e 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -20,10 +20,9 @@
static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
{
- const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
- if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
+ if (INET_ECN_is_ce(XFRM_MODE_SKB_CB(skb)->tos))
IP6_ECN_set_ce(inner_iph);
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ed0583c1b9fc..a74013d3eceb 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -26,7 +26,7 @@
static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
-static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
+static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr)
{
@@ -35,6 +35,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
int err;
memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_oif = oif;
memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr));
if (saddr)
memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr));
@@ -50,13 +51,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos,
return dst;
}
-static int xfrm6_get_saddr(struct net *net,
+static int xfrm6_get_saddr(struct net *net, int oif,
xfrm_address_t *saddr, xfrm_address_t *daddr)
{
struct dst_entry *dst;
struct net_device *dev;
- dst = xfrm6_dst_lookup(net, 0, NULL, daddr);
+ dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr);
if (IS_ERR(dst))
return -EHOSTUNREACH;
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 086de496a4c1..3891cbd2adea 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -7,7 +7,6 @@ config MAC80211
select CRYPTO_CCM
select CRYPTO_GCM
select CRC32
- select AVERAGE
---help---
This option enables the hardware independent IEEE 802.11
networking stack.
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 3275f01881be..783e891b7525 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_MAC80211) += mac80211.o
# mac80211 objects
mac80211-y := \
main.o status.o \
+ driver-ops.o \
sta_info.o \
wep.o \
wpa.o \
diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c
index 4192806be3d3..bdf0790d89cc 100644
--- a/net/mac80211/aes_cmac.c
+++ b/net/mac80211/aes_cmac.c
@@ -145,20 +145,3 @@ void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm)
{
crypto_free_cipher(tfm);
}
-
-void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf,
- u8 *k1, u8 *k2)
-{
- u8 l[AES_BLOCK_SIZE] = {};
- struct ieee80211_key *key =
- container_of(keyconf, struct ieee80211_key, conf);
-
- crypto_cipher_encrypt_one(key->u.aes_cmac.tfm, l, l);
-
- memcpy(k1, l, AES_BLOCK_SIZE);
- gf_mulx(k1);
-
- memcpy(k2, k1, AES_BLOCK_SIZE);
- gf_mulx(k2);
-}
-EXPORT_SYMBOL(ieee80211_aes_cmac_calculate_k1_k2);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index bf7023f6c327..685ec13ed7c2 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1019,6 +1019,65 @@ static int sta_apply_auth_flags(struct ieee80211_local *local,
return 0;
}
+/*
+ * Apply the mesh-specific station parameters in @params to @sta: an
+ * explicitly requested peer link state, a peer link action (open or block)
+ * and the local mesh power mode.  The change flags collected along the way
+ * are handed to ieee80211_mbss_info_change_notify() at the end.
+ *
+ * The whole body is compiled out when CONFIG_MAC80211_MESH is not set.
+ */
+static void sta_apply_mesh_params(struct ieee80211_local *local,
+				  struct sta_info *sta,
+				  struct station_parameters *params)
+{
+#ifdef CONFIG_MAC80211_MESH
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	u32 bss_changed = 0;
+
+	if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {
+		switch (params->plink_state) {
+		case NL80211_PLINK_ESTAB:
+			/* Entering ESTAB from any other state adds a peer. */
+			if (sta->mesh->plink_state != NL80211_PLINK_ESTAB)
+				bss_changed = mesh_plink_inc_estab_count(sdata);
+			sta->mesh->plink_state = params->plink_state;
+
+			ieee80211_mps_sta_status_update(sta);
+			bss_changed |= ieee80211_mps_set_sta_local_pm(sta,
+					sdata->u.mesh.mshcfg.power_mode);
+			break;
+		case NL80211_PLINK_LISTEN:
+		case NL80211_PLINK_BLOCKED:
+		case NL80211_PLINK_OPN_SNT:
+		case NL80211_PLINK_OPN_RCVD:
+		case NL80211_PLINK_CNF_RCVD:
+		case NL80211_PLINK_HOLDING:
+			/* Leaving ESTAB removes a peer. */
+			if (sta->mesh->plink_state == NL80211_PLINK_ESTAB)
+				bss_changed = mesh_plink_dec_estab_count(sdata);
+			sta->mesh->plink_state = params->plink_state;
+
+			ieee80211_mps_sta_status_update(sta);
+			bss_changed |= ieee80211_mps_set_sta_local_pm(sta,
+					NL80211_MESH_POWER_UNKNOWN);
+			break;
+		default:
+			/* any other state value is ignored */
+			break;
+		}
+	}
+
+	/* NL80211_PLINK_ACTION_NO_ACTION and unknown actions do nothing. */
+	if (params->plink_action == NL80211_PLINK_ACTION_OPEN)
+		bss_changed |= mesh_plink_open(sta);
+	else if (params->plink_action == NL80211_PLINK_ACTION_BLOCK)
+		bss_changed |= mesh_plink_block(sta);
+
+	if (params->local_pm)
+		bss_changed |= ieee80211_mps_set_sta_local_pm(sta,
+							      params->local_pm);
+
+	ieee80211_mbss_info_change_notify(sdata, bss_changed);
+#endif
+}
+
+
static int sta_apply_parameters(struct ieee80211_local *local,
struct sta_info *sta,
struct station_parameters *params)
@@ -1076,7 +1135,6 @@ static int sta_apply_parameters(struct ieee80211_local *local,
}
if (mask & BIT(NL80211_STA_FLAG_MFP)) {
- sta->sta.mfp = !!(set & BIT(NL80211_STA_FLAG_MFP));
if (set & BIT(NL80211_STA_FLAG_MFP))
set_sta_flag(sta, WLAN_STA_MFP);
else
@@ -1097,6 +1155,12 @@ static int sta_apply_parameters(struct ieee80211_local *local,
params->ext_capab[3] & WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH)
set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH);
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
+ params->ext_capab_len >= 8 &&
+ params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED)
+ set_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW);
+
if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD) {
sta->sta.uapsd_queues = params->uapsd_queues;
sta->sta.max_sp = params->max_sp;
@@ -1144,62 +1208,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
band, false);
}
- if (ieee80211_vif_is_mesh(&sdata->vif)) {
-#ifdef CONFIG_MAC80211_MESH
- u32 changed = 0;
-
- if (params->sta_modify_mask & STATION_PARAM_APPLY_PLINK_STATE) {
- switch (params->plink_state) {
- case NL80211_PLINK_ESTAB:
- if (sta->plink_state != NL80211_PLINK_ESTAB)
- changed = mesh_plink_inc_estab_count(
- sdata);
- sta->plink_state = params->plink_state;
-
- ieee80211_mps_sta_status_update(sta);
- changed |= ieee80211_mps_set_sta_local_pm(sta,
- sdata->u.mesh.mshcfg.power_mode);
- break;
- case NL80211_PLINK_LISTEN:
- case NL80211_PLINK_BLOCKED:
- case NL80211_PLINK_OPN_SNT:
- case NL80211_PLINK_OPN_RCVD:
- case NL80211_PLINK_CNF_RCVD:
- case NL80211_PLINK_HOLDING:
- if (sta->plink_state == NL80211_PLINK_ESTAB)
- changed = mesh_plink_dec_estab_count(
- sdata);
- sta->plink_state = params->plink_state;
-
- ieee80211_mps_sta_status_update(sta);
- changed |= ieee80211_mps_set_sta_local_pm(sta,
- NL80211_MESH_POWER_UNKNOWN);
- break;
- default:
- /* nothing */
- break;
- }
- }
-
- switch (params->plink_action) {
- case NL80211_PLINK_ACTION_NO_ACTION:
- /* nothing */
- break;
- case NL80211_PLINK_ACTION_OPEN:
- changed |= mesh_plink_open(sta);
- break;
- case NL80211_PLINK_ACTION_BLOCK:
- changed |= mesh_plink_block(sta);
- break;
- }
-
- if (params->local_pm)
- changed |=
- ieee80211_mps_set_sta_local_pm(sta,
- params->local_pm);
- ieee80211_mbss_info_change_notify(sdata, changed);
-#endif
- }
+ if (ieee80211_vif_is_mesh(&sdata->vif))
+ sta_apply_mesh_params(local, sta, params);
/* set the STA state after all sta info from usermode has been set */
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
@@ -2358,6 +2368,8 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
const u8 *ap;
enum ieee80211_smps_mode old_req;
int err;
+ struct sta_info *sta;
+ bool tdls_peer_found = false;
lockdep_assert_held(&sdata->wdev.mtx);
@@ -2382,11 +2394,22 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
ap = sdata->u.mgd.associated->bssid;
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+ if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ continue;
+
+ tdls_peer_found = true;
+ break;
+ }
+ rcu_read_unlock();
+
if (smps_mode == IEEE80211_SMPS_AUTOMATIC) {
- if (sdata->u.mgd.powersave)
- smps_mode = IEEE80211_SMPS_DYNAMIC;
- else
+ if (tdls_peer_found || !sdata->u.mgd.powersave)
smps_mode = IEEE80211_SMPS_OFF;
+ else
+ smps_mode = IEEE80211_SMPS_DYNAMIC;
}
/* send SM PS frame to AP */
@@ -2394,6 +2417,8 @@ int __ieee80211_request_smps_mgd(struct ieee80211_sub_if_data *sdata,
ap, ap);
if (err)
sdata->u.mgd.req_smps = old_req;
+ else if (smps_mode != IEEE80211_SMPS_OFF && tdls_peer_found)
+ ieee80211_teardown_tdls_peers(sdata);
return err;
}
@@ -2479,16 +2504,26 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
sdata->rc_rateidx_mask[i] = mask->control[i].legacy;
memcpy(sdata->rc_rateidx_mcs_mask[i], mask->control[i].ht_mcs,
sizeof(mask->control[i].ht_mcs));
+ memcpy(sdata->rc_rateidx_vht_mcs_mask[i],
+ mask->control[i].vht_mcs,
+ sizeof(mask->control[i].vht_mcs));
sdata->rc_has_mcs_mask[i] = false;
+ sdata->rc_has_vht_mcs_mask[i] = false;
if (!sband)
continue;
- for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++)
- if (~sdata->rc_rateidx_mcs_mask[i][j]) {
+ for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) {
+ if (~sdata->rc_rateidx_mcs_mask[i][j])
sdata->rc_has_mcs_mask[i] = true;
+
+ if (~sdata->rc_rateidx_vht_mcs_mask[i][j])
+ sdata->rc_has_vht_mcs_mask[i] = true;
+
+ if (sdata->rc_has_mcs_mask[i] &&
+ sdata->rc_has_vht_mcs_mask[i])
break;
- }
+ }
}
return 0;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index f01c18a3160e..1d1b9b7bdefe 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -190,7 +190,7 @@ ieee80211_find_reservation_chanctx(struct ieee80211_local *local,
return NULL;
}
-static enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta)
+enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta)
{
switch (sta->bandwidth) {
case IEEE80211_STA_RX_BW_20:
@@ -264,9 +264,17 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local,
case NL80211_IFTYPE_AP_VLAN:
width = ieee80211_get_max_required_bw(sdata);
break;
+ case NL80211_IFTYPE_STATION:
+ /*
+ * The ap's sta->bandwidth is not set yet at this
+ * point, so take the width from the chandef, but
+ * account also for TDLS peers
+ */
+ width = max(vif->bss_conf.chandef.width,
+ ieee80211_get_max_required_bw(sdata));
+ break;
case NL80211_IFTYPE_P2P_DEVICE:
continue;
- case NL80211_IFTYPE_STATION:
case NL80211_IFTYPE_ADHOC:
case NL80211_IFTYPE_WDS:
case NL80211_IFTYPE_MESH_POINT:
@@ -554,12 +562,13 @@ static void ieee80211_free_chanctx(struct ieee80211_local *local,
kfree_rcu(ctx, rcu_head);
}
-static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
- struct ieee80211_chanctx *ctx)
+void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx)
{
struct ieee80211_chanctx_conf *conf = &ctx->conf;
struct ieee80211_sub_if_data *sdata;
const struct cfg80211_chan_def *compat = NULL;
+ struct sta_info *sta;
lockdep_assert_held(&local->chanctx_mtx);
@@ -581,6 +590,20 @@ static void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
if (WARN_ON_ONCE(!compat))
break;
}
+
+ /* TDLS peers can sometimes affect the chandef width */
+ list_for_each_entry_rcu(sta, &local->sta_list, list) {
+ if (!sta->uploaded ||
+ !test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW) ||
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED) ||
+ !sta->tdls_chandef.chan)
+ continue;
+
+ compat = cfg80211_chandef_compatible(&sta->tdls_chandef,
+ compat);
+ if (WARN_ON_ONCE(!compat))
+ break;
+ }
rcu_read_unlock();
if (!compat)
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 3ea8b7de9633..ced6bf3be8d6 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -122,6 +122,7 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
FLAG(CHANCTX_STA_CSA),
FLAG(SUPPORTS_CLONED_SKBS),
FLAG(SINGLE_SCAN_ON_ALL_BANDS),
+ FLAG(TDLS_WIDER_BW),
/* keep last for the build bug below */
(void *)0x1
@@ -277,7 +278,6 @@ void debugfs_hw_add(struct ieee80211_local *local)
DEBUGFS_STATS_ADD(rx_handlers_queued);
DEBUGFS_STATS_ADD(rx_handlers_drop_nullfunc);
DEBUGFS_STATS_ADD(rx_handlers_drop_defrag);
- DEBUGFS_STATS_ADD(rx_handlers_drop_short);
DEBUGFS_STATS_ADD(tx_expand_skb_head);
DEBUGFS_STATS_ADD(tx_expand_skb_head_cloned);
DEBUGFS_STATS_ADD(rx_expand_skb_head_defrag);
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index e82bf1e9d7a8..702ca122c498 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -57,7 +57,6 @@ KEY_CONF_FILE(keylen, D);
KEY_CONF_FILE(keyidx, D);
KEY_CONF_FILE(hw_key_idx, D);
KEY_FILE(flags, X);
-KEY_FILE(tx_rx_count, D);
KEY_READ(ifindex, sdata->name, "%s\n");
KEY_OPS(ifindex);
@@ -310,7 +309,6 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
DEBUGFS_ADD(flags);
DEBUGFS_ADD(keyidx);
DEBUGFS_ADD(hw_key_idx);
- DEBUGFS_ADD(tx_rx_count);
DEBUGFS_ADD(algorithm);
DEBUGFS_ADD(tx_spec);
DEBUGFS_ADD(rx_spec);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c09c0131bfa2..1021e87c051f 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -186,6 +186,38 @@ IEEE80211_IF_FILE(rc_rateidx_mcs_mask_2ghz,
IEEE80211_IF_FILE(rc_rateidx_mcs_mask_5ghz,
rc_rateidx_mcs_mask[IEEE80211_BAND_5GHZ], HEXARRAY);
+static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_2ghz(
+ const struct ieee80211_sub_if_data *sdata,
+ char *buf, int buflen)
+{
+ int i, len = 0;
+ const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_2GHZ];
+
+ for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
+ len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]);
+ len += scnprintf(buf + len, buflen - len, "\n");
+
+ return len;
+}
+
+IEEE80211_IF_FILE_R(rc_rateidx_vht_mcs_mask_2ghz);
+
+static ssize_t ieee80211_if_fmt_rc_rateidx_vht_mcs_mask_5ghz(
+ const struct ieee80211_sub_if_data *sdata,
+ char *buf, int buflen)
+{
+ int i, len = 0;
+ const u16 *mask = sdata->rc_rateidx_vht_mcs_mask[IEEE80211_BAND_5GHZ];
+
+ for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
+ len += scnprintf(buf + len, buflen - len, "%04x ", mask[i]);
+ len += scnprintf(buf + len, buflen - len, "\n");
+
+ return len;
+}
+
+IEEE80211_IF_FILE_R(rc_rateidx_vht_mcs_mask_5ghz);
+
IEEE80211_IF_FILE(flags, flags, HEX);
IEEE80211_IF_FILE(state, state, LHEX);
IEEE80211_IF_FILE(txpower, vif.bss_conf.txpower, DEC);
@@ -565,6 +597,8 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata)
DEBUGFS_ADD(rc_rateidx_mask_5ghz);
DEBUGFS_ADD(rc_rateidx_mcs_mask_2ghz);
DEBUGFS_ADD(rc_rateidx_mcs_mask_5ghz);
+ DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_2ghz);
+ DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz);
DEBUGFS_ADD(hw_queues);
}
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
new file mode 100644
index 000000000000..267c3b1ca047
--- /dev/null
+++ b/net/mac80211/driver-ops.c
@@ -0,0 +1,41 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "trace.h"
+#include "driver-ops.h"
+
+__must_check
+int drv_sta_state(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
+ enum ieee80211_sta_state old_state,
+ enum ieee80211_sta_state new_state)
+{
+ int ret = 0;
+
+ might_sleep();
+
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state);
+ if (local->ops->sta_state) {
+ ret = local->ops->sta_state(&local->hw, &sdata->vif, &sta->sta,
+ old_state, new_state);
+ } else if (old_state == IEEE80211_STA_AUTH &&
+ new_state == IEEE80211_STA_ASSOC) {
+ ret = drv_sta_add(local, sdata, &sta->sta);
+ if (ret == 0)
+ sta->uploaded = true;
+ } else if (old_state == IEEE80211_STA_ASSOC &&
+ new_state == IEEE80211_STA_AUTH) {
+ drv_sta_remove(local, sdata, &sta->sta);
+ }
+ trace_drv_return_int(local, ret);
+ return ret;
+}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 32a2e707e222..02d91332d7dd 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -573,37 +573,12 @@ static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local,
trace_drv_return_void(local);
}
-static inline __must_check
+__must_check
int drv_sta_state(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
enum ieee80211_sta_state old_state,
- enum ieee80211_sta_state new_state)
-{
- int ret = 0;
-
- might_sleep();
-
- sdata = get_bss_sdata(sdata);
- if (!check_sdata_in_driver(sdata))
- return -EIO;
-
- trace_drv_sta_state(local, sdata, &sta->sta, old_state, new_state);
- if (local->ops->sta_state) {
- ret = local->ops->sta_state(&local->hw, &sdata->vif, &sta->sta,
- old_state, new_state);
- } else if (old_state == IEEE80211_STA_AUTH &&
- new_state == IEEE80211_STA_ASSOC) {
- ret = drv_sta_add(local, sdata, &sta->sta);
- if (ret == 0)
- sta->uploaded = true;
- } else if (old_state == IEEE80211_STA_ASSOC &&
- new_state == IEEE80211_STA_AUTH) {
- drv_sta_remove(local, sdata, &sta->sta);
- }
- trace_drv_return_int(local, ret);
- return ret;
-}
+ enum ieee80211_sta_state new_state);
static inline void drv_sta_rc_update(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b12f61507f9f..6e52659f923f 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -84,13 +84,13 @@ struct ieee80211_local;
#define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */)
struct ieee80211_fragment_entry {
- unsigned long first_frag_time;
- unsigned int seq;
- unsigned int rx_queue;
- unsigned int last_frag;
- unsigned int extra_len;
struct sk_buff_head skb_list;
- int ccmp; /* Whether fragments were encrypted with CCMP */
+ unsigned long first_frag_time;
+ u16 seq;
+ u16 extra_len;
+ u16 last_frag;
+ u8 rx_queue;
+ bool ccmp; /* Whether fragments were encrypted with CCMP */
u8 last_pn[6]; /* PN of the last fragment if CCMP was used */
};
@@ -181,7 +181,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
/**
* enum ieee80211_packet_rx_flags - packet RX flags
- * @IEEE80211_RX_FRAGMENTED: fragmented frame
* @IEEE80211_RX_AMSDU: a-MSDU packet
* @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed
* @IEEE80211_RX_DEFERRED_RELEASE: frame was subjected to receive reordering
@@ -190,7 +189,6 @@ typedef unsigned __bitwise__ ieee80211_rx_result;
* @rx_flags field of &struct ieee80211_rx_status.
*/
enum ieee80211_packet_rx_flags {
- IEEE80211_RX_FRAGMENTED = BIT(2),
IEEE80211_RX_AMSDU = BIT(3),
IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4),
IEEE80211_RX_DEFERRED_RELEASE = BIT(5),
@@ -202,8 +200,6 @@ enum ieee80211_packet_rx_flags {
* @IEEE80211_RX_CMNTR: received on cooked monitor already
* @IEEE80211_RX_BEACON_REPORTED: This frame was already reported
* to cfg80211_report_obss_beacon().
- * @IEEE80211_RX_REORDER_TIMER: this frame is released by the
- * reorder buffer timeout timer, not the normal RX path
*
* These flags are used across handling multiple interfaces
* for a single frame.
@@ -211,10 +207,10 @@ enum ieee80211_packet_rx_flags {
enum ieee80211_rx_flags {
IEEE80211_RX_CMNTR = BIT(0),
IEEE80211_RX_BEACON_REPORTED = BIT(1),
- IEEE80211_RX_REORDER_TIMER = BIT(2),
};
struct ieee80211_rx_data {
+ struct napi_struct *napi;
struct sk_buff *skb;
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
@@ -725,6 +721,7 @@ struct ieee80211_if_mesh {
* back to wireless media and to the local net stack.
* @IEEE80211_SDATA_DISCONNECT_RESUME: Disconnect after resume.
* @IEEE80211_SDATA_IN_DRIVER: indicates interface was added to driver
+ * @IEEE80211_SDATA_MU_MIMO_OWNER: indicates interface owns MU-MIMO capability
*/
enum ieee80211_sub_if_data_flags {
IEEE80211_SDATA_ALLMULTI = BIT(0),
@@ -732,6 +729,7 @@ enum ieee80211_sub_if_data_flags {
IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3),
IEEE80211_SDATA_DISCONNECT_RESUME = BIT(4),
IEEE80211_SDATA_IN_DRIVER = BIT(5),
+ IEEE80211_SDATA_MU_MIMO_OWNER = BIT(6),
};
/**
@@ -903,6 +901,9 @@ struct ieee80211_sub_if_data {
bool rc_has_mcs_mask[IEEE80211_NUM_BANDS];
u8 rc_rateidx_mcs_mask[IEEE80211_NUM_BANDS][IEEE80211_HT_MCS_MASK_LEN];
+ bool rc_has_vht_mcs_mask[IEEE80211_NUM_BANDS];
+ u16 rc_rateidx_vht_mcs_mask[IEEE80211_NUM_BANDS][NL80211_VHT_NSS_MAX];
+
union {
struct ieee80211_if_ap ap;
struct ieee80211_if_wds wds;
@@ -1010,7 +1011,6 @@ enum sdata_queue_type {
IEEE80211_SDATA_QUEUE_AGG_STOP = 2,
IEEE80211_SDATA_QUEUE_RX_AGG_START = 3,
IEEE80211_SDATA_QUEUE_RX_AGG_STOP = 4,
- IEEE80211_SDATA_QUEUE_TDLS_CHSW = 5,
};
enum {
@@ -1286,7 +1286,6 @@ struct ieee80211_local {
unsigned int rx_handlers_queued;
unsigned int rx_handlers_drop_nullfunc;
unsigned int rx_handlers_drop_defrag;
- unsigned int rx_handlers_drop_short;
unsigned int tx_expand_skb_head;
unsigned int tx_expand_skb_head_cloned;
unsigned int rx_expand_skb_head_defrag;
@@ -1348,14 +1347,16 @@ struct ieee80211_local {
struct ieee80211_sub_if_data __rcu *p2p_sdata;
- struct napi_struct *napi;
-
/* virtual monitor interface */
struct ieee80211_sub_if_data __rcu *monitor_sdata;
struct cfg80211_chan_def monitor_chandef;
/* extended capabilities provided by mac80211 */
u8 ext_capa[8];
+
+ /* TDLS channel switch */
+ struct work_struct tdls_chsw_work;
+ struct sk_buff_head skb_queue_tdls_chsw;
};
static inline struct ieee80211_sub_if_data *
@@ -1715,6 +1716,8 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
enum ieee80211_band band, bool nss_only);
void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta_vht_cap *vht_cap);
+void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
+ u16 vht_mask[NL80211_VHT_NSS_MAX]);
/* Spectrum management */
void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
@@ -1763,8 +1766,6 @@ static inline int __ieee80211_resume(struct ieee80211_hw *hw)
/* utility functions/constants */
extern const void *const mac80211_wiphy_privid; /* for wiphy privid */
-u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
- enum nl80211_iftype type);
int ieee80211_frame_duration(enum ieee80211_band band, size_t len,
int rate, int erp, int short_preamble,
int shift);
@@ -2042,6 +2043,9 @@ int ieee80211_check_combinations(struct ieee80211_sub_if_data *sdata,
enum ieee80211_chanctx_mode chanmode,
u8 radar_detect);
int ieee80211_max_num_channels(struct ieee80211_local *local);
+enum nl80211_chan_width ieee80211_get_sta_bw(struct ieee80211_sta *sta);
+void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local,
+ struct ieee80211_chanctx *ctx);
/* TDLS */
int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
@@ -2058,8 +2062,8 @@ int ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
void ieee80211_tdls_cancel_channel_switch(struct wiphy *wiphy,
struct net_device *dev,
const u8 *addr);
-void ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb);
+void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata);
+void ieee80211_tdls_chsw_work(struct work_struct *wk);
extern const struct ethtool_ops ieee80211_ethtool_ops;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 553ac6dd4867..6964fc6a8ea2 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1242,8 +1242,6 @@ static void ieee80211_iface_work(struct work_struct *work)
WLAN_BACK_RECIPIENT, 0,
false);
mutex_unlock(&local->sta_mtx);
- } else if (skb->pkt_type == IEEE80211_SDATA_QUEUE_TDLS_CHSW) {
- ieee80211_process_tdls_channel_switch(sdata, skb);
} else if (ieee80211_is_action(mgmt->frame_control) &&
mgmt->u.action.category == WLAN_CATEGORY_BACK) {
int len = skb->len;
@@ -1790,13 +1788,23 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
sband = local->hw.wiphy->bands[i];
sdata->rc_rateidx_mask[i] =
sband ? (1 << sband->n_bitrates) - 1 : 0;
- if (sband)
+ if (sband) {
+ __le16 cap;
+ u16 *vht_rate_mask;
+
memcpy(sdata->rc_rateidx_mcs_mask[i],
sband->ht_cap.mcs.rx_mask,
sizeof(sdata->rc_rateidx_mcs_mask[i]));
- else
+
+ cap = sband->vht_cap.vht_mcs.rx_mcs_map;
+ vht_rate_mask = sdata->rc_rateidx_vht_mcs_mask[i];
+ ieee80211_get_vht_mask_from_cap(cap, vht_rate_mask);
+ } else {
memset(sdata->rc_rateidx_mcs_mask[i], 0,
sizeof(sdata->rc_rateidx_mcs_mask[i]));
+ memset(sdata->rc_rateidx_vht_mcs_mask[i], 0,
+ sizeof(sdata->rc_rateidx_vht_mcs_mask[i]));
+ }
}
ieee80211_set_default_queues(sdata);
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index b22df3a79a41..44388d6a1d8e 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -336,7 +336,6 @@ static void ieee80211_key_replace(struct ieee80211_sub_if_data *sdata,
ieee80211_check_fast_xmit(sta);
} else {
rcu_assign_pointer(sta->gtk[idx], new);
- sta->gtk_idx = idx;
}
} else {
defunikey = old &&
diff --git a/net/mac80211/key.h b/net/mac80211/key.h
index 3f4f9eaac140..9951ef06323e 100644
--- a/net/mac80211/key.h
+++ b/net/mac80211/key.h
@@ -115,9 +115,6 @@ struct ieee80211_key {
} gen;
} u;
- /* number of times this key has been used */
- int tx_rx_count;
-
#ifdef CONFIG_MAC80211_DEBUGFS
struct {
struct dentry *stalink;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 3c63468b4dfb..ff79a13d231d 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -629,6 +629,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
INIT_WORK(&local->sched_scan_stopped_work,
ieee80211_sched_scan_stopped_work);
+ INIT_WORK(&local->tdls_chsw_work, ieee80211_tdls_chsw_work);
+
spin_lock_init(&local->ack_status_lock);
idr_init(&local->ack_status_frames);
@@ -645,6 +647,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
skb_queue_head_init(&local->skb_queue);
skb_queue_head_init(&local->skb_queue_unreliable);
+ skb_queue_head_init(&local->skb_queue_tdls_chsw);
ieee80211_alloc_led_names(local);
@@ -1132,18 +1135,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
}
EXPORT_SYMBOL(ieee80211_register_hw);
-void ieee80211_napi_add(struct ieee80211_hw *hw, struct napi_struct *napi,
- struct net_device *napi_dev,
- int (*poll)(struct napi_struct *, int),
- int weight)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- netif_napi_add(napi_dev, napi, poll, weight);
- local->napi = napi;
-}
-EXPORT_SYMBOL_GPL(ieee80211_napi_add);
-
void ieee80211_unregister_hw(struct ieee80211_hw *hw)
{
struct ieee80211_local *local = hw_to_local(hw);
@@ -1173,6 +1164,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
cancel_work_sync(&local->restart_work);
cancel_work_sync(&local->reconfig_filter);
+ cancel_work_sync(&local->tdls_chsw_work);
flush_work(&local->sched_scan_stopped_work);
ieee80211_clear_tx_pending(local);
@@ -1183,6 +1175,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
wiphy_warn(local->hw.wiphy, "skb_queue not empty\n");
skb_queue_purge(&local->skb_queue);
skb_queue_purge(&local->skb_queue_unreliable);
+ skb_queue_purge(&local->skb_queue_tdls_chsw);
destroy_workqueue(local->workqueue);
wiphy_unregister(local->hw.wiphy);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 817098add1d6..e06a5ca7c9a9 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -158,7 +158,7 @@ void mesh_sta_cleanup(struct sta_info *sta)
changed = mesh_accept_plinks_update(sdata);
if (!sdata->u.mesh.user_mpm) {
changed |= mesh_plink_deactivate(sta);
- del_timer_sync(&sta->plink_timer);
+ del_timer_sync(&sta->mesh->plink_timer);
}
if (changed)
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 085edc1d056b..d80e0a4c16cf 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -19,15 +19,6 @@
#define MAX_PREQ_QUEUE_LEN 64
-/* Destination only */
-#define MP_F_DO 0x1
-/* Reply and forward */
-#define MP_F_RF 0x2
-/* Unknown Sequence Number */
-#define MP_F_USN 0x01
-/* Reason code Present */
-#define MP_F_RCODE 0x02
-
static void mesh_queue_preq(struct mesh_path *, u8);
static inline u32 u32_field_get(const u8 *preq_elem, int offset, bool ae)
@@ -79,6 +70,12 @@ static inline u16 u16_field_get(const u8 *preq_elem, int offset, bool ae)
#define MSEC_TO_TU(x) (x*1000/1024)
#define SN_GT(x, y) ((s32)(y - x) < 0)
#define SN_LT(x, y) ((s32)(x - y) < 0)
+#define MAX_SANE_SN_DELTA 32
+
+static inline u32 SN_DELTA(u32 x, u32 y)
+{
+ return x >= y ? x - y : y - x;
+}
#define net_traversal_jiffies(s) \
msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime)
@@ -279,15 +276,10 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata,
*pos++ = ttl;
/* number of destinations */
*pos++ = 1;
- /*
- * flags bit, bit 1 is unset if we know the sequence number and
- * bit 2 is set if we have a reason code
+ /* Flags field has AE bit only as defined in
+ * sec 8.4.2.117 IEEE802.11-2012
*/
*pos = 0;
- if (!target_sn)
- *pos |= MP_F_USN;
- if (target_rcode)
- *pos |= MP_F_RCODE;
pos++;
memcpy(pos, target, ETH_ALEN);
pos += ETH_ALEN;
@@ -316,8 +308,9 @@ void ieee80211s_update_metric(struct ieee80211_local *local,
failed = !(txinfo->flags & IEEE80211_TX_STAT_ACK);
/* moving average, scaled to 100 */
- sta->fail_avg = ((80 * sta->fail_avg + 5) / 100 + 20 * failed);
- if (sta->fail_avg > 95)
+ sta->mesh->fail_avg =
+ ((80 * sta->mesh->fail_avg + 5) / 100 + 20 * failed);
+ if (sta->mesh->fail_avg > 95)
mesh_plink_broken(sta);
}
@@ -333,7 +326,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
u32 tx_time, estimated_retx;
u64 result;
- if (sta->fail_avg >= 100)
+ if (sta->mesh->fail_avg >= 100)
return MAX_METRIC;
sta_set_rate_info_tx(sta, &sta->last_tx_rate, &rinfo);
@@ -341,7 +334,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local,
if (WARN_ON(!rate))
return MAX_METRIC;
- err = (sta->fail_avg << ARITH_SHIFT) / 100;
+ err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
/* bitrate is in units of 100 Kbps, while we need rate in units of
* 1Mbps. This will be corrected on tx_time computation.
@@ -441,6 +434,26 @@ static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata,
process = false;
fresh_info = false;
}
+ } else if (!(mpath->flags & MESH_PATH_ACTIVE)) {
+ bool have_sn, newer_sn, bounced;
+
+ have_sn = mpath->flags & MESH_PATH_SN_VALID;
+ newer_sn = have_sn && SN_GT(orig_sn, mpath->sn);
+ bounced = have_sn &&
+ (SN_DELTA(orig_sn, mpath->sn) >
+ MAX_SANE_SN_DELTA);
+
+ if (!have_sn || newer_sn) {
+ /* if SN is newer than what we had
+ * then we can take it */;
+ } else if (bounced) {
+ /* if SN is way different than what
+ * we had then assume the other side
+ * rebooted or restarted */;
+ } else {
+ process = false;
+ fresh_info = false;
+ }
}
} else {
mpath = mesh_path_add(sdata, orig_addr);
@@ -570,15 +583,13 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
SN_LT(mpath->sn, target_sn)) {
mpath->sn = target_sn;
mpath->flags |= MESH_PATH_SN_VALID;
- } else if ((!(target_flags & MP_F_DO)) &&
+ } else if ((!(target_flags & IEEE80211_PREQ_TO_FLAG)) &&
(mpath->flags & MESH_PATH_ACTIVE)) {
reply = true;
target_metric = mpath->metric;
target_sn = mpath->sn;
- if (target_flags & MP_F_RF)
- target_flags |= MP_F_DO;
- else
- forward = false;
+ /* Case E2 of sec 13.10.9.3 IEEE 802.11-2012*/
+ target_flags |= IEEE80211_PREQ_TO_FLAG;
}
}
rcu_read_unlock();
@@ -736,9 +747,12 @@ static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
if (mpath->flags & MESH_PATH_ACTIVE &&
ether_addr_equal(ta, sta->sta.addr) &&
(!(mpath->flags & MESH_PATH_SN_VALID) ||
- SN_GT(target_sn, mpath->sn))) {
+ SN_GT(target_sn, mpath->sn) || target_sn == 0)) {
mpath->flags &= ~MESH_PATH_ACTIVE;
- mpath->sn = target_sn;
+ if (target_sn != 0)
+ mpath->sn = target_sn;
+ else
+ mpath->sn += 1;
spin_unlock_bh(&mpath->state_lock);
if (!ifmsh->mshcfg.dot11MeshForwarding)
goto endperr;
@@ -862,7 +876,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
sta = sta_info_get(sdata, mgmt->sa);
- if (!sta || sta->plink_state != NL80211_PLINK_ESTAB) {
+ if (!sta || sta->mesh->plink_state != NL80211_PLINK_ESTAB) {
rcu_read_unlock();
return;
}
@@ -974,7 +988,7 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
struct mesh_preq_queue *preq_node;
struct mesh_path *mpath;
- u8 ttl, target_flags;
+ u8 ttl, target_flags = 0;
const u8 *da;
u32 lifetime;
@@ -1033,9 +1047,9 @@ void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata)
}
if (preq_node->flags & PREQ_Q_F_REFRESH)
- target_flags = MP_F_DO;
+ target_flags |= IEEE80211_PREQ_TO_FLAG;
else
- target_flags = MP_F_RF;
+ target_flags &= ~IEEE80211_PREQ_TO_FLAG;
spin_unlock_bh(&mpath->state_lock);
da = (mpath->is_root) ? mpath->rann_snd_addr : broadcast_addr;
@@ -1176,7 +1190,9 @@ void mesh_path_timer(unsigned long data)
spin_unlock_bh(&mpath->state_lock);
mesh_queue_preq(mpath, 0);
} else {
- mpath->flags = 0;
+ mpath->flags &= ~(MESH_PATH_RESOLVING |
+ MESH_PATH_RESOLVED |
+ MESH_PATH_REQ_QUEUED);
mpath->exp_time = jiffies;
spin_unlock_bh(&mpath->state_lock);
if (!mpath->is_gate && mesh_gate_num(sdata) > 0) {
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 3b59099413fb..58384642e03c 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -13,10 +13,11 @@
#include "rate.h"
#include "mesh.h"
+#define PLINK_CNF_AID(mgmt) ((mgmt)->u.action.u.self_prot.variable + 2)
#define PLINK_GET_LLID(p) (p + 2)
#define PLINK_GET_PLID(p) (p + 4)
-#define mod_plink_timer(s, t) (mod_timer(&s->plink_timer, \
+#define mod_plink_timer(s, t) (mod_timer(&s->mesh->plink_timer, \
jiffies + msecs_to_jiffies(t)))
enum plink_event {
@@ -53,18 +54,13 @@ static const char * const mplevents[] = {
[CLS_IGNR] = "CLS_IGNR"
};
-static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
- enum ieee80211_self_protected_actioncode action,
- u8 *da, u16 llid, u16 plid, u16 reason);
-
-
/* We only need a valid sta if user configured a minimum rssi_threshold. */
static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
{
s32 rssi_threshold = sdata->u.mesh.mshcfg.rssi_threshold;
return rssi_threshold == 0 ||
- (sta && (s8) -ewma_read(&sta->avg_signal) > rssi_threshold);
+ (sta && (s8) -ewma_signal_read(&sta->avg_signal) > rssi_threshold);
}
/**
@@ -72,14 +68,14 @@ static bool rssi_threshold_check(struct ieee80211_sub_if_data *sdata,
*
* @sta: mesh peer link to restart
*
- * Locking: this function must be called holding sta->plink_lock
+ * Locking: this function must be called holding sta->mesh->plink_lock
*/
static inline void mesh_plink_fsm_restart(struct sta_info *sta)
{
- lockdep_assert_held(&sta->plink_lock);
- sta->plink_state = NL80211_PLINK_LISTEN;
- sta->llid = sta->plid = sta->reason = 0;
- sta->plink_retries = 0;
+ lockdep_assert_held(&sta->mesh->plink_lock);
+ sta->mesh->plink_state = NL80211_PLINK_LISTEN;
+ sta->mesh->llid = sta->mesh->plid = sta->mesh->reason = 0;
+ sta->mesh->plink_retries = 0;
}
/*
@@ -119,7 +115,7 @@ static u32 mesh_set_short_slot_time(struct ieee80211_sub_if_data *sdata)
rcu_read_lock();
list_for_each_entry_rcu(sta, &local->sta_list, list) {
if (sdata != sta->sdata ||
- sta->plink_state != NL80211_PLINK_ESTAB)
+ sta->mesh->plink_state != NL80211_PLINK_ESTAB)
continue;
short_slot = false;
@@ -169,7 +165,7 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
rcu_read_lock();
list_for_each_entry_rcu(sta, &local->sta_list, list) {
if (sdata != sta->sdata ||
- sta->plink_state != NL80211_PLINK_ESTAB)
+ sta->mesh->plink_state != NL80211_PLINK_ESTAB)
continue;
if (sta->sta.bandwidth > IEEE80211_STA_RX_BW_20)
@@ -204,59 +200,8 @@ static u32 mesh_set_ht_prot_mode(struct ieee80211_sub_if_data *sdata)
return BSS_CHANGED_HT;
}
-/**
- * __mesh_plink_deactivate - deactivate mesh peer link
- *
- * @sta: mesh peer link to deactivate
- *
- * All mesh paths with this peer as next hop will be flushed
- * Returns beacon changed flag if the beacon content changed.
- *
- * Locking: the caller must hold sta->plink_lock
- */
-static u32 __mesh_plink_deactivate(struct sta_info *sta)
-{
- struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed = 0;
-
- lockdep_assert_held(&sta->plink_lock);
-
- if (sta->plink_state == NL80211_PLINK_ESTAB)
- changed = mesh_plink_dec_estab_count(sdata);
- sta->plink_state = NL80211_PLINK_BLOCKED;
- mesh_path_flush_by_nexthop(sta);
-
- ieee80211_mps_sta_status_update(sta);
- changed |= ieee80211_mps_set_sta_local_pm(sta,
- NL80211_MESH_POWER_UNKNOWN);
-
- return changed;
-}
-
-/**
- * mesh_plink_deactivate - deactivate mesh peer link
- *
- * @sta: mesh peer link to deactivate
- *
- * All mesh paths with this peer as next hop will be flushed
- */
-u32 mesh_plink_deactivate(struct sta_info *sta)
-{
- struct ieee80211_sub_if_data *sdata = sta->sdata;
- u32 changed;
-
- spin_lock_bh(&sta->plink_lock);
- changed = __mesh_plink_deactivate(sta);
- sta->reason = WLAN_REASON_MESH_PEER_CANCELED;
- mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
- sta->sta.addr, sta->llid, sta->plid,
- sta->reason);
- spin_unlock_bh(&sta->plink_lock);
-
- return changed;
-}
-
static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta,
enum ieee80211_self_protected_actioncode action,
u8 *da, u16 llid, u16 plid, u16 reason)
{
@@ -306,7 +251,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
if (action == WLAN_SP_MESH_PEERING_CONFIRM) {
/* AID */
pos = skb_put(skb, 2);
- put_unaligned_le16(plid, pos);
+ put_unaligned_le16(sta->sta.aid, pos);
}
if (ieee80211_add_srates_ie(sdata, skb, true, band) ||
ieee80211_add_ext_srates_ie(sdata, skb, true, band) ||
@@ -375,6 +320,58 @@ free:
return err;
}
+/**
+ * __mesh_plink_deactivate - deactivate mesh peer link
+ *
+ * @sta: mesh peer link to deactivate
+ *
+ * All mesh paths with this peer as next hop will be flushed
+ * Returns beacon changed flag if the beacon content changed.
+ *
+ * Locking: the caller must hold sta->mesh->plink_lock
+ */
+static u32 __mesh_plink_deactivate(struct sta_info *sta)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ u32 changed = 0;
+
+ lockdep_assert_held(&sta->mesh->plink_lock);
+
+ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB)
+ changed = mesh_plink_dec_estab_count(sdata);
+ sta->mesh->plink_state = NL80211_PLINK_BLOCKED;
+ mesh_path_flush_by_nexthop(sta);
+
+ ieee80211_mps_sta_status_update(sta);
+ changed |= ieee80211_mps_set_sta_local_pm(sta,
+ NL80211_MESH_POWER_UNKNOWN);
+
+ return changed;
+}
+
+/**
+ * mesh_plink_deactivate - deactivate mesh peer link
+ *
+ * @sta: mesh peer link to deactivate
+ *
+ * All mesh paths with this peer as next hop will be flushed
+ */
+u32 mesh_plink_deactivate(struct sta_info *sta)
+{
+ struct ieee80211_sub_if_data *sdata = sta->sdata;
+ u32 changed;
+
+ spin_lock_bh(&sta->mesh->plink_lock);
+ changed = __mesh_plink_deactivate(sta);
+ sta->mesh->reason = WLAN_REASON_MESH_PEER_CANCELED;
+ mesh_plink_frame_tx(sdata, sta, WLAN_SP_MESH_PEERING_CLOSE,
+ sta->sta.addr, sta->mesh->llid, sta->mesh->plid,
+ sta->mesh->reason);
+ spin_unlock_bh(&sta->mesh->plink_lock);
+
+ return changed;
+}
+
static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta,
struct ieee802_11_elems *elems, bool insert)
@@ -388,13 +385,14 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[band];
rates = ieee80211_sta_get_rates(sdata, elems, band, &basic_rates);
- spin_lock_bh(&sta->plink_lock);
+ spin_lock_bh(&sta->mesh->plink_lock);
sta->last_rx = jiffies;
/* rates and capabilities don't change during peering */
- if (sta->plink_state == NL80211_PLINK_ESTAB && sta->processed_beacon)
+ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB &&
+ sta->mesh->processed_beacon)
goto out;
- sta->processed_beacon = true;
+ sta->mesh->processed_beacon = true;
if (sta->sta.supp_rates[band] != rates)
changed |= IEEE80211_RC_SUPP_RATES_CHANGED;
@@ -421,23 +419,57 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
else
rate_control_rate_update(local, sband, sta, changed);
out:
- spin_unlock_bh(&sta->plink_lock);
+ spin_unlock_bh(&sta->mesh->plink_lock);
+}
+
+static int mesh_allocate_aid(struct ieee80211_sub_if_data *sdata)
+{
+ struct sta_info *sta;
+ unsigned long *aid_map;
+ int aid;
+
+ aid_map = kcalloc(BITS_TO_LONGS(IEEE80211_MAX_AID + 1),
+ sizeof(*aid_map), GFP_KERNEL);
+ if (!aid_map)
+ return -ENOMEM;
+
+ /* reserve aid 0 for mcast indication */
+ __set_bit(0, aid_map);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &sdata->local->sta_list, list)
+ __set_bit(sta->sta.aid, aid_map);
+ rcu_read_unlock();
+
+ aid = find_first_zero_bit(aid_map, IEEE80211_MAX_AID + 1);
+ kfree(aid_map);
+
+ if (aid > IEEE80211_MAX_AID)
+ return -ENOBUFS;
+
+ return aid;
}
static struct sta_info *
__mesh_sta_info_alloc(struct ieee80211_sub_if_data *sdata, u8 *hw_addr)
{
struct sta_info *sta;
+ int aid;
if (sdata->local->num_sta >= MESH_MAX_PLINKS)
return NULL;
+ aid = mesh_allocate_aid(sdata);
+ if (aid < 0)
+ return NULL;
+
sta = sta_info_alloc(sdata, hw_addr, GFP_KERNEL);
if (!sta)
return NULL;
- sta->plink_state = NL80211_PLINK_LISTEN;
+ sta->mesh->plink_state = NL80211_PLINK_LISTEN;
sta->sta.wme = true;
+ sta->sta.aid = aid;
sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
@@ -524,7 +556,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata,
goto out;
if (mesh_peer_accepts_plinks(elems) &&
- sta->plink_state == NL80211_PLINK_LISTEN &&
+ sta->mesh->plink_state == NL80211_PLINK_LISTEN &&
sdata->u.mesh.accepting_plinks &&
sdata->u.mesh.mshcfg.auto_open_plinks &&
rssi_threshold_check(sdata, sta))
@@ -554,52 +586,52 @@ static void mesh_plink_timer(unsigned long data)
if (sta->sdata->local->quiescing)
return;
- spin_lock_bh(&sta->plink_lock);
+ spin_lock_bh(&sta->mesh->plink_lock);
/* If a timer fires just before a state transition on another CPU,
* we may have already extended the timeout and changed state by the
* time we've acquired the lock and arrived here. In that case,
* skip this timer and wait for the new one.
*/
- if (time_before(jiffies, sta->plink_timer.expires)) {
+ if (time_before(jiffies, sta->mesh->plink_timer.expires)) {
mpl_dbg(sta->sdata,
"Ignoring timer for %pM in state %s (timer adjusted)",
- sta->sta.addr, mplstates[sta->plink_state]);
- spin_unlock_bh(&sta->plink_lock);
+ sta->sta.addr, mplstates[sta->mesh->plink_state]);
+ spin_unlock_bh(&sta->mesh->plink_lock);
return;
}
/* del_timer() and handler may race when entering these states */
- if (sta->plink_state == NL80211_PLINK_LISTEN ||
- sta->plink_state == NL80211_PLINK_ESTAB) {
+ if (sta->mesh->plink_state == NL80211_PLINK_LISTEN ||
+ sta->mesh->plink_state == NL80211_PLINK_ESTAB) {
mpl_dbg(sta->sdata,
"Ignoring timer for %pM in state %s (timer deleted)",
- sta->sta.addr, mplstates[sta->plink_state]);
- spin_unlock_bh(&sta->plink_lock);
+ sta->sta.addr, mplstates[sta->mesh->plink_state]);
+ spin_unlock_bh(&sta->mesh->plink_lock);
return;
}
mpl_dbg(sta->sdata,
"Mesh plink timer for %pM fired on state %s\n",
- sta->sta.addr, mplstates[sta->plink_state]);
+ sta->sta.addr, mplstates[sta->mesh->plink_state]);
sdata = sta->sdata;
mshcfg = &sdata->u.mesh.mshcfg;
- switch (sta->plink_state) {
+ switch (sta->mesh->plink_state) {
case NL80211_PLINK_OPN_RCVD:
case NL80211_PLINK_OPN_SNT:
/* retry timer */
- if (sta->plink_retries < mshcfg->dot11MeshMaxRetries) {
+ if (sta->mesh->plink_retries < mshcfg->dot11MeshMaxRetries) {
u32 rand;
mpl_dbg(sta->sdata,
"Mesh plink for %pM (retry, timeout): %d %d\n",
- sta->sta.addr, sta->plink_retries,
- sta->plink_timeout);
+ sta->sta.addr, sta->mesh->plink_retries,
+ sta->mesh->plink_timeout);
get_random_bytes(&rand, sizeof(u32));
- sta->plink_timeout = sta->plink_timeout +
- rand % sta->plink_timeout;
- ++sta->plink_retries;
- mod_plink_timer(sta, sta->plink_timeout);
+ sta->mesh->plink_timeout = sta->mesh->plink_timeout +
+ rand % sta->mesh->plink_timeout;
+ ++sta->mesh->plink_retries;
+ mod_plink_timer(sta, sta->mesh->plink_timeout);
action = WLAN_SP_MESH_PEERING_OPEN;
break;
}
@@ -609,31 +641,31 @@ static void mesh_plink_timer(unsigned long data)
/* confirm timer */
if (!reason)
reason = WLAN_REASON_MESH_CONFIRM_TIMEOUT;
- sta->plink_state = NL80211_PLINK_HOLDING;
+ sta->mesh->plink_state = NL80211_PLINK_HOLDING;
mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout);
action = WLAN_SP_MESH_PEERING_CLOSE;
break;
case NL80211_PLINK_HOLDING:
/* holding timer */
- del_timer(&sta->plink_timer);
+ del_timer(&sta->mesh->plink_timer);
mesh_plink_fsm_restart(sta);
break;
default:
break;
}
- spin_unlock_bh(&sta->plink_lock);
+ spin_unlock_bh(&sta->mesh->plink_lock);
if (action)
- mesh_plink_frame_tx(sdata, action, sta->sta.addr,
- sta->llid, sta->plid, reason);
+ mesh_plink_frame_tx(sdata, sta, action, sta->sta.addr,
+ sta->mesh->llid, sta->mesh->plid, reason);
}
static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout)
{
- sta->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
- sta->plink_timer.data = (unsigned long) sta;
- sta->plink_timer.function = mesh_plink_timer;
- sta->plink_timeout = timeout;
- add_timer(&sta->plink_timer);
+ sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout);
+ sta->mesh->plink_timer.data = (unsigned long) sta;
+ sta->mesh->plink_timer.function = mesh_plink_timer;
+ sta->mesh->plink_timeout = timeout;
+ add_timer(&sta->mesh->plink_timer);
}
static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
@@ -645,7 +677,7 @@ static bool llid_in_use(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
list_for_each_entry_rcu(sta, &local->sta_list, list) {
- if (!memcmp(&sta->llid, &llid, sizeof(llid))) {
+ if (!memcmp(&sta->mesh->llid, &llid, sizeof(llid))) {
in_use = true;
break;
}
@@ -661,8 +693,6 @@ static u16 mesh_get_new_llid(struct ieee80211_sub_if_data *sdata)
do {
get_random_bytes(&llid, sizeof(llid));
- /* for mesh PS we still only have the AID range for TIM bits */
- llid = (llid % IEEE80211_MAX_AID) + 1;
} while (llid_in_use(sdata, llid));
return llid;
@@ -676,16 +706,16 @@ u32 mesh_plink_open(struct sta_info *sta)
if (!test_sta_flag(sta, WLAN_STA_AUTH))
return 0;
- spin_lock_bh(&sta->plink_lock);
- sta->llid = mesh_get_new_llid(sdata);
- if (sta->plink_state != NL80211_PLINK_LISTEN &&
- sta->plink_state != NL80211_PLINK_BLOCKED) {
- spin_unlock_bh(&sta->plink_lock);
+ spin_lock_bh(&sta->mesh->plink_lock);
+ sta->mesh->llid = mesh_get_new_llid(sdata);
+ if (sta->mesh->plink_state != NL80211_PLINK_LISTEN &&
+ sta->mesh->plink_state != NL80211_PLINK_BLOCKED) {
+ spin_unlock_bh(&sta->mesh->plink_lock);
return 0;
}
- sta->plink_state = NL80211_PLINK_OPN_SNT;
+ sta->mesh->plink_state = NL80211_PLINK_OPN_SNT;
mesh_plink_timer_set(sta, sdata->u.mesh.mshcfg.dot11MeshRetryTimeout);
- spin_unlock_bh(&sta->plink_lock);
+ spin_unlock_bh(&sta->mesh->plink_lock);
mpl_dbg(sdata,
"Mesh plink: starting establishment with %pM\n",
sta->sta.addr);
@@ -693,8 +723,8 @@ u32 mesh_plink_open(struct sta_info *sta)
/* set the non-peer mode to active during peering */
changed = ieee80211_mps_local_status_update(sdata);
- mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_OPEN,
- sta->sta.addr, sta->llid, 0, 0);
+ mesh_plink_frame_tx(sdata, sta, WLAN_SP_MESH_PEERING_OPEN,
+ sta->sta.addr, sta->mesh->llid, 0, 0);
return changed;
}
@@ -702,10 +732,10 @@ u32 mesh_plink_block(struct sta_info *sta)
{
u32 changed;
- spin_lock_bh(&sta->plink_lock);
+ spin_lock_bh(&sta->mesh->plink_lock);
changed = __mesh_plink_deactivate(sta);
- sta->plink_state = NL80211_PLINK_BLOCKED;
- spin_unlock_bh(&sta->plink_lock);
+ sta->mesh->plink_state = NL80211_PLINK_BLOCKED;
+ spin_unlock_bh(&sta->mesh->plink_lock);
return changed;
}
@@ -715,12 +745,11 @@ static void mesh_plink_close(struct ieee80211_sub_if_data *sdata,
enum plink_event event)
{
struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg;
-
u16 reason = (event == CLS_ACPT) ?
WLAN_REASON_MESH_CLOSE : WLAN_REASON_MESH_CONFIG;
- sta->reason = reason;
- sta->plink_state = NL80211_PLINK_HOLDING;
+ sta->mesh->reason = reason;
+ sta->mesh->plink_state = NL80211_PLINK_HOLDING;
mod_plink_timer(sta, mshcfg->dot11MeshHoldingTimeout);
}
@@ -730,8 +759,8 @@ static u32 mesh_plink_establish(struct ieee80211_sub_if_data *sdata,
struct mesh_config *mshcfg = &sdata->u.mesh.mshcfg;
u32 changed = 0;
- del_timer(&sta->plink_timer);
- sta->plink_state = NL80211_PLINK_ESTAB;
+ del_timer(&sta->mesh->plink_timer);
+ sta->mesh->plink_state = NL80211_PLINK_ESTAB;
changed |= mesh_plink_inc_estab_count(sdata);
changed |= mesh_set_ht_prot_mode(sdata);
changed |= mesh_set_short_slot_time(sdata);
@@ -758,18 +787,18 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
u32 changed = 0;
mpl_dbg(sdata, "peer %pM in state %s got event %s\n", sta->sta.addr,
- mplstates[sta->plink_state], mplevents[event]);
+ mplstates[sta->mesh->plink_state], mplevents[event]);
- spin_lock_bh(&sta->plink_lock);
- switch (sta->plink_state) {
+ spin_lock_bh(&sta->mesh->plink_lock);
+ switch (sta->mesh->plink_state) {
case NL80211_PLINK_LISTEN:
switch (event) {
case CLS_ACPT:
mesh_plink_fsm_restart(sta);
break;
case OPN_ACPT:
- sta->plink_state = NL80211_PLINK_OPN_RCVD;
- sta->llid = mesh_get_new_llid(sdata);
+ sta->mesh->plink_state = NL80211_PLINK_OPN_RCVD;
+ sta->mesh->llid = mesh_get_new_llid(sdata);
mesh_plink_timer_set(sta,
mshcfg->dot11MeshRetryTimeout);
@@ -791,11 +820,11 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
break;
case OPN_ACPT:
/* retry timer is left untouched */
- sta->plink_state = NL80211_PLINK_OPN_RCVD;
+ sta->mesh->plink_state = NL80211_PLINK_OPN_RCVD;
action = WLAN_SP_MESH_PEERING_CONFIRM;
break;
case CNF_ACPT:
- sta->plink_state = NL80211_PLINK_CNF_RCVD;
+ sta->mesh->plink_state = NL80211_PLINK_CNF_RCVD;
mod_plink_timer(sta, mshcfg->dot11MeshConfirmTimeout);
break;
default:
@@ -855,7 +884,7 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
case NL80211_PLINK_HOLDING:
switch (event) {
case CLS_ACPT:
- del_timer(&sta->plink_timer);
+ del_timer(&sta->mesh->plink_timer);
mesh_plink_fsm_restart(sta);
break;
case OPN_ACPT:
@@ -874,17 +903,18 @@ static u32 mesh_plink_fsm(struct ieee80211_sub_if_data *sdata,
*/
break;
}
- spin_unlock_bh(&sta->plink_lock);
+ spin_unlock_bh(&sta->mesh->plink_lock);
if (action) {
- mesh_plink_frame_tx(sdata, action, sta->sta.addr,
- sta->llid, sta->plid, sta->reason);
+ mesh_plink_frame_tx(sdata, sta, action, sta->sta.addr,
+ sta->mesh->llid, sta->mesh->plid,
+ sta->mesh->reason);
/* also send confirm in open case */
if (action == WLAN_SP_MESH_PEERING_OPEN) {
- mesh_plink_frame_tx(sdata,
+ mesh_plink_frame_tx(sdata, sta,
WLAN_SP_MESH_PEERING_CONFIRM,
- sta->sta.addr, sta->llid,
- sta->plid, 0);
+ sta->sta.addr, sta->mesh->llid,
+ sta->mesh->plid, 0);
}
}
@@ -939,7 +969,7 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
mpl_dbg(sdata, "Mesh plink: Action frame from non-authed peer\n");
goto out;
}
- if (sta->plink_state == NL80211_PLINK_BLOCKED)
+ if (sta->mesh->plink_state == NL80211_PLINK_BLOCKED)
goto out;
}
@@ -954,7 +984,7 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
if (!matches_local)
event = OPN_RJCT;
if (!mesh_plink_free_count(sdata) ||
- (sta->plid && sta->plid != plid))
+ (sta->mesh->plid && sta->mesh->plid != plid))
event = OPN_IGNR;
else
event = OPN_ACPT;
@@ -963,14 +993,14 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
if (!matches_local)
event = CNF_RJCT;
if (!mesh_plink_free_count(sdata) ||
- sta->llid != llid ||
- (sta->plid && sta->plid != plid))
+ sta->mesh->llid != llid ||
+ (sta->mesh->plid && sta->mesh->plid != plid))
event = CNF_IGNR;
else
event = CNF_ACPT;
break;
case WLAN_SP_MESH_PEERING_CLOSE:
- if (sta->plink_state == NL80211_PLINK_ESTAB)
+ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB)
/* Do not check for llid or plid. This does not
* follow the standard but since multiple plinks
* per sta are not supported, it is necessary in
@@ -981,9 +1011,9 @@ mesh_plink_get_event(struct ieee80211_sub_if_data *sdata,
* restarted.
*/
event = CLS_ACPT;
- else if (sta->plid != plid)
+ else if (sta->mesh->plid != plid)
event = CLS_IGNR;
- else if (ie_len == 8 && sta->llid != llid)
+ else if (ie_len == 8 && sta->mesh->llid != llid)
event = CLS_IGNR;
else
event = CLS_ACPT;
@@ -1070,9 +1100,9 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
mpl_dbg(sdata, "Mesh plink: failed to init peer!\n");
goto unlock_rcu;
}
- sta->plid = plid;
+ sta->mesh->plid = plid;
} else if (!sta && event == OPN_RJCT) {
- mesh_plink_frame_tx(sdata, WLAN_SP_MESH_PEERING_CLOSE,
+ mesh_plink_frame_tx(sdata, NULL, WLAN_SP_MESH_PEERING_CLOSE,
mgmt->sa, 0, plid,
WLAN_REASON_MESH_CONFIG);
goto unlock_rcu;
@@ -1081,9 +1111,13 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata,
goto unlock_rcu;
}
- /* 802.11-2012 13.3.7.2 - update plid on CNF if not set */
- if (!sta->plid && event == CNF_ACPT)
- sta->plid = plid;
+ if (event == CNF_ACPT) {
+ /* 802.11-2012 13.3.7.2 - update plid on CNF if not set */
+ if (!sta->mesh->plid)
+ sta->mesh->plid = plid;
+
+ sta->mesh->aid = get_unaligned_le16(PLINK_CNF_AID(mgmt));
+ }
changed |= mesh_plink_fsm(sdata, sta, event);
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index ad8b377b4b9f..90a268abea17 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -92,16 +92,16 @@ u32 ieee80211_mps_local_status_update(struct ieee80211_sub_if_data *sdata)
if (sdata != sta->sdata)
continue;
- switch (sta->plink_state) {
+ switch (sta->mesh->plink_state) {
case NL80211_PLINK_OPN_SNT:
case NL80211_PLINK_OPN_RCVD:
case NL80211_PLINK_CNF_RCVD:
peering = true;
break;
case NL80211_PLINK_ESTAB:
- if (sta->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP)
+ if (sta->mesh->local_pm == NL80211_MESH_POWER_LIGHT_SLEEP)
light_sleep_cnt++;
- else if (sta->local_pm == NL80211_MESH_POWER_DEEP_SLEEP)
+ else if (sta->mesh->local_pm == NL80211_MESH_POWER_DEEP_SLEEP)
deep_sleep_cnt++;
break;
default:
@@ -153,19 +153,19 @@ u32 ieee80211_mps_set_sta_local_pm(struct sta_info *sta,
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
- if (sta->local_pm == pm)
+ if (sta->mesh->local_pm == pm)
return 0;
mps_dbg(sdata, "local STA operates in mode %d with %pM\n",
pm, sta->sta.addr);
- sta->local_pm = pm;
+ sta->mesh->local_pm = pm;
/*
* announce peer-specific power mode transition
* (see IEEE802.11-2012 13.14.3.2 and 13.14.3.3)
*/
- if (sta->plink_state == NL80211_PLINK_ESTAB)
+ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB)
mps_qos_null_tx(sta);
return ieee80211_mps_local_status_update(sdata);
@@ -197,8 +197,8 @@ void ieee80211_mps_set_frame_flags(struct ieee80211_sub_if_data *sdata,
if (is_unicast_ether_addr(hdr->addr1) &&
ieee80211_is_data_qos(hdr->frame_control) &&
- sta->plink_state == NL80211_PLINK_ESTAB)
- pm = sta->local_pm;
+ sta->mesh->plink_state == NL80211_PLINK_ESTAB)
+ pm = sta->mesh->local_pm;
else
pm = sdata->u.mesh.nonpeer_pm;
@@ -241,16 +241,16 @@ void ieee80211_mps_sta_status_update(struct sta_info *sta)
* use peer-specific power mode if peering is established and the
* peer's power mode is known
*/
- if (sta->plink_state == NL80211_PLINK_ESTAB &&
- sta->peer_pm != NL80211_MESH_POWER_UNKNOWN)
- pm = sta->peer_pm;
+ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB &&
+ sta->mesh->peer_pm != NL80211_MESH_POWER_UNKNOWN)
+ pm = sta->mesh->peer_pm;
else
- pm = sta->nonpeer_pm;
+ pm = sta->mesh->nonpeer_pm;
do_buffer = (pm != NL80211_MESH_POWER_ACTIVE);
/* clear the MPSP flags for non-peers or active STA */
- if (sta->plink_state != NL80211_PLINK_ESTAB) {
+ if (sta->mesh->plink_state != NL80211_PLINK_ESTAB) {
clear_sta_flag(sta, WLAN_STA_MPSP_OWNER);
clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
} else if (!do_buffer) {
@@ -296,13 +296,13 @@ static void mps_set_sta_peer_pm(struct sta_info *sta,
pm = NL80211_MESH_POWER_ACTIVE;
}
- if (sta->peer_pm == pm)
+ if (sta->mesh->peer_pm == pm)
return;
mps_dbg(sta->sdata, "STA %pM enters mode %d\n",
sta->sta.addr, pm);
- sta->peer_pm = pm;
+ sta->mesh->peer_pm = pm;
ieee80211_mps_sta_status_update(sta);
}
@@ -317,13 +317,13 @@ static void mps_set_sta_nonpeer_pm(struct sta_info *sta,
else
pm = NL80211_MESH_POWER_ACTIVE;
- if (sta->nonpeer_pm == pm)
+ if (sta->mesh->nonpeer_pm == pm)
return;
mps_dbg(sta->sdata, "STA %pM sets non-peer mode to %d\n",
sta->sta.addr, pm);
- sta->nonpeer_pm = pm;
+ sta->mesh->nonpeer_pm = pm;
ieee80211_mps_sta_status_update(sta);
}
@@ -552,7 +552,7 @@ void ieee80211_mpsp_trigger_process(u8 *qc, struct sta_info *sta,
} else {
if (eosp)
clear_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
- else if (sta->local_pm != NL80211_MESH_POWER_ACTIVE)
+ else if (sta->mesh->local_pm != NL80211_MESH_POWER_ACTIVE)
set_sta_flag(sta, WLAN_STA_MPSP_RECIPIENT);
if (rspi && !test_and_set_sta_flag(sta, WLAN_STA_MPSP_OWNER))
@@ -577,9 +577,9 @@ void ieee80211_mps_frame_release(struct sta_info *sta,
int ac, buffer_local = 0;
bool has_buffered = false;
- if (sta->plink_state == NL80211_PLINK_ESTAB)
+ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB)
has_buffered = ieee80211_check_tim(elems->tim, elems->tim_len,
- sta->llid);
+ sta->mesh->aid);
if (has_buffered)
mps_dbg(sta->sdata, "%pM indicates buffered frames\n",
@@ -598,7 +598,7 @@ void ieee80211_mps_frame_release(struct sta_info *sta,
if (!has_buffered && !buffer_local)
return;
- if (sta->plink_state == NL80211_PLINK_ESTAB)
+ if (sta->mesh->plink_state == NL80211_PLINK_ESTAB)
mpsp_trigger_send(sta, has_buffered, !buffer_local);
else
mps_frame_deliver(sta, 1);
diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c
index 09625d6205c3..64bc22ad9496 100644
--- a/net/mac80211/mesh_sync.c
+++ b/net/mac80211/mesh_sync.c
@@ -127,14 +127,14 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
/* Timing offset calculation (see 13.13.2.2.2) */
t_t = le64_to_cpu(mgmt->u.beacon.timestamp);
- sta->t_offset = t_t - t_r;
+ sta->mesh->t_offset = t_t - t_r;
if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) {
- s64 t_clockdrift = sta->t_offset_setpoint - sta->t_offset;
+ s64 t_clockdrift = sta->mesh->t_offset_setpoint - sta->mesh->t_offset;
msync_dbg(sdata,
- "STA %pM : sta->t_offset=%lld, sta->t_offset_setpoint=%lld, t_clockdrift=%lld\n",
- sta->sta.addr, (long long) sta->t_offset,
- (long long) sta->t_offset_setpoint,
+ "STA %pM : t_offset=%lld, t_offset_setpoint=%lld, t_clockdrift=%lld\n",
+ sta->sta.addr, (long long) sta->mesh->t_offset,
+ (long long) sta->mesh->t_offset_setpoint,
(long long) t_clockdrift);
if (t_clockdrift > TOFFSET_MAXIMUM_ADJUSTMENT ||
@@ -152,12 +152,12 @@ static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata,
ifmsh->sync_offset_clockdrift_max = t_clockdrift;
spin_unlock_bh(&ifmsh->sync_offset_lock);
} else {
- sta->t_offset_setpoint = sta->t_offset - TOFFSET_SET_MARGIN;
+ sta->mesh->t_offset_setpoint = sta->mesh->t_offset - TOFFSET_SET_MARGIN;
set_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN);
msync_dbg(sdata,
- "STA %pM : offset was invalid, sta->t_offset=%lld\n",
+ "STA %pM : offset was invalid, t_offset=%lld\n",
sta->sta.addr,
- (long long) sta->t_offset);
+ (long long) sta->mesh->t_offset);
}
no_sync:
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 9b2cc278ac2a..705ef1d040ed 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -6,6 +6,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright (C) 2015 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -538,11 +539,16 @@ static void ieee80211_add_ht_ie(struct ieee80211_sub_if_data *sdata,
ieee80211_ie_build_ht_cap(pos, &ht_cap, cap);
}
+/* This function determines vht capability flags for the association
+ * and builds the IE.
+ * Note - the function may set the owner of the MU-MIMO capability
+ */
static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb,
struct ieee80211_supported_band *sband,
struct ieee80211_vht_cap *ap_vht_cap)
{
+ struct ieee80211_local *local = sdata->local;
u8 *pos;
u32 cap;
struct ieee80211_sta_vht_cap vht_cap;
@@ -576,7 +582,34 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata,
*/
if (!(ap_vht_cap->vht_cap_info &
cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)))
- cap &= ~IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE;
+ cap &= ~(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+ IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE);
+ else if (!(ap_vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE)))
+ cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+
+ /*
+ * If some other vif is using the MU-MIMO capability we cannot associate
+ * using MU-MIMO - this will lead to contradictions in the group-id
+ * mechanism.
+ * Ownership is claimed as early as the association request, in order to
+ * avoid simultaneous associations with MU-MIMO.
+ */
+ if (cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE) {
+ bool disable_mu_mimo = false;
+ struct ieee80211_sub_if_data *other;
+
+ list_for_each_entry_rcu(other, &local->interfaces, list) {
+ if (other->flags & IEEE80211_SDATA_MU_MIMO_OWNER) {
+ disable_mu_mimo = true;
+ break;
+ }
+ }
+ if (disable_mu_mimo)
+ cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+ else
+ sdata->flags |= IEEE80211_SDATA_MU_MIMO_OWNER;
+ }
mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK;
@@ -1096,24 +1129,6 @@ static void ieee80211_chswitch_timer(unsigned long data)
ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.chswitch_work);
}
-static void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
-{
- struct sta_info *sta;
- u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
- if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
- !test_sta_flag(sta, WLAN_STA_AUTHORIZED))
- continue;
-
- ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr,
- NL80211_TDLS_TEARDOWN, reason,
- GFP_ATOMIC);
- }
- rcu_read_unlock();
-}
-
static void
ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
u64 timestamp, u32 device_timestamp,
@@ -2076,6 +2091,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
memset(&ifmgd->ht_capa_mask, 0, sizeof(ifmgd->ht_capa_mask));
memset(&ifmgd->vht_capa, 0, sizeof(ifmgd->vht_capa));
memset(&ifmgd->vht_capa_mask, 0, sizeof(ifmgd->vht_capa_mask));
+ sdata->flags &= ~IEEE80211_SDATA_MU_MIMO_OWNER;
sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL;
@@ -2538,6 +2554,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata,
eth_zero_addr(sdata->u.mgd.bssid);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID);
sdata->u.mgd.flags = 0;
+ sdata->flags &= ~IEEE80211_SDATA_MU_MIMO_OWNER;
mutex_lock(&sdata->local->mtx);
ieee80211_vif_release_channel(sdata);
mutex_unlock(&sdata->local->mtx);
@@ -3034,12 +3051,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
rate_control_rate_init(sta);
- if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) {
+ if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED)
set_sta_flag(sta, WLAN_STA_MFP);
- sta->sta.mfp = true;
- } else {
- sta->sta.mfp = false;
- }
sta->sta.wme = elems.wmm_param && local->hw.queues >= IEEE80211_NUM_ACS;
diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c
index 358d5f9d8207..573b81a1fb2d 100644
--- a/net/mac80211/ocb.c
+++ b/net/mac80211/ocb.c
@@ -179,7 +179,7 @@ int ieee80211_ocb_join(struct ieee80211_sub_if_data *sdata,
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_ocb *ifocb = &sdata->u.ocb;
- u32 changed = BSS_CHANGED_OCB;
+ u32 changed = BSS_CHANGED_OCB | BSS_CHANGED_BSSID;
int err;
if (ifocb->joined == true)
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index fda33f961d83..9857693b91ec 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -29,6 +29,65 @@ module_param(ieee80211_default_rc_algo, charp, 0644);
MODULE_PARM_DESC(ieee80211_default_rc_algo,
"Default rate control algorithm for mac80211 to use");
+void rate_control_rate_init(struct sta_info *sta)
+{
+ struct ieee80211_local *local = sta->sdata->local;
+ struct rate_control_ref *ref = sta->rate_ctrl;
+ struct ieee80211_sta *ista = &sta->sta;
+ void *priv_sta = sta->rate_ctrl_priv;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+
+ ieee80211_sta_set_rx_nss(sta);
+
+ if (!ref)
+ return;
+
+ rcu_read_lock();
+
+ chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
+
+ spin_lock_bh(&sta->rate_ctrl_lock);
+ ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
+ priv_sta);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
+ rcu_read_unlock();
+ set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
+}
+
+void rate_control_rate_update(struct ieee80211_local *local,
+ struct ieee80211_supported_band *sband,
+ struct sta_info *sta, u32 changed)
+{
+ struct rate_control_ref *ref = local->rate_ctrl;
+ struct ieee80211_sta *ista = &sta->sta;
+ void *priv_sta = sta->rate_ctrl_priv;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+
+ if (ref && ref->ops->rate_update) {
+ rcu_read_lock();
+
+ chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ spin_lock_bh(&sta->rate_ctrl_lock);
+ ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
+ ista, priv_sta, changed);
+ spin_unlock_bh(&sta->rate_ctrl_lock);
+ rcu_read_unlock();
+ }
+ drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
+}
+
int ieee80211_rate_control_register(const struct rate_control_ops *ops)
{
struct rate_control_alg *alg;
@@ -294,39 +353,37 @@ bool rate_control_send_low(struct ieee80211_sta *pubsta,
}
EXPORT_SYMBOL(rate_control_send_low);
-static bool rate_idx_match_legacy_mask(struct ieee80211_tx_rate *rate,
- int n_bitrates, u32 mask)
+static bool rate_idx_match_legacy_mask(s8 *rate_idx, int n_bitrates, u32 mask)
{
int j;
/* See whether the selected rate or anything below it is allowed. */
- for (j = rate->idx; j >= 0; j--) {
+ for (j = *rate_idx; j >= 0; j--) {
if (mask & (1 << j)) {
/* Okay, found a suitable rate. Use it. */
- rate->idx = j;
+ *rate_idx = j;
return true;
}
}
/* Try to find a higher rate that would be allowed */
- for (j = rate->idx + 1; j < n_bitrates; j++) {
+ for (j = *rate_idx + 1; j < n_bitrates; j++) {
if (mask & (1 << j)) {
/* Okay, found a suitable rate. Use it. */
- rate->idx = j;
+ *rate_idx = j;
return true;
}
}
return false;
}
-static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate,
- u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN])
+static bool rate_idx_match_mcs_mask(s8 *rate_idx, u8 *mcs_mask)
{
int i, j;
int ridx, rbit;
- ridx = rate->idx / 8;
- rbit = rate->idx % 8;
+ ridx = *rate_idx / 8;
+ rbit = *rate_idx % 8;
/* sanity check */
if (ridx < 0 || ridx >= IEEE80211_HT_MCS_MASK_LEN)
@@ -336,20 +393,20 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate,
for (i = ridx; i >= 0; i--) {
for (j = rbit; j >= 0; j--)
if (mcs_mask[i] & BIT(j)) {
- rate->idx = i * 8 + j;
+ *rate_idx = i * 8 + j;
return true;
}
rbit = 7;
}
/* Try to find a higher rate that would be allowed */
- ridx = (rate->idx + 1) / 8;
- rbit = (rate->idx + 1) % 8;
+ ridx = (*rate_idx + 1) / 8;
+ rbit = (*rate_idx + 1) % 8;
for (i = ridx; i < IEEE80211_HT_MCS_MASK_LEN; i++) {
for (j = rbit; j < 8; j++)
if (mcs_mask[i] & BIT(j)) {
- rate->idx = i * 8 + j;
+ *rate_idx = i * 8 + j;
return true;
}
rbit = 0;
@@ -357,37 +414,93 @@ static bool rate_idx_match_mcs_mask(struct ieee80211_tx_rate *rate,
return false;
}
+static bool rate_idx_match_vht_mcs_mask(s8 *rate_idx, u16 *vht_mask)
+{
+ int i, j;
+ int ridx, rbit;
+
+ ridx = *rate_idx >> 4;
+ rbit = *rate_idx & 0xf;
+
+ if (ridx < 0 || ridx >= NL80211_VHT_NSS_MAX)
+ return false;
+
+ /* See whether the selected rate or anything below it is allowed. */
+ for (i = ridx; i >= 0; i--) {
+ for (j = rbit; j >= 0; j--) {
+ if (vht_mask[i] & BIT(j)) {
+ *rate_idx = (i << 4) | j;
+ return true;
+ }
+ }
+ rbit = 15;
+ }
+ /* Try to find a higher rate that would be allowed */
+ ridx = (*rate_idx + 1) >> 4;
+ rbit = (*rate_idx + 1) & 0xf;
-static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
+ for (i = ridx; i < NL80211_VHT_NSS_MAX; i++) {
+ for (j = rbit; j < 16; j++) {
+ if (vht_mask[i] & BIT(j)) {
+ *rate_idx = (i << 4) | j;
+ return true;
+ }
+ }
+ rbit = 0;
+ }
+ return false;
+}
+
+static void rate_idx_match_mask(s8 *rate_idx, u16 *rate_flags,
struct ieee80211_supported_band *sband,
enum nl80211_chan_width chan_width,
u32 mask,
- u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN])
+ u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN],
+ u16 vht_mask[NL80211_VHT_NSS_MAX])
{
- struct ieee80211_tx_rate alt_rate;
+ if (*rate_flags & IEEE80211_TX_RC_VHT_MCS) {
+ /* handle VHT rates */
+ if (rate_idx_match_vht_mcs_mask(rate_idx, vht_mask))
+ return;
+
+ *rate_idx = 0;
+ /* keep protection flags */
+ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS |
+ IEEE80211_TX_RC_USE_CTS_PROTECT |
+ IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
- /* handle HT rates */
- if (rate->flags & IEEE80211_TX_RC_MCS) {
- if (rate_idx_match_mcs_mask(rate, mcs_mask))
+ *rate_flags |= IEEE80211_TX_RC_MCS;
+ if (chan_width == NL80211_CHAN_WIDTH_40)
+ *rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
+
+ if (rate_idx_match_mcs_mask(rate_idx, mcs_mask))
return;
/* also try the legacy rates. */
- alt_rate.idx = 0;
+ *rate_flags &= ~(IEEE80211_TX_RC_MCS |
+ IEEE80211_TX_RC_40_MHZ_WIDTH);
+ if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates,
+ mask))
+ return;
+ } else if (*rate_flags & IEEE80211_TX_RC_MCS) {
+ /* handle HT rates */
+ if (rate_idx_match_mcs_mask(rate_idx, mcs_mask))
+ return;
+
+ /* also try the legacy rates. */
+ *rate_idx = 0;
/* keep protection flags */
- alt_rate.flags = rate->flags &
- (IEEE80211_TX_RC_USE_RTS_CTS |
- IEEE80211_TX_RC_USE_CTS_PROTECT |
- IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
- alt_rate.count = rate->count;
- if (rate_idx_match_legacy_mask(&alt_rate,
- sband->n_bitrates, mask)) {
- *rate = alt_rate;
+ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS |
+ IEEE80211_TX_RC_USE_CTS_PROTECT |
+ IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
+ if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates,
+ mask))
return;
- }
- } else if (!(rate->flags & IEEE80211_TX_RC_VHT_MCS)) {
+ } else {
/* handle legacy rates */
- if (rate_idx_match_legacy_mask(rate, sband->n_bitrates, mask))
+ if (rate_idx_match_legacy_mask(rate_idx, sband->n_bitrates,
+ mask))
return;
/* if HT BSS, and we handle a data frame, also try HT rates */
@@ -400,23 +513,19 @@ static void rate_idx_match_mask(struct ieee80211_tx_rate *rate,
break;
}
- alt_rate.idx = 0;
+ *rate_idx = 0;
/* keep protection flags */
- alt_rate.flags = rate->flags &
- (IEEE80211_TX_RC_USE_RTS_CTS |
- IEEE80211_TX_RC_USE_CTS_PROTECT |
- IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
- alt_rate.count = rate->count;
+ *rate_flags &= (IEEE80211_TX_RC_USE_RTS_CTS |
+ IEEE80211_TX_RC_USE_CTS_PROTECT |
+ IEEE80211_TX_RC_USE_SHORT_PREAMBLE);
- alt_rate.flags |= IEEE80211_TX_RC_MCS;
+ *rate_flags |= IEEE80211_TX_RC_MCS;
if (chan_width == NL80211_CHAN_WIDTH_40)
- alt_rate.flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
+ *rate_flags |= IEEE80211_TX_RC_40_MHZ_WIDTH;
- if (rate_idx_match_mcs_mask(&alt_rate, mcs_mask)) {
- *rate = alt_rate;
+ if (rate_idx_match_mcs_mask(rate_idx, mcs_mask))
return;
- }
}
/*
@@ -569,18 +678,92 @@ static void rate_control_fill_sta_table(struct ieee80211_sta *sta,
}
}
+/*
+ * rate_control_cap_mask - compute the effective legacy/HT/VHT rate masks
+ * @sdata: interface whose configured rate masks are consulted
+ * @sband: band for which the masks are looked up
+ * @sta: optional station; when non-NULL the masks are additionally limited
+ *	to what the station reports as supported
+ * @mask: output, bitmap over @sband's legacy bitrates
+ * @mcs_mask: output, HT MCS mask of IEEE80211_HT_MCS_MASK_LEN bytes
+ * @vht_mask: output, VHT MCS mask of NL80211_VHT_NSS_MAX entries
+ *
+ * Returns false when the legacy mask allows every bitrate of the band and
+ * neither an HT nor a VHT MCS mask is configured, i.e. there is nothing to
+ * enforce; in that case only @mask has been written. Returns true otherwise,
+ * with all three masks filled in.
+ */
+static bool rate_control_cap_mask(struct ieee80211_sub_if_data *sdata,
+				  struct ieee80211_supported_band *sband,
+				  struct ieee80211_sta *sta, u32 *mask,
+				  u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN],
+				  u16 vht_mask[NL80211_VHT_NSS_MAX])
+{
+	u32 i, flags;
+
+	/* drop legacy rates lacking the flags required by the channel def */
+	*mask = sdata->rc_rateidx_mask[sband->band];
+	flags = ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
+	for (i = 0; i < sband->n_bitrates; i++) {
+		if ((flags & sband->bitrates[i].flags) != flags)
+			*mask &= ~BIT(i);
+	}
+
+	if (*mask == (1 << sband->n_bitrates) - 1 &&
+	    !sdata->rc_has_mcs_mask[sband->band] &&
+	    !sdata->rc_has_vht_mcs_mask[sband->band])
+		return false;
+
+	/* an unconfigured MCS mask means "allow everything" */
+	if (sdata->rc_has_mcs_mask[sband->band])
+		memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[sband->band],
+		       IEEE80211_HT_MCS_MASK_LEN);
+	else
+		memset(mcs_mask, 0xff, IEEE80211_HT_MCS_MASK_LEN);
+
+	if (sdata->rc_has_vht_mcs_mask[sband->band])
+		memcpy(vht_mask, sdata->rc_rateidx_vht_mcs_mask[sband->band],
+		       sizeof(u16) * NL80211_VHT_NSS_MAX);
+	else
+		memset(vht_mask, 0xff, sizeof(u16) * NL80211_VHT_NSS_MAX);
+
+	if (sta) {
+		__le16 sta_vht_cap;
+		u16 sta_vht_mask[NL80211_VHT_NSS_MAX];
+
+		/* Filter out rates that the STA does not support */
+		*mask &= sta->supp_rates[sband->band];
+		/*
+		 * mcs_mask is an array *parameter* and therefore a pointer:
+		 * sizeof(mcs_mask) would yield the pointer size rather than
+		 * the mask length, so iterate over the explicit length.
+		 */
+		for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++)
+			mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
+
+		sta_vht_cap = sta->vht_cap.vht_mcs.rx_mcs_map;
+		ieee80211_get_vht_mask_from_cap(sta_vht_cap, sta_vht_mask);
+		for (i = 0; i < NL80211_VHT_NSS_MAX; i++)
+			vht_mask[i] &= sta_vht_mask[i];
+	}
+
+	return true;
+}
+
+/*
+ * Apply the configured rate masks to a station rate table.
+ *
+ * Each valid entry of @rates (entries are valid up to the first negative
+ * idx) has its idx/flags adjusted in place by rate_idx_match_mask(). Nothing
+ * is touched when rate_control_cap_mask() reports that no mask needs to be
+ * enforced.
+ */
+static void
+rate_control_apply_mask_ratetbl(struct sta_info *sta,
+				struct ieee80211_supported_band *sband,
+				struct ieee80211_sta_rates *rates)
+{
+	enum nl80211_chan_width width;
+	u16 vht_mcs[NL80211_VHT_NSS_MAX];
+	u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
+	u32 legacy;
+	int n;
+
+	if (!rate_control_cap_mask(sta->sdata, sband, &sta->sta, &legacy,
+				   ht_mcs, vht_mcs))
+		return;
+
+	width = sta->sdata->vif.bss_conf.chandef.width;
+
+	/* a negative idx terminates the table */
+	for (n = 0;
+	     n < IEEE80211_TX_RATE_TABLE_SIZE && rates->rate[n].idx >= 0;
+	     n++)
+		rate_idx_match_mask(&rates->rate[n].idx,
+				    &rates->rate[n].flags,
+				    sband, width, legacy, ht_mcs, vht_mcs);
+}
+
static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
struct ieee80211_sta *sta,
struct ieee80211_supported_band *sband,
- struct ieee80211_tx_info *info,
struct ieee80211_tx_rate *rates,
int max_rates)
{
enum nl80211_chan_width chan_width;
u8 mcs_mask[IEEE80211_HT_MCS_MASK_LEN];
- bool has_mcs_mask;
u32 mask;
- u32 rate_flags;
+ u16 rate_flags, vht_mask[NL80211_VHT_NSS_MAX];
int i;
/*
@@ -588,30 +771,10 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
* default mask (allow all rates) is used to save some processing for
* the common case.
*/
- mask = sdata->rc_rateidx_mask[info->band];
- has_mcs_mask = sdata->rc_has_mcs_mask[info->band];
- rate_flags =
- ieee80211_chandef_rate_flags(&sdata->vif.bss_conf.chandef);
- for (i = 0; i < sband->n_bitrates; i++)
- if ((rate_flags & sband->bitrates[i].flags) != rate_flags)
- mask &= ~BIT(i);
-
- if (mask == (1 << sband->n_bitrates) - 1 && !has_mcs_mask)
+ if (!rate_control_cap_mask(sdata, sband, sta, &mask, mcs_mask,
+ vht_mask))
return;
- if (has_mcs_mask)
- memcpy(mcs_mask, sdata->rc_rateidx_mcs_mask[info->band],
- sizeof(mcs_mask));
- else
- memset(mcs_mask, 0xff, sizeof(mcs_mask));
-
- if (sta) {
- /* Filter out rates that the STA does not support */
- mask &= sta->supp_rates[info->band];
- for (i = 0; i < sizeof(mcs_mask); i++)
- mcs_mask[i] &= sta->ht_cap.mcs.rx_mask[i];
- }
-
/*
* Make sure the rate index selected for each TX rate is
* included in the configured mask and change the rate indexes
@@ -623,8 +786,10 @@ static void rate_control_apply_mask(struct ieee80211_sub_if_data *sdata,
if (rates[i].idx < 0)
break;
- rate_idx_match_mask(&rates[i], sband, chan_width, mask,
- mcs_mask);
+ rate_flags = rates[i].flags;
+ rate_idx_match_mask(&rates[i].idx, &rate_flags, sband,
+ chan_width, mask, mcs_mask, vht_mask);
+ rates[i].flags = rate_flags;
}
}
@@ -648,7 +813,7 @@ void ieee80211_get_tx_rates(struct ieee80211_vif *vif,
sband = sdata->local->hw.wiphy->bands[info->band];
if (ieee80211_is_data(hdr->frame_control))
- rate_control_apply_mask(sdata, sta, sband, info, dest, max_rates);
+ rate_control_apply_mask(sdata, sta, sband, dest, max_rates);
if (dest[0].idx < 0)
__rate_control_send_low(&sdata->local->hw, sband, sta, info,
@@ -705,7 +870,10 @@ int rate_control_set_rates(struct ieee80211_hw *hw,
{
struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
struct ieee80211_sta_rates *old;
+ struct ieee80211_supported_band *sband;
+ sband = hw->wiphy->bands[ieee80211_get_sdata_band(sta->sdata)];
+ rate_control_apply_mask_ratetbl(sta, sband, rates);
/*
* mac80211 guarantees that this function will not be called
* concurrently, so the following RCU access is safe, even without
diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h
index 25c9be5dd7fd..624fe5b81615 100644
--- a/net/mac80211/rate.h
+++ b/net/mac80211/rate.h
@@ -71,64 +71,10 @@ rate_control_tx_status_noskb(struct ieee80211_local *local,
spin_unlock_bh(&sta->rate_ctrl_lock);
}
-static inline void rate_control_rate_init(struct sta_info *sta)
-{
- struct ieee80211_local *local = sta->sdata->local;
- struct rate_control_ref *ref = sta->rate_ctrl;
- struct ieee80211_sta *ista = &sta->sta;
- void *priv_sta = sta->rate_ctrl_priv;
- struct ieee80211_supported_band *sband;
- struct ieee80211_chanctx_conf *chanctx_conf;
-
- ieee80211_sta_set_rx_nss(sta);
-
- if (!ref)
- return;
-
- rcu_read_lock();
-
- chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
- if (WARN_ON(!chanctx_conf)) {
- rcu_read_unlock();
- return;
- }
-
- sband = local->hw.wiphy->bands[chanctx_conf->def.chan->band];
-
- spin_lock_bh(&sta->rate_ctrl_lock);
- ref->ops->rate_init(ref->priv, sband, &chanctx_conf->def, ista,
- priv_sta);
- spin_unlock_bh(&sta->rate_ctrl_lock);
- rcu_read_unlock();
- set_sta_flag(sta, WLAN_STA_RATE_CONTROL);
-}
-
-static inline void rate_control_rate_update(struct ieee80211_local *local,
+void rate_control_rate_init(struct sta_info *sta);
+void rate_control_rate_update(struct ieee80211_local *local,
struct ieee80211_supported_band *sband,
- struct sta_info *sta, u32 changed)
-{
- struct rate_control_ref *ref = local->rate_ctrl;
- struct ieee80211_sta *ista = &sta->sta;
- void *priv_sta = sta->rate_ctrl_priv;
- struct ieee80211_chanctx_conf *chanctx_conf;
-
- if (ref && ref->ops->rate_update) {
- rcu_read_lock();
-
- chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf);
- if (WARN_ON(!chanctx_conf)) {
- rcu_read_unlock();
- return;
- }
-
- spin_lock_bh(&sta->rate_ctrl_lock);
- ref->ops->rate_update(ref->priv, sband, &chanctx_conf->def,
- ista, priv_sta, changed);
- spin_unlock_bh(&sta->rate_ctrl_lock);
- rcu_read_unlock();
- }
- drv_sta_rc_update(local, sta->sdata, &sta->sta, changed);
-}
+ struct sta_info *sta, u32 changed);
static inline void *rate_control_alloc_sta(struct rate_control_ref *ref,
struct sta_info *sta, gfp_t gfp)
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 543b67233535..3928dbd24e25 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -867,7 +867,13 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
else
idx = index % MCS_GROUP_RATES + (group->streams - 1) * 8;
- if (offset > 0) {
+ /* enable RTS/CTS if needed:
+ * - if station is in dynamic SMPS (and streams > 1)
+ * - for fallback rates, to increase chances of getting through
+ *
+ * Either condition alone is sufficient, hence the logical OR.
+ */
+ if (offset > 0 ||
+ (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC &&
+ group->streams > 1)) {
ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts;
flags |= IEEE80211_TX_RC_USE_RTS_CTS;
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 5dae166cb7f5..5bc0b88d9eb1 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -42,6 +42,51 @@ static inline void ieee80211_rx_stats(struct net_device *dev, u32 len)
u64_stats_update_end(&tstats->syncp);
}
+/*
+ * Pick the header address field that holds the BSSID of a received frame.
+ *
+ * @hdr:  802.11 header of the frame
+ * @len:  length used to validate data and management headers
+ * @type: interface type, only consulted for BlockAck request frames
+ *
+ * Returns a pointer into @hdr, or NULL when no BSSID is reported: data or
+ * management frames with @len below 24, 4-address data frames, control
+ * frames other than PS-Poll / BlockAck request, BlockAck requests on
+ * interface types other than station, AP or AP_VLAN, and any other frame
+ * type.
+ */
+static u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
+			       enum nl80211_iftype type)
+{
+	__le16 fc = hdr->frame_control;
+
+	if (ieee80211_is_data(fc)) {
+		/* drop incorrect hdr len (data), and 4-address frames */
+		if (len < 24 || ieee80211_has_a4(fc))
+			return NULL;
+		if (ieee80211_has_tods(fc))
+			return hdr->addr1;
+		return ieee80211_has_fromds(fc) ? hdr->addr2 : hdr->addr3;
+	}
+
+	if (ieee80211_is_mgmt(fc)) {
+		/* drop incorrect hdr len (mgmt) */
+		return len < 24 ? NULL : hdr->addr3;
+	}
+
+	if (!ieee80211_is_ctl(fc))
+		return NULL;
+
+	if (ieee80211_is_pspoll(fc))
+		return hdr->addr1;
+
+	if (!ieee80211_is_back_req(fc))
+		return NULL;
+
+	/* BlockAck request: the address to use depends on our role */
+	switch (type) {
+	case NL80211_IFTYPE_STATION:
+		return hdr->addr2;
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+		return hdr->addr1;
+	default:
+		return NULL;
+	}
+}
+
/*
* monitor mode reception
*
@@ -77,8 +122,7 @@ static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
hdr = (void *)(skb->data + rtap_vendor_space);
if (status->flag & (RX_FLAG_FAILED_FCS_CRC |
- RX_FLAG_FAILED_PLCP_CRC |
- RX_FLAG_AMPDU_IS_ZEROLEN))
+ RX_FLAG_FAILED_PLCP_CRC))
return true;
if (unlikely(skb->len < 16 + present_fcs_len + rtap_vendor_space))
@@ -346,10 +390,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
cpu_to_le32(1 << IEEE80211_RADIOTAP_AMPDU_STATUS);
put_unaligned_le32(status->ampdu_reference, pos);
pos += 4;
- if (status->flag & RX_FLAG_AMPDU_REPORT_ZEROLEN)
- flags |= IEEE80211_RADIOTAP_AMPDU_REPORT_ZEROLEN;
- if (status->flag & RX_FLAG_AMPDU_IS_ZEROLEN)
- flags |= IEEE80211_RADIOTAP_AMPDU_IS_ZEROLEN;
if (status->flag & RX_FLAG_AMPDU_LAST_KNOWN)
flags |= IEEE80211_RADIOTAP_AMPDU_LAST_KNOWN;
if (status->flag & RX_FLAG_AMPDU_IS_LAST)
@@ -1093,11 +1133,6 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
- if (unlikely(rx->skb->len < 16)) {
- I802_DEBUG_INC(rx->local->rx_handlers_drop_short);
- return RX_DROP_MONITOR;
- }
-
/* Drop disallowed frame classes based on STA auth/assoc state;
* IEEE 802.11, Chap 5.5.
*
@@ -1240,22 +1275,22 @@ static void sta_ps_end(struct sta_info *sta)
ieee80211_sta_ps_deliver_wakeup(sta);
}
-int ieee80211_sta_ps_transition(struct ieee80211_sta *sta, bool start)
+int ieee80211_sta_ps_transition(struct ieee80211_sta *pubsta, bool start)
{
- struct sta_info *sta_inf = container_of(sta, struct sta_info, sta);
+ struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
bool in_ps;
- WARN_ON(!ieee80211_hw_check(&sta_inf->local->hw, AP_LINK_PS));
+ WARN_ON(!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS));
/* Don't let the same PS state be set twice */
- in_ps = test_sta_flag(sta_inf, WLAN_STA_PS_STA);
+ in_ps = test_sta_flag(sta, WLAN_STA_PS_STA);
if ((start && in_ps) || (!start && !in_ps))
return -EINVAL;
if (start)
- sta_ps_start(sta_inf);
+ sta_ps_start(sta);
else
- sta_ps_end(sta_inf);
+ sta_ps_end(sta);
return 0;
}
@@ -1393,7 +1428,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
sta->rx_bytes += rx->skb->len;
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
sta->last_signal = status->signal;
- ewma_add(&sta->avg_signal, -status->signal);
+ ewma_signal_add(&sta->avg_signal, -status->signal);
}
if (status->chains) {
@@ -1405,7 +1440,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
continue;
sta->chain_signal_last[i] = signal;
- ewma_add(&sta->chain_signal_avg[i], -signal);
+ ewma_signal_add(&sta->chain_signal_avg[i], -signal);
}
}
@@ -1647,7 +1682,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
if (unlikely(rx->key->flags & KEY_FLAG_TAINTED))
return RX_DROP_MONITOR;
- rx->key->tx_rx_count++;
/* TODO: add threshold stuff again */
} else {
return RX_DROP_MONITOR;
@@ -1883,7 +1917,6 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
/* Complete frame has been reassembled - process it now */
status = IEEE80211_SKB_RXCB(rx->skb);
- status->rx_flags |= IEEE80211_RX_FRAGMENTED;
out:
ieee80211_led_rx(rx->local);
@@ -2108,9 +2141,8 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
/* deliver to local stack */
skb->protocol = eth_type_trans(skb, dev);
memset(skb->cb, 0, sizeof(skb->cb));
- if (!(rx->flags & IEEE80211_RX_REORDER_TIMER) &&
- rx->local->napi)
- napi_gro_receive(rx->local->napi, skb);
+ if (rx->napi)
+ napi_gro_receive(rx->napi, skb);
else
netif_receive_skb(skb);
}
@@ -2378,9 +2410,8 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
tf->category == WLAN_CATEGORY_TDLS &&
(tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_REQUEST ||
tf->action_code == WLAN_TDLS_CHANNEL_SWITCH_RESPONSE)) {
- rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TDLS_CHSW;
- skb_queue_tail(&sdata->skb_queue, rx->skb);
- ieee80211_queue_work(&rx->local->hw, &sdata->work);
+ skb_queue_tail(&local->skb_queue_tdls_chsw, rx->skb);
+ schedule_work(&local->tdls_chsw_work);
if (rx->sta)
rx->sta->rx_packets++;
@@ -3004,7 +3035,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx)
return RX_QUEUED;
}
-/* TODO: use IEEE80211_RX_FRAGMENTED */
static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx,
struct ieee80211_rate *rate)
{
@@ -3216,7 +3246,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .flags = IEEE80211_RX_REORDER_TIMER,
+ .napi = NULL, /* must be NULL to not have races */
};
struct tid_ampdu_rx *tid_agg_rx;
@@ -3286,7 +3316,7 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
case NL80211_IFTYPE_OCB:
if (!bssid)
return false;
- if (ieee80211_is_beacon(hdr->frame_control))
+ if (!ieee80211_is_data_present(hdr->frame_control))
return false;
if (!is_broadcast_ether_addr(bssid))
return false;
@@ -3393,7 +3423,8 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
* be called with rcu_read_lock protection.
*/
static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ struct napi_struct *napi)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_sub_if_data *sdata;
@@ -3409,6 +3440,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
memset(&rx, 0, sizeof(rx));
rx.skb = skb;
rx.local = local;
+ rx.napi = napi;
if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
I802_DEBUG_INC(local->dot11ReceivedFragmentCount);
@@ -3510,7 +3542,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
* This is the receive path handler. It is called by a low level driver when an
* 802.11 MPDU is received from the hardware.
*/
-void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
+void ieee80211_rx_napi(struct ieee80211_hw *hw, struct sk_buff *skb,
+ struct napi_struct *napi)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_rate *rate = NULL;
@@ -3609,7 +3642,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
ieee80211_tpt_led_trig_rx(local,
((struct ieee80211_hdr *)skb->data)->frame_control,
skb->len);
- __ieee80211_rx_handle_packet(hw, skb);
+ __ieee80211_rx_handle_packet(hw, skb, napi);
rcu_read_unlock();
@@ -3617,7 +3650,7 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb)
drop:
kfree_skb(skb);
}
-EXPORT_SYMBOL(ieee80211_rx);
+EXPORT_SYMBOL(ieee80211_rx_napi);
/* This is a version of the rx handler that can be called from hard irq
* context. Post the skb on the queue and schedule the tasklet */
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 666ddac3c87c..64f1936350c6 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -68,7 +68,7 @@ static const struct rhashtable_params sta_rht_params = {
.nelem_hint = 3, /* start small */
.automatic_shrinking = true,
.head_offset = offsetof(struct sta_info, hash_node),
- .key_offset = offsetof(struct sta_info, sta.addr),
+ .key_offset = offsetof(struct sta_info, addr),
.key_len = ETH_ALEN,
.hashfn = sta_addr_hash,
.max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
@@ -249,6 +249,9 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
if (sta->sta.txq[0])
kfree(to_txq_info(sta->sta.txq[0]));
kfree(rcu_dereference_raw(sta->sta.rates));
+#ifdef CONFIG_MAC80211_MESH
+ kfree(sta->mesh);
+#endif
kfree(sta);
}
@@ -313,13 +316,19 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
#ifdef CONFIG_MAC80211_MESH
- spin_lock_init(&sta->plink_lock);
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- !sdata->u.mesh.user_mpm)
- init_timer(&sta->plink_timer);
- sta->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
+ if (ieee80211_vif_is_mesh(&sdata->vif)) {
+ sta->mesh = kzalloc(sizeof(*sta->mesh), gfp);
+ if (!sta->mesh)
+ goto free;
+ spin_lock_init(&sta->mesh->plink_lock);
+ if (ieee80211_vif_is_mesh(&sdata->vif) &&
+ !sdata->u.mesh.user_mpm)
+ init_timer(&sta->mesh->plink_timer);
+ sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE;
+ }
#endif
+ memcpy(sta->addr, addr, ETH_ALEN);
memcpy(sta->sta.addr, addr, ETH_ALEN);
sta->local = local;
sta->sdata = sdata;
@@ -332,9 +341,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
ktime_get_ts(&uptime);
sta->last_connected = uptime.tv_sec;
- ewma_init(&sta->avg_signal, 1024, 8);
+ ewma_signal_init(&sta->avg_signal);
for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++)
- ewma_init(&sta->chain_signal_avg[i], 1024, 8);
+ ewma_signal_init(&sta->chain_signal_avg[i]);
if (local->ops->wake_tx_queue) {
void *txq_data;
@@ -405,6 +414,9 @@ free_txq:
if (sta->sta.txq[0])
kfree(to_txq_info(sta->sta.txq[0]));
free:
+#ifdef CONFIG_MAC80211_MESH
+ kfree(sta->mesh);
+#endif
kfree(sta);
return NULL;
}
@@ -623,7 +635,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
bool indicate_tim = false;
u8 ignore_for_tim = sta->sta.uapsd_queues;
int ac;
- u16 id;
+ u16 id = sta->sta.aid;
if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
@@ -631,12 +643,9 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
return;
ps = &sta->sdata->bss->ps;
- id = sta->sta.aid;
#ifdef CONFIG_MAC80211_MESH
} else if (ieee80211_vif_is_mesh(&sta->sdata->vif)) {
ps = &sta->sdata->u.mesh.ps;
- /* TIM map only for 1 <= PLID <= IEEE80211_MAX_AID */
- id = sta->plid % (IEEE80211_MAX_AID + 1);
#endif
} else {
return;
@@ -1887,7 +1896,8 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
}
if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) {
- sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal);
+ sinfo->signal_avg =
+ (s8) -ewma_signal_read(&sta->avg_signal);
sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG);
}
}
@@ -1902,7 +1912,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) {
sinfo->chain_signal[i] = sta->chain_signal_last[i];
sinfo->chain_signal_avg[i] =
- (s8) -ewma_read(&sta->chain_signal_avg[i]);
+ (s8) -ewma_signal_read(&sta->chain_signal_avg[i]);
}
}
@@ -1956,16 +1966,16 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
BIT(NL80211_STA_INFO_PEER_PM) |
BIT(NL80211_STA_INFO_NONPEER_PM);
- sinfo->llid = sta->llid;
- sinfo->plid = sta->plid;
- sinfo->plink_state = sta->plink_state;
+ sinfo->llid = sta->mesh->llid;
+ sinfo->plid = sta->mesh->plid;
+ sinfo->plink_state = sta->mesh->plink_state;
if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) {
sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET);
- sinfo->t_offset = sta->t_offset;
+ sinfo->t_offset = sta->mesh->t_offset;
}
- sinfo->local_pm = sta->local_pm;
- sinfo->peer_pm = sta->peer_pm;
- sinfo->nonpeer_pm = sta->nonpeer_pm;
+ sinfo->local_pm = sta->mesh->local_pm;
+ sinfo->peer_pm = sta->mesh->peer_pm;
+ sinfo->nonpeer_pm = sta->mesh->nonpeer_pm;
#endif
}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 226f8ca47ad6..b087c71ff7fe 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -53,6 +53,8 @@
* @WLAN_STA_TDLS_CHAN_SWITCH: This TDLS peer supports TDLS channel-switching
* @WLAN_STA_TDLS_OFF_CHANNEL: The local STA is currently off-channel with this
* TDLS peer
+ * @WLAN_STA_TDLS_WIDER_BW: This TDLS peer supports working on a wider bw on
+ * the BSS base channel.
* @WLAN_STA_UAPSD: Station requested unscheduled SP while driver was
* keeping station in power-save mode, reply when the driver
* unblocks the station.
@@ -84,6 +86,7 @@ enum ieee80211_sta_info_flags {
WLAN_STA_TDLS_INITIATOR,
WLAN_STA_TDLS_CHAN_SWITCH,
WLAN_STA_TDLS_OFF_CHANNEL,
+ WLAN_STA_TDLS_WIDER_BW,
WLAN_STA_UAPSD,
WLAN_STA_SP,
WLAN_STA_4ADDR_EVENT,
@@ -270,6 +273,56 @@ struct ieee80211_fast_tx {
};
/**
+ * struct mesh_sta - mesh STA information
+ * @plink_lock: serialize access to plink fields
+ * @llid: Local link ID
+ * @plid: Peer link ID
+ * @aid: local aid supplied by peer
+ * @reason: Cancel reason on PLINK_HOLDING state
+ * @plink_retries: Retries in establishment
+ * @plink_state: peer link state
+ * @plink_timeout: timeout of peer link
+ * @plink_timer: peer link watch timer
+ * @t_offset: timing offset relative to this host
+ * @t_offset_setpoint: reference timing offset of this sta to be used when
+ * calculating clockdrift
+ * @local_pm: local link-specific power save mode
+ * @peer_pm: peer-specific power save mode towards local STA
+ * @nonpeer_pm: STA power save mode towards non-peer neighbors
+ * @processed_beacon: set to true after peer rates and capabilities are
+ * processed
+ * @fail_avg: moving percentage of failed MSDUs
+ *
+ * Reached through the mesh pointer of struct sta_info. sta_info_alloc()
+ * allocates it zeroed only when the interface is a mesh interface, and
+ * sta_info_free() frees it, so the pointer is NULL for stations on
+ * non-mesh interfaces.
+ */
+struct mesh_sta {
+ struct timer_list plink_timer;
+
+ s64 t_offset;
+ s64 t_offset_setpoint;
+
+ spinlock_t plink_lock;
+ u16 llid;
+ u16 plid;
+ u16 aid;
+ u16 reason;
+ u8 plink_retries;
+
+ bool processed_beacon;
+
+ enum nl80211_plink_state plink_state;
+ u32 plink_timeout;
+
+ /* mesh power save */
+ enum nl80211_mesh_power_mode local_pm;
+ enum nl80211_mesh_power_mode peer_pm;
+ enum nl80211_mesh_power_mode nonpeer_pm;
+
+ /* moving percentage of failed MSDUs */
+ unsigned int fail_avg;
+};
+
+DECLARE_EWMA(signal, 1024, 8)
+
+/**
* struct sta_info - STA information
*
* This structure collects information about a station that
@@ -278,12 +331,13 @@ struct ieee80211_fast_tx {
* @list: global linked list entry
* @free_list: list entry for keeping track of stations to free
* @hash_node: hash node for rhashtable
+ * @addr: station's MAC address - duplicated from public part to
+ * let the hash table work with just a single cacheline
* @local: pointer to the global information
* @sdata: virtual interface this station belongs to
* @ptk: peer keys negotiated with this station, if any
* @ptk_idx: last installed peer key index
* @gtk: group keys negotiated with this station, if any
- * @gtk_idx: last installed group key index
* @rate_ctrl: rate control algorithm reference
* @rate_ctrl_lock: spinlock used to protect rate control data
* (data inside the algorithm, so serializes calls there)
@@ -318,30 +372,17 @@ struct ieee80211_fast_tx {
* @last_signal: signal of last received frame from this STA
* @avg_signal: moving average of signal of received frames from this STA
* @last_ack_signal: signal of last received Ack frame from this STA
- * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
+ * @last_seq_ctrl: last received seq/frag number from this STA (per TID
+ * plus one for non-QoS frames)
* @tx_filtered_count: number of frames the hardware filtered for this STA
* @tx_retry_failed: number of frames that failed retry
* @tx_retry_count: total number of retries for frames to this STA
- * @fail_avg: moving percentage of failed MSDUs
* @tx_packets: number of RX/TX MSDUs
* @tx_bytes: number of bytes transmitted to this STA
* @tid_seq: per-TID sequence numbers for sending to this STA
* @ampdu_mlme: A-MPDU state machine state
* @timer_to_tid: identity mapping to ID timers
- * @plink_lock: serialize access to plink fields
- * @llid: Local link ID
- * @plid: Peer link ID
- * @reason: Cancel reason on PLINK_HOLDING state
- * @plink_retries: Retries in establishment
- * @plink_state: peer link state
- * @plink_timeout: timeout of peer link
- * @plink_timer: peer link watch timer
- * @t_offset: timing offset relative to this host
- * @t_offset_setpoint: reference timing offset of this sta to be used when
- * calculating clockdrift
- * @local_pm: local link-specific power save mode
- * @peer_pm: peer-specific power save mode towards local STA
- * @nonpeer_pm: STA power save mode towards non-peer neighbors
+ * @mesh: mesh STA information
* @debugfs: debug filesystem info
* @dead: set to true when sta is unlinked
* @uploaded: set to true when sta is uploaded to the driver
@@ -369,19 +410,19 @@ struct ieee80211_fast_tx {
* @rx_msdu: MSDUs received from this station, using IEEE80211_NUM_TID
* entry for non-QoS frames
* @fast_tx: TX fastpath information
- * @processed_beacon: set to true after peer rates and capabilities are
- * processed
+ * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to
+ * the BSS one.
*/
struct sta_info {
/* General information, mostly static */
struct list_head list, free_list;
struct rcu_head rcu_head;
struct rhash_head hash_node;
+ u8 addr[ETH_ALEN];
struct ieee80211_local *local;
struct ieee80211_sub_if_data *sdata;
struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS];
struct ieee80211_key __rcu *ptk[NUM_DEFAULT_KEYS];
- u8 gtk_idx;
u8 ptk_idx;
struct rate_control_ref *rate_ctrl;
void *rate_ctrl_priv;
@@ -390,6 +431,10 @@ struct sta_info {
struct ieee80211_fast_tx __rcu *fast_tx;
+#ifdef CONFIG_MAC80211_MESH
+ struct mesh_sta *mesh;
+#endif
+
struct work_struct drv_deliver_wk;
u16 listen_interval;
@@ -419,12 +464,12 @@ struct sta_info {
unsigned long rx_fragments;
unsigned long rx_dropped;
int last_signal;
- struct ewma avg_signal;
+ struct ewma_signal avg_signal;
int last_ack_signal;
u8 chains;
s8 chain_signal_last[IEEE80211_MAX_CHAINS];
- struct ewma chain_signal_avg[IEEE80211_MAX_CHAINS];
+ struct ewma_signal chain_signal_avg[IEEE80211_MAX_CHAINS];
/* Plus 1 for non-QoS frames */
__le16 last_seq_ctrl[IEEE80211_NUM_TIDS + 1];
@@ -432,8 +477,6 @@ struct sta_info {
/* Updated from TX status path only, no locking requirements */
unsigned long tx_filtered_count;
unsigned long tx_retry_failed, tx_retry_count;
- /* moving percentage of failed MSDUs */
- unsigned int fail_avg;
/* Updated from TX path only, no locking requirements */
u64 tx_packets[IEEE80211_NUM_ACS];
@@ -455,29 +498,6 @@ struct sta_info {
struct sta_ampdu_mlme ampdu_mlme;
u8 timer_to_tid[IEEE80211_NUM_TIDS];
-#ifdef CONFIG_MAC80211_MESH
- /*
- * Mesh peer link attributes, protected by plink_lock.
- * TODO: move to a sub-structure that is referenced with pointer?
- */
- spinlock_t plink_lock;
- u16 llid;
- u16 plid;
- u16 reason;
- u8 plink_retries;
- enum nl80211_plink_state plink_state;
- u32 plink_timeout;
- struct timer_list plink_timer;
-
- s64 t_offset;
- s64 t_offset_setpoint;
- /* mesh power save */
- enum nl80211_mesh_power_mode local_pm;
- enum nl80211_mesh_power_mode peer_pm;
- enum nl80211_mesh_power_mode nonpeer_pm;
- bool processed_beacon;
-#endif
-
#ifdef CONFIG_MAC80211_DEBUGFS
struct sta_info_debugfsdentries {
struct dentry *dir;
@@ -498,6 +518,8 @@ struct sta_info {
u8 reserved_tid;
+ struct cfg80211_chan_def tdls_chandef;
+
/* keep last! */
struct ieee80211_sta sta;
};
@@ -505,7 +527,7 @@ struct sta_info {
static inline enum nl80211_plink_state sta_plink_state(struct sta_info *sta)
{
#ifdef CONFIG_MAC80211_MESH
- return sta->plink_state;
+ return sta->mesh->plink_state;
#endif
return NL80211_PLINK_LISTEN;
}
@@ -608,7 +630,7 @@ u32 sta_addr_hash(const void *key, u32 length, u32 seed);
_sta_bucket_idx(tbl, _addr), \
hash_node) \
/* compare address and run code only if it matches */ \
- if (ether_addr_equal(_sta->sta.addr, (_addr)))
+ if (ether_addr_equal(_sta->addr, (_addr)))
/*
* Get STA info by index, BROKEN!
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 45628f37c083..8ba583243509 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -515,7 +515,7 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
if (!sdata) {
skb->dev = NULL;
- } else if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) {
+ } else {
unsigned int hdr_size =
ieee80211_hdrlen(hdr->frame_control);
@@ -529,9 +529,6 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
ieee80211_mgd_conn_tx_status(sdata,
hdr->frame_control,
acked);
- } else {
- /* we assign ack frame ID for the others */
- WARN_ON(1);
}
rcu_read_unlock();
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 8db6e2994bbc..aee701a5649e 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -4,6 +4,7 @@
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2014, Intel Corporation
* Copyright 2014 Intel Mobile Communications GmbH
+ * Copyright 2015 Intel Deutschland GmbH
*
* This file is GPLv2 as found in COPYING.
*/
@@ -11,6 +12,7 @@
#include <linux/ieee80211.h>
#include <linux/log2.h>
#include <net/cfg80211.h>
+#include <linux/rtnetlink.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -35,20 +37,28 @@ void ieee80211_tdls_peer_del_work(struct work_struct *wk)
mutex_unlock(&local->mtx);
}
-static void ieee80211_tdls_add_ext_capab(struct ieee80211_local *local,
+static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
- u8 *pos = (void *)skb_put(skb, 7);
+ struct ieee80211_local *local = sdata->local;
bool chan_switch = local->hw.wiphy->features &
NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
+ bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW);
+ enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
+ bool vht = sband && sband->vht_cap.vht_supported;
+ u8 *pos = (void *)skb_put(skb, 10);
*pos++ = WLAN_EID_EXT_CAPABILITY;
- *pos++ = 5; /* len */
+ *pos++ = 8; /* len */
*pos++ = 0x0;
*pos++ = 0x0;
*pos++ = 0x0;
*pos++ = chan_switch ? WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH : 0;
*pos++ = WLAN_EXT_CAPA5_TDLS_ENABLED;
+ *pos++ = 0;
+ *pos++ = 0;
+ *pos++ = (vht && wider_band) ? WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED : 0;
}
static u8
@@ -284,6 +294,60 @@ static void ieee80211_tdls_add_wmm_param_ie(struct ieee80211_sub_if_data *sdata,
}
static void
+ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
+ struct sta_info *sta)
+{
+ /* IEEE802.11ac-2013 Table E-4 */
+ u16 centers_80mhz[] = { 5210, 5290, 5530, 5610, 5690, 5775 };
+ struct cfg80211_chan_def uc = sta->tdls_chandef;
+ enum nl80211_chan_width max_width = ieee80211_get_sta_bw(&sta->sta);
+ int i;
+
+ /* only support upgrading non-narrow channels up to 80Mhz */
+ if (max_width == NL80211_CHAN_WIDTH_5 ||
+ max_width == NL80211_CHAN_WIDTH_10)
+ return;
+
+ if (max_width > NL80211_CHAN_WIDTH_80)
+ max_width = NL80211_CHAN_WIDTH_80;
+
+ if (uc.width == max_width)
+ return;
+ /*
+ * Channel usage constrains in the IEEE802.11ac-2013 specification only
+ * allow expanding a 20MHz channel to 80MHz in a single way. In
+ * addition, there are no 40MHz allowed channels that are not part of
+ * the allowed 80MHz range in the 5GHz spectrum (the relevant one here).
+ */
+ for (i = 0; i < ARRAY_SIZE(centers_80mhz); i++)
+ if (abs(uc.chan->center_freq - centers_80mhz[i]) <= 30) {
+ uc.center_freq1 = centers_80mhz[i];
+ uc.width = NL80211_CHAN_WIDTH_80;
+ break;
+ }
+
+ if (!uc.center_freq1)
+ return;
+
+ /* proceed to downgrade the chandef until usable or the same */
+ while (uc.width > max_width &&
+ !cfg80211_reg_can_beacon(sdata->local->hw.wiphy,
+ &uc, sdata->wdev.iftype))
+ ieee80211_chandef_downgrade(&uc);
+
+ if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
+ tdls_dbg(sdata, "TDLS ch width upgraded %d -> %d\n",
+ sta->tdls_chandef.width, uc.width);
+
+ /*
+ * the station is not yet authorized when BW upgrade is done,
+ * locking is not required
+ */
+ sta->tdls_chandef = uc;
+ }
+}
+
+static void
ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, const u8 *peer,
u8 action_code, bool initiator,
@@ -320,7 +384,7 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
offset = noffset;
}
- ieee80211_tdls_add_ext_capab(local, skb);
+ ieee80211_tdls_add_ext_capab(sdata, skb);
/* add the QoS element if we support it */
if (local->hw.queues >= IEEE80211_NUM_ACS &&
@@ -350,15 +414,17 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
offset = noffset;
}
- rcu_read_lock();
+ mutex_lock(&local->sta_mtx);
/* we should have the peer STA if we're already responding */
if (action_code == WLAN_TDLS_SETUP_RESPONSE) {
sta = sta_info_get(sdata, peer);
if (WARN_ON_ONCE(!sta)) {
- rcu_read_unlock();
+ mutex_unlock(&local->sta_mtx);
return;
}
+
+ sta->tdls_chandef = sdata->vif.bss_conf.chandef;
}
ieee80211_tdls_add_oper_classes(sdata, skb);
@@ -384,10 +450,6 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
ieee80211_ie_build_ht_cap(pos, &ht_cap, ht_cap.cap);
} else if (action_code == WLAN_TDLS_SETUP_RESPONSE &&
ht_cap.ht_supported && sta->sta.ht_cap.ht_supported) {
- /* disable SMPS in TDLS responder */
- sta->sta.ht_cap.cap |= WLAN_HT_CAP_SM_PS_DISABLED
- << IEEE80211_HT_CAP_SM_PS_SHIFT;
-
/* the peer caps are already intersected with our own */
memcpy(&ht_cap, &sta->sta.ht_cap, sizeof(ht_cap));
@@ -448,9 +510,16 @@ ieee80211_tdls_add_setup_start_ies(struct ieee80211_sub_if_data *sdata,
pos = skb_put(skb, sizeof(struct ieee80211_vht_cap) + 2);
ieee80211_ie_build_vht_cap(pos, &vht_cap, vht_cap.cap);
+
+ /*
+ * if both peers support WIDER_BW, we can expand the chandef to
+ * a wider compatible one, up to 80MHz
+ */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
+ ieee80211_tdls_chandef_vht_upgrade(sdata, sta);
}
- rcu_read_unlock();
+ mutex_unlock(&local->sta_mtx);
/* add any remaining IEs */
if (extra_ies_len) {
@@ -474,15 +543,17 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
u8 *pos;
- rcu_read_lock();
+ mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, peer);
ap_sta = sta_info_get(sdata, ifmgd->bssid);
if (WARN_ON_ONCE(!sta || !ap_sta)) {
- rcu_read_unlock();
+ mutex_unlock(&local->sta_mtx);
return;
}
+ sta->tdls_chandef = sdata->vif.bss_conf.chandef;
+
/* add any custom IEs that go before the QoS IE */
if (extra_ies_len) {
static const u8 before_qos[] = {
@@ -530,12 +601,19 @@ ieee80211_tdls_add_setup_cfm_ies(struct ieee80211_sub_if_data *sdata,
/* only include VHT-operation if not on the 2.4GHz band */
if (band != IEEE80211_BAND_2GHZ && sta->sta.vht_cap.vht_supported) {
+ /*
+ * if both peers support WIDER_BW, we can expand the chandef to
+ * a wider compatible one, up to 80MHz
+ */
+ if (test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
+ ieee80211_tdls_chandef_vht_upgrade(sdata, sta);
+
pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
ieee80211_ie_build_vht_oper(pos, &sta->sta.vht_cap,
- &sdata->vif.bss_conf.chandef);
+ &sta->tdls_chandef);
}
- rcu_read_unlock();
+ mutex_unlock(&local->sta_mtx);
/* add any remaining IEs */
if (extra_ies_len) {
@@ -784,7 +862,7 @@ ieee80211_tdls_build_mgmt_packet_data(struct ieee80211_sub_if_data *sdata,
max(sizeof(struct ieee80211_mgmt),
sizeof(struct ieee80211_tdls_data)) +
50 + /* supported rates */
- 7 + /* ext capab */
+ 10 + /* ext capab */
26 + /* max(WMM-info, WMM-param) */
2 + max(sizeof(struct ieee80211_ht_cap),
sizeof(struct ieee80211_ht_operation)) +
@@ -983,8 +1061,17 @@ ieee80211_tdls_mgmt_setup(struct wiphy *wiphy, struct net_device *dev,
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
+ enum ieee80211_smps_mode smps_mode = sdata->u.mgd.driver_smps_mode;
int ret;
+ /* don't support setup with forced SMPS mode that's not off */
+ if (smps_mode != IEEE80211_SMPS_AUTOMATIC &&
+ smps_mode != IEEE80211_SMPS_OFF) {
+ tdls_dbg(sdata, "Aborting TDLS setup due to SMPS mode %d\n",
+ smps_mode);
+ return -ENOTSUPP;
+ }
+
mutex_lock(&local->mtx);
/* we don't support concurrent TDLS peer setups */
@@ -1146,6 +1233,22 @@ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev,
return ret;
}
+static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *conf;
+ struct ieee80211_chanctx *ctx;
+
+ mutex_lock(&local->chanctx_mtx);
+ conf = rcu_dereference_protected(sdata->vif.chanctx_conf,
+ lockdep_is_held(&local->chanctx_mtx));
+ if (conf) {
+ ctx = container_of(conf, struct ieee80211_chanctx, conf);
+ ieee80211_recalc_chanctx_chantype(local, ctx);
+ }
+ mutex_unlock(&local->chanctx_mtx);
+}
+
int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
const u8 *peer, enum nl80211_tdls_operation oper)
{
@@ -1182,6 +1285,8 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
break;
}
+ iee80211_tdls_recalc_chanctx(sdata);
+
rcu_read_lock();
sta = sta_info_get(sdata, peer);
if (!sta) {
@@ -1213,6 +1318,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
ieee80211_flush_queues(local, sdata, false);
ret = sta_info_destroy_addr(sdata, peer);
+ iee80211_tdls_recalc_chanctx(sdata);
break;
default:
ret = -ENOTSUPP;
@@ -1224,6 +1330,10 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev,
eth_zero_addr(sdata->u.mgd.tdls_peer);
}
+ if (ret == 0)
+ ieee80211_queue_work(&sdata->local->hw,
+ &sdata->u.mgd.request_smps_work);
+
mutex_unlock(&local->mtx);
return ret;
}
@@ -1627,6 +1737,31 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
return -EINVAL;
}
+ if (!elems.sec_chan_offs) {
+ chan_type = NL80211_CHAN_HT20;
+ } else {
+ switch (elems.sec_chan_offs->sec_chan_offs) {
+ case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
+ chan_type = NL80211_CHAN_HT40PLUS;
+ break;
+ case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
+ chan_type = NL80211_CHAN_HT40MINUS;
+ break;
+ default:
+ chan_type = NL80211_CHAN_HT20;
+ break;
+ }
+ }
+
+ cfg80211_chandef_create(&chandef, chan, chan_type);
+
+ /* we will be active on the TDLS link */
+ if (!cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &chandef,
+ sdata->wdev.iftype)) {
+ tdls_dbg(sdata, "TDLS chan switch to forbidden channel\n");
+ return -EINVAL;
+ }
+
mutex_lock(&local->sta_mtx);
sta = sta_info_get(sdata, tf->sa);
if (!sta || !test_sta_flag(sta, WLAN_STA_TDLS_PEER_AUTH)) {
@@ -1647,27 +1782,15 @@ ieee80211_process_tdls_channel_switch_req(struct ieee80211_sub_if_data *sdata,
goto out;
}
- if (!sta->sta.ht_cap.ht_supported) {
- chan_type = NL80211_CHAN_NO_HT;
- } else if (!elems.sec_chan_offs) {
- chan_type = NL80211_CHAN_HT20;
- } else {
- switch (elems.sec_chan_offs->sec_chan_offs) {
- case IEEE80211_HT_PARAM_CHA_SEC_ABOVE:
- chan_type = NL80211_CHAN_HT40PLUS;
- break;
- case IEEE80211_HT_PARAM_CHA_SEC_BELOW:
- chan_type = NL80211_CHAN_HT40MINUS;
- break;
- default:
- chan_type = NL80211_CHAN_HT20;
- break;
- }
+ /* peer should have known better */
+ if (!sta->sta.ht_cap.ht_supported && elems.sec_chan_offs &&
+ elems.sec_chan_offs->sec_chan_offs) {
+ tdls_dbg(sdata, "TDLS chan switch - wide chan unsupported\n");
+ ret = -ENOTSUPP;
+ goto out;
}
- cfg80211_chandef_create(&chandef, chan, chan_type);
params.chandef = &chandef;
-
params.switch_time = le16_to_cpu(elems.ch_sw_timing->switch_time);
params.switch_timeout = le16_to_cpu(elems.ch_sw_timing->switch_timeout);
@@ -1691,12 +1814,15 @@ out:
return ret;
}
-void ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
- struct sk_buff *skb)
+static void
+ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
{
struct ieee80211_tdls_data *tf = (void *)skb->data;
struct wiphy *wiphy = sdata->local->hw.wiphy;
+ ASSERT_RTNL();
+
/* make sure the driver supports it */
if (!(wiphy->features & NL80211_FEATURE_TDLS_CHANNEL_SWITCH))
return;
@@ -1720,3 +1846,47 @@ void ieee80211_process_tdls_channel_switch(struct ieee80211_sub_if_data *sdata,
return;
}
}
+
+void ieee80211_teardown_tdls_peers(struct ieee80211_sub_if_data *sdata)
+{
+ struct sta_info *sta;
+ u16 reason = WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sta, &sdata->local->sta_list, list) {
+ if (!sta->sta.tdls || sta->sdata != sdata || !sta->uploaded ||
+ !test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ continue;
+
+ ieee80211_tdls_oper_request(&sdata->vif, sta->sta.addr,
+ NL80211_TDLS_TEARDOWN, reason,
+ GFP_ATOMIC);
+ }
+ rcu_read_unlock();
+}
+
+void ieee80211_tdls_chsw_work(struct work_struct *wk)
+{
+ struct ieee80211_local *local =
+ container_of(wk, struct ieee80211_local, tdls_chsw_work);
+ struct ieee80211_sub_if_data *sdata;
+ struct sk_buff *skb;
+ struct ieee80211_tdls_data *tf;
+
+ rtnl_lock();
+ while ((skb = skb_dequeue(&local->skb_queue_tdls_chsw))) {
+ tf = (struct ieee80211_tdls_data *)skb->data;
+ list_for_each_entry(sdata, &local->interfaces, list) {
+ if (!ieee80211_sdata_running(sdata) ||
+ sdata->vif.type != NL80211_IFTYPE_STATION ||
+ !ether_addr_equal(tf->da, sdata->vif.addr))
+ continue;
+
+ ieee80211_process_tdls_channel_switch(sdata, skb);
+ break;
+ }
+
+ kfree_skb(skb);
+ }
+ rtnl_unlock();
+}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b8233505bf9f..84e0e8c7fb23 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -311,9 +311,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
if (tx->sdata->vif.type == NL80211_IFTYPE_WDS)
return TX_CONTINUE;
- if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- return TX_CONTINUE;
-
if (tx->flags & IEEE80211_TX_PS_BUFFERED)
return TX_CONTINUE;
@@ -610,7 +607,6 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
if (tx->key) {
bool skip_hw = false;
- tx->key->tx_rx_count++;
/* TODO: add threshold stuff again */
switch (tx->key->conf.cipher) {
@@ -690,7 +686,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
txrc.bss = (tx->sdata->vif.type == NL80211_IFTYPE_AP ||
tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
- tx->sdata->vif.type == NL80211_IFTYPE_ADHOC);
+ tx->sdata->vif.type == NL80211_IFTYPE_ADHOC ||
+ tx->sdata->vif.type == NL80211_IFTYPE_OCB);
/* set up RTS protection if desired */
if (len > tx->local->hw.wiphy->rts_threshold) {
@@ -2777,7 +2774,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
sdata->sequence_number += 0x10;
}
- sta->tx_msdu[tid]++;
+ if (skb_shinfo(skb)->gso_size)
+ sta->tx_msdu[tid] +=
+ DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size);
+ else
+ sta->tx_msdu[tid]++;
info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)];
@@ -3213,6 +3214,16 @@ static void ieee80211_set_csa(struct ieee80211_sub_if_data *sdata,
rcu_read_unlock();
}
+static u8 __ieee80211_csa_update_counter(struct beacon_data *beacon)
+{
+ beacon->csa_current_counter--;
+
+ /* the counter should never reach 0 */
+ WARN_ON_ONCE(!beacon->csa_current_counter);
+
+ return beacon->csa_current_counter;
+}
+
u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
@@ -3231,11 +3242,7 @@ u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif)
if (!beacon)
goto unlock;
- beacon->csa_current_counter--;
-
- /* the counter should never reach 0 */
- WARN_ON_ONCE(!beacon->csa_current_counter);
- count = beacon->csa_current_counter;
+ count = __ieee80211_csa_update_counter(beacon);
unlock:
rcu_read_unlock();
@@ -3335,7 +3342,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (beacon) {
if (beacon->csa_counter_offsets[0]) {
if (!is_template)
- ieee80211_csa_update_counter(vif);
+ __ieee80211_csa_update_counter(beacon);
ieee80211_set_csa(sdata, beacon);
}
@@ -3381,7 +3388,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
if (beacon->csa_counter_offsets[0]) {
if (!is_template)
- ieee80211_csa_update_counter(vif);
+ __ieee80211_csa_update_counter(beacon);
ieee80211_set_csa(sdata, beacon);
}
@@ -3411,7 +3418,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
* for now we leave it consistent with overall
* mac80211's behavior.
*/
- ieee80211_csa_update_counter(vif);
+ __ieee80211_csa_update_counter(beacon);
ieee80211_set_csa(sdata, beacon);
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 43e5aadd7a89..1104421bc525 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -47,55 +47,6 @@ struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy)
}
EXPORT_SYMBOL(wiphy_to_ieee80211_hw);
-u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
- enum nl80211_iftype type)
-{
- __le16 fc = hdr->frame_control;
-
- /* drop ACK/CTS frames and incorrect hdr len (ctrl) */
- if (len < 16)
- return NULL;
-
- if (ieee80211_is_data(fc)) {
- if (len < 24) /* drop incorrect hdr len (data) */
- return NULL;
-
- if (ieee80211_has_a4(fc))
- return NULL;
- if (ieee80211_has_tods(fc))
- return hdr->addr1;
- if (ieee80211_has_fromds(fc))
- return hdr->addr2;
-
- return hdr->addr3;
- }
-
- if (ieee80211_is_mgmt(fc)) {
- if (len < 24) /* drop incorrect hdr len (mgmt) */
- return NULL;
- return hdr->addr3;
- }
-
- if (ieee80211_is_ctl(fc)) {
- if (ieee80211_is_pspoll(fc))
- return hdr->addr1;
-
- if (ieee80211_is_back_req(fc)) {
- switch (type) {
- case NL80211_IFTYPE_STATION:
- return hdr->addr2;
- case NL80211_IFTYPE_AP:
- case NL80211_IFTYPE_AP_VLAN:
- return hdr->addr1;
- default:
- break; /* fall through to the return */
- }
- }
- }
-
- return NULL;
-}
-
void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx)
{
struct sk_buff *skb;
@@ -752,7 +703,12 @@ EXPORT_SYMBOL_GPL(wdev_to_ieee80211_vif);
struct wireless_dev *ieee80211_vif_to_wdev(struct ieee80211_vif *vif)
{
- struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_sub_if_data *sdata;
+
+ if (!vif)
+ return NULL;
+
+ sdata = vif_to_sdata(vif);
if (!ieee80211_sdata_running(sdata) ||
!(sdata->flags & IEEE80211_SDATA_IN_DRIVER))
@@ -1709,6 +1665,7 @@ static void ieee80211_handle_reconfig_failure(struct ieee80211_local *local)
local->resuming = false;
local->suspended = false;
local->started = false;
+ local->in_reconfig = false;
/* scheduled scan clearly can't be running any more, but tell
* cfg80211 and clear local state
@@ -1759,16 +1716,24 @@ int ieee80211_reconfig(struct ieee80211_local *local)
struct ieee80211_sub_if_data *sched_scan_sdata;
struct cfg80211_sched_scan_request *sched_scan_req;
bool sched_scan_stopped = false;
+ bool suspended = local->suspended;
/* nothing to do if HW shouldn't run */
if (!local->open_count)
goto wake_up;
#ifdef CONFIG_PM
- if (local->suspended)
+ if (suspended)
local->resuming = true;
if (local->wowlan) {
+ /*
+ * In the wowlan case, both mac80211 and the device
+ * are functional when the resume op is called, so
+ * clear local->suspended so the device could operate
+ * normally (e.g. pass rx frames).
+ */
+ local->suspended = false;
res = drv_resume(local);
local->wowlan = false;
if (res < 0) {
@@ -1781,8 +1746,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
/*
* res is 1, which means the driver requested
* to go through a regular reset on wakeup.
+ * restore local->suspended in this case.
*/
reconfig_due_to_wowlan = true;
+ local->suspended = true;
}
#endif
@@ -1794,7 +1761,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
*/
res = drv_start(local);
if (res) {
- if (local->suspended)
+ if (suspended)
WARN(1, "Hardware became unavailable upon resume. This could be a software issue prior to suspend or a hardware issue.\n");
else
WARN(1, "Hardware became unavailable during restart.\n");
@@ -2088,10 +2055,10 @@ int ieee80211_reconfig(struct ieee80211_local *local)
* If this is for hw restart things are still running.
* We may want to change that later, however.
*/
- if (local->open_count && (!local->suspended || reconfig_due_to_wowlan))
+ if (local->open_count && (!suspended || reconfig_due_to_wowlan))
drv_reconfig_complete(local, IEEE80211_RECONFIG_TYPE_RESTART);
- if (!local->suspended)
+ if (!suspended)
return 0;
#ifdef CONFIG_PM
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 80694d55db74..834ccdbc74be 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -308,11 +308,15 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta)
{
struct ieee80211_sub_if_data *sdata = sta->sdata;
enum ieee80211_sta_rx_bandwidth bw;
+ enum nl80211_chan_width bss_width = sdata->vif.bss_conf.chandef.width;
- bw = ieee80211_chan_width_to_rx_bw(sdata->vif.bss_conf.chandef.width);
- bw = min(bw, ieee80211_sta_cap_rx_bw(sta));
+ bw = ieee80211_sta_cap_rx_bw(sta);
bw = min(bw, sta->cur_max_bandwidth);
+ /* do not cap the BW of TDLS WIDER_BW peers by the bss */
+ if (!test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW))
+ bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width));
+
return bw;
}
@@ -422,3 +426,29 @@ void ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
if (changed > 0)
rate_control_rate_update(local, sband, sta, changed);
}
+
+void ieee80211_get_vht_mask_from_cap(__le16 vht_cap,
+ u16 vht_mask[NL80211_VHT_NSS_MAX])
+{
+ int i;
+ u16 mask, cap = le16_to_cpu(vht_cap);
+
+ for (i = 0; i < NL80211_VHT_NSS_MAX; i++) {
+ mask = (cap >> i * 2) & IEEE80211_VHT_MCS_NOT_SUPPORTED;
+ switch (mask) {
+ case IEEE80211_VHT_MCS_SUPPORT_0_7:
+ vht_mask[i] = 0x00FF;
+ break;
+ case IEEE80211_VHT_MCS_SUPPORT_0_8:
+ vht_mask[i] = 0x01FF;
+ break;
+ case IEEE80211_VHT_MCS_SUPPORT_0_9:
+ vht_mask[i] = 0x03FF;
+ break;
+ case IEEE80211_VHT_MCS_NOT_SUPPORTED:
+ default:
+ vht_mask[i] = 0;
+ break;
+ }
+ }
+}
diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c
index 943f7606527e..feb547dc8643 100644
--- a/net/mac80211/wpa.c
+++ b/net/mac80211/wpa.c
@@ -516,31 +516,34 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx,
return RX_DROP_UNUSABLE;
}
- ccmp_hdr2pn(pn, skb->data + hdrlen);
+ if (!(status->flag & RX_FLAG_PN_VALIDATED)) {
+ ccmp_hdr2pn(pn, skb->data + hdrlen);
- queue = rx->security_idx;
+ queue = rx->security_idx;
- if (memcmp(pn, key->u.ccmp.rx_pn[queue], IEEE80211_CCMP_PN_LEN) <= 0) {
- key->u.ccmp.replays++;
- return RX_DROP_UNUSABLE;
- }
+ if (memcmp(pn, key->u.ccmp.rx_pn[queue],
+ IEEE80211_CCMP_PN_LEN) <= 0) {
+ key->u.ccmp.replays++;
+ return RX_DROP_UNUSABLE;
+ }
- if (!(status->flag & RX_FLAG_DECRYPTED)) {
- u8 aad[2 * AES_BLOCK_SIZE];
- u8 b_0[AES_BLOCK_SIZE];
- /* hardware didn't decrypt/verify MIC */
- ccmp_special_blocks(skb, pn, b_0, aad);
+ if (!(status->flag & RX_FLAG_DECRYPTED)) {
+ u8 aad[2 * AES_BLOCK_SIZE];
+ u8 b_0[AES_BLOCK_SIZE];
+ /* hardware didn't decrypt/verify MIC */
+ ccmp_special_blocks(skb, pn, b_0, aad);
+
+ if (ieee80211_aes_ccm_decrypt(
+ key->u.ccmp.tfm, b_0, aad,
+ skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
+ data_len,
+ skb->data + skb->len - mic_len, mic_len))
+ return RX_DROP_UNUSABLE;
+ }
- if (ieee80211_aes_ccm_decrypt(
- key->u.ccmp.tfm, b_0, aad,
- skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN,
- data_len,
- skb->data + skb->len - mic_len, mic_len))
- return RX_DROP_UNUSABLE;
+ memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
}
- memcpy(key->u.ccmp.rx_pn[queue], pn, IEEE80211_CCMP_PN_LEN);
-
/* Remove CCMP header and MIC */
if (pskb_trim(skb, skb->len - mic_len))
return RX_DROP_UNUSABLE;
@@ -739,31 +742,35 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx)
return RX_DROP_UNUSABLE;
}
- gcmp_hdr2pn(pn, skb->data + hdrlen);
+ if (!(status->flag & RX_FLAG_PN_VALIDATED)) {
+ gcmp_hdr2pn(pn, skb->data + hdrlen);
- queue = rx->security_idx;
+ queue = rx->security_idx;
- if (memcmp(pn, key->u.gcmp.rx_pn[queue], IEEE80211_GCMP_PN_LEN) <= 0) {
- key->u.gcmp.replays++;
- return RX_DROP_UNUSABLE;
- }
+ if (memcmp(pn, key->u.gcmp.rx_pn[queue],
+ IEEE80211_GCMP_PN_LEN) <= 0) {
+ key->u.gcmp.replays++;
+ return RX_DROP_UNUSABLE;
+ }
- if (!(status->flag & RX_FLAG_DECRYPTED)) {
- u8 aad[2 * AES_BLOCK_SIZE];
- u8 j_0[AES_BLOCK_SIZE];
- /* hardware didn't decrypt/verify MIC */
- gcmp_special_blocks(skb, pn, j_0, aad);
+ if (!(status->flag & RX_FLAG_DECRYPTED)) {
+ u8 aad[2 * AES_BLOCK_SIZE];
+ u8 j_0[AES_BLOCK_SIZE];
+ /* hardware didn't decrypt/verify MIC */
+ gcmp_special_blocks(skb, pn, j_0, aad);
+
+ if (ieee80211_aes_gcm_decrypt(
+ key->u.gcmp.tfm, j_0, aad,
+ skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN,
+ data_len,
+ skb->data + skb->len -
+ IEEE80211_GCMP_MIC_LEN))
+ return RX_DROP_UNUSABLE;
+ }
- if (ieee80211_aes_gcm_decrypt(
- key->u.gcmp.tfm, j_0, aad,
- skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN,
- data_len,
- skb->data + skb->len - IEEE80211_GCMP_MIC_LEN))
- return RX_DROP_UNUSABLE;
+ memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
}
- memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN);
-
/* Remove GCMP header and MIC */
if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN))
return RX_DROP_UNUSABLE;
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index 317c4662e544..c865ebb2ace2 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -44,6 +44,49 @@ static void ieee802154_del_iface_deprecated(struct wpan_phy *wpan_phy,
ieee802154_if_remove(sdata);
}
+#ifdef CONFIG_PM
+static int ieee802154_suspend(struct wpan_phy *wpan_phy)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+
+ if (!local->open_count)
+ goto suspend;
+
+ ieee802154_stop_queue(&local->hw);
+ synchronize_net();
+
+ /* stop hardware - this must stop RX */
+ ieee802154_stop_device(local);
+
+suspend:
+ local->suspended = true;
+ return 0;
+}
+
+static int ieee802154_resume(struct wpan_phy *wpan_phy)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ int ret;
+
+ /* nothing to do if HW shouldn't run */
+ if (!local->open_count)
+ goto wake_up;
+
+ /* restart hardware */
+ ret = drv_start(local);
+ if (ret)
+ return ret;
+
+wake_up:
+ ieee802154_wake_queue(&local->hw);
+ local->suspended = false;
+ return 0;
+}
+#else
+#define ieee802154_suspend NULL
+#define ieee802154_resume NULL
+#endif
+
static int
ieee802154_add_iface(struct wpan_phy *phy, const char *name,
unsigned char name_assign_type,
@@ -145,13 +188,18 @@ static int
ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
__le16 pan_id)
{
+ int ret;
+
ASSERT_RTNL();
if (wpan_dev->pan_id == pan_id)
return 0;
- wpan_dev->pan_id = pan_id;
- return 0;
+ ret = mac802154_wpan_update_llsec(wpan_dev->netdev);
+ if (!ret)
+ wpan_dev->pan_id = pan_id;
+
+ return ret;
}
static int
@@ -161,10 +209,6 @@ ieee802154_set_backoff_exponent(struct wpan_phy *wpan_phy,
{
ASSERT_RTNL();
- if (wpan_dev->min_be == min_be &&
- wpan_dev->max_be == max_be)
- return 0;
-
wpan_dev->min_be = min_be;
wpan_dev->max_be = max_be;
return 0;
@@ -176,9 +220,6 @@ ieee802154_set_short_addr(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
{
ASSERT_RTNL();
- if (wpan_dev->short_addr == short_addr)
- return 0;
-
wpan_dev->short_addr = short_addr;
return 0;
}
@@ -190,9 +231,6 @@ ieee802154_set_max_csma_backoffs(struct wpan_phy *wpan_phy,
{
ASSERT_RTNL();
- if (wpan_dev->csma_retries == max_csma_backoffs)
- return 0;
-
wpan_dev->csma_retries = max_csma_backoffs;
return 0;
}
@@ -204,9 +242,6 @@ ieee802154_set_max_frame_retries(struct wpan_phy *wpan_phy,
{
ASSERT_RTNL();
- if (wpan_dev->frame_retries == max_frame_retries)
- return 0;
-
wpan_dev->frame_retries = max_frame_retries;
return 0;
}
@@ -217,16 +252,25 @@ ieee802154_set_lbt_mode(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
{
ASSERT_RTNL();
- if (wpan_dev->lbt == mode)
- return 0;
-
wpan_dev->lbt = mode;
return 0;
}
+static int
+ieee802154_set_ackreq_default(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev, bool ackreq)
+{
+ ASSERT_RTNL();
+
+ wpan_dev->ackreq = ackreq;
+ return 0;
+}
+
const struct cfg802154_ops mac802154_config_ops = {
.add_virtual_intf_deprecated = ieee802154_add_iface_deprecated,
.del_virtual_intf_deprecated = ieee802154_del_iface_deprecated,
+ .suspend = ieee802154_suspend,
+ .resume = ieee802154_resume,
.add_virtual_intf = ieee802154_add_iface,
.del_virtual_intf = ieee802154_del_iface,
.set_channel = ieee802154_set_channel,
@@ -239,4 +283,5 @@ const struct cfg802154_ops mac802154_config_ops = {
.set_max_csma_backoffs = ieee802154_set_max_csma_backoffs,
.set_max_frame_retries = ieee802154_set_max_frame_retries,
.set_lbt_mode = ieee802154_set_lbt_mode,
+ .set_ackreq_default = ieee802154_set_ackreq_default,
};
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 34755d5751a4..56ccffa3f2bf 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -56,9 +56,13 @@ struct ieee802154_local {
struct hrtimer ifs_timer;
bool started;
+ bool suspended;
struct tasklet_struct tasklet;
struct sk_buff_head skb_queue;
+
+ struct sk_buff *tx_skb;
+ struct work_struct tx_work;
};
enum {
@@ -94,8 +98,6 @@ struct ieee802154_sub_if_data {
struct mac802154_llsec sec;
};
-#define MAC802154_CHAN_NONE 0xff /* No channel is assigned */
-
/* utility functions/constants */
extern const void *const mac802154_wpan_phy_privid; /* for wpan_phy privid */
@@ -125,6 +127,8 @@ ieee802154_sdata_running(struct ieee802154_sub_if_data *sdata)
extern struct ieee802154_mlme_ops mac802154_mlme_wpan;
+void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb);
+void ieee802154_xmit_worker(struct work_struct *work);
netdev_tx_t
ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev);
netdev_tx_t
@@ -167,6 +171,8 @@ void mac802154_get_table(struct net_device *dev,
struct ieee802154_llsec_table **t);
void mac802154_unlock_table(struct net_device *dev);
+int mac802154_wpan_update_llsec(struct net_device *dev);
+
/* interface handling */
int ieee802154_iface_init(void);
void ieee802154_iface_exit(void);
@@ -176,5 +182,6 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
unsigned char name_assign_type, enum nl802154_iftype type,
__le64 extended_addr);
void ieee802154_remove_interfaces(struct ieee802154_local *local);
+void ieee802154_stop_device(struct ieee802154_local *local);
#endif /* __IEEE802154_I_H */
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 8b698246a51b..ed26952f9e14 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -30,7 +30,7 @@
#include "ieee802154_i.h"
#include "driver-ops.h"
-static int mac802154_wpan_update_llsec(struct net_device *dev)
+int mac802154_wpan_update_llsec(struct net_device *dev)
{
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
struct ieee802154_mlme_ops *ops = ieee802154_mlme_ops(dev);
@@ -125,6 +125,14 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
if (netif_running(dev))
return -EBUSY;
+ /* lowpan need to be down for update
+ * SLAAC address after ifup
+ */
+ if (sdata->wpan_dev.lowpan_dev) {
+ if (netif_running(sdata->wpan_dev.lowpan_dev))
+ return -EBUSY;
+ }
+
ieee802154_be64_to_le64(&extended_addr, addr->sa_data);
if (!ieee802154_is_valid_extended_unicast_addr(extended_addr))
return -EINVAL;
@@ -132,6 +140,13 @@ static int mac802154_wpan_mac_addr(struct net_device *dev, void *p)
memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
sdata->wpan_dev.extended_addr = extended_addr;
+ /* update lowpan interface mac address when
+ * wpan mac has been changed
+ */
+ if (sdata->wpan_dev.lowpan_dev)
+ memcpy(sdata->wpan_dev.lowpan_dev->dev_addr, dev->dev_addr,
+ dev->addr_len);
+
return mac802154_wpan_update_llsec(dev);
}
@@ -314,11 +329,8 @@ static int mac802154_slave_close(struct net_device *dev)
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
- if (!local->open_count) {
- flush_workqueue(local->workqueue);
- hrtimer_cancel(&local->ifs_timer);
- drv_stop(local);
- }
+ if (!local->open_count)
+ ieee802154_stop_device(local);
return 0;
}
@@ -471,6 +483,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
enum nl802154_iftype type)
{
struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ int ret;
u8 tmp;
/* set some type-dependent values */
@@ -485,8 +498,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
wpan_dev->min_be = 3;
wpan_dev->max_be = 5;
wpan_dev->csma_retries = 4;
- /* for compatibility, actual default is 3 */
- wpan_dev->frame_retries = -1;
+ wpan_dev->frame_retries = 3;
wpan_dev->pan_id = cpu_to_le16(IEEE802154_PANID_BROADCAST);
wpan_dev->short_addr = cpu_to_le16(IEEE802154_ADDR_BROADCAST);
@@ -505,6 +517,10 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
mutex_init(&sdata->sec_mtx);
mac802154_llsec_init(&sdata->sec);
+ ret = mac802154_wpan_update_llsec(sdata->dev);
+ if (ret < 0)
+ return ret;
+
break;
case NL802154_IFTYPE_MONITOR:
sdata->dev->destructor = free_netdev;
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 356b346e1ee8..e8cab5bb80c6 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -40,7 +40,7 @@ static void ieee802154_tasklet_handler(unsigned long data)
* netstack.
*/
skb->pkt_type = 0;
- ieee802154_rx(&local->hw, skb);
+ ieee802154_rx(local, skb);
break;
default:
WARN(1, "mac802154: Packet is of unknown type %d\n",
@@ -58,11 +58,9 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
struct ieee802154_local *local;
size_t priv_size;
- if (!ops || !(ops->xmit_async || ops->xmit_sync) || !ops->ed ||
- !ops->start || !ops->stop || !ops->set_channel) {
- pr_err("undefined IEEE802.15.4 device operations\n");
+ if (WARN_ON(!ops || !(ops->xmit_async || ops->xmit_sync) || !ops->ed ||
+ !ops->start || !ops->stop || !ops->set_channel))
return NULL;
- }
/* Ensure 32-byte alignment of our private data and hw private data.
* We use the wpan_phy priv data for both our ieee802154_local and for
@@ -107,11 +105,13 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
skb_queue_head_init(&local->skb_queue);
+ INIT_WORK(&local->tx_work, ieee802154_xmit_worker);
+
/* init supported flags with 802.15.4 default ranges */
phy->supported.max_minbe = 8;
phy->supported.min_maxbe = 3;
phy->supported.max_maxbe = 8;
- phy->supported.min_frame_retries = -1;
+ phy->supported.min_frame_retries = 0;
phy->supported.max_frame_retries = 7;
phy->supported.max_csma_backoffs = 5;
phy->supported.lbt = NL802154_SUPPORTED_BOOL_FALSE;
@@ -177,11 +177,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
}
if (!(hw->flags & IEEE802154_HW_FRAME_RETRIES)) {
- /* TODO should be 3, but our default value is -1 which means
- * no ARET handling.
- */
- local->phy->supported.min_frame_retries = -1;
- local->phy->supported.max_frame_retries = -1;
+ local->phy->supported.min_frame_retries = 3;
+ local->phy->supported.max_frame_retries = 3;
}
if (hw->flags & IEEE802154_HW_PROMISCUOUS)
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index d93ad2d4a4fc..d1c33c1d6b9b 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -246,13 +246,15 @@ ieee802154_monitors_rx(struct ieee802154_local *local, struct sk_buff *skb)
}
}
-void ieee802154_rx(struct ieee802154_hw *hw, struct sk_buff *skb)
+void ieee802154_rx(struct ieee802154_local *local, struct sk_buff *skb)
{
- struct ieee802154_local *local = hw_to_local(hw);
u16 crc;
WARN_ON_ONCE(softirq_count() == 0);
+ if (local->suspended)
+ goto drop;
+
/* TODO: When a transceiver omits the checksum here, we
* add an own calculated one. This is currently an ugly
* solution because the monitor needs a crc here.
@@ -273,8 +275,7 @@ void ieee802154_rx(struct ieee802154_hw *hw, struct sk_buff *skb)
crc = crc_ccitt(0, skb->data, skb->len);
if (crc) {
rcu_read_unlock();
- kfree_skb(skb);
- return;
+ goto drop;
}
}
/* remove crc */
@@ -283,8 +284,11 @@ void ieee802154_rx(struct ieee802154_hw *hw, struct sk_buff *skb)
__ieee802154_rx_handle_packet(local, skb);
rcu_read_unlock();
+
+ return;
+drop:
+ kfree_skb(skb);
}
-EXPORT_SYMBOL(ieee802154_rx);
void
ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi)
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index c62e95695c78..7ed439172f30 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -30,23 +30,11 @@
#include "ieee802154_i.h"
#include "driver-ops.h"
-/* IEEE 802.15.4 transceivers can sleep during the xmit session, so process
- * packets through the workqueue.
- */
-struct ieee802154_xmit_cb {
- struct sk_buff *skb;
- struct work_struct work;
- struct ieee802154_local *local;
-};
-
-static struct ieee802154_xmit_cb ieee802154_xmit_cb;
-
-static void ieee802154_xmit_worker(struct work_struct *work)
+void ieee802154_xmit_worker(struct work_struct *work)
{
- struct ieee802154_xmit_cb *cb =
- container_of(work, struct ieee802154_xmit_cb, work);
- struct ieee802154_local *local = cb->local;
- struct sk_buff *skb = cb->skb;
+ struct ieee802154_local *local =
+ container_of(work, struct ieee802154_local, tx_work);
+ struct sk_buff *skb = local->tx_skb;
struct net_device *dev = skb->dev;
int res;
@@ -106,11 +94,8 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
} else {
- INIT_WORK(&ieee802154_xmit_cb.work, ieee802154_xmit_worker);
- ieee802154_xmit_cb.skb = skb;
- ieee802154_xmit_cb.local = local;
-
- queue_work(local->workqueue, &ieee802154_xmit_cb.work);
+ local->tx_skb = skb;
+ queue_work(local->workqueue, &local->tx_work);
}
return NETDEV_TX_OK;
diff --git a/net/mac802154/util.c b/net/mac802154/util.c
index 583435f38930..f9fd0957ab67 100644
--- a/net/mac802154/util.c
+++ b/net/mac802154/util.c
@@ -14,6 +14,7 @@
*/
#include "ieee802154_i.h"
+#include "driver-ops.h"
/* privid for wpan_phys to determine whether they belong to us or not */
const void *const mac802154_wpan_phy_privid = &mac802154_wpan_phy_privid;
@@ -92,3 +93,10 @@ void ieee802154_xmit_complete(struct ieee802154_hw *hw, struct sk_buff *skb,
dev_consume_skb_any(skb);
}
EXPORT_SYMBOL(ieee802154_xmit_complete);
+
+/* Stop a device: flush local->workqueue, cancel the ifs_timer hrtimer and
+ * then call drv_stop(). This is the sequence mac802154_slave_close()
+ * previously open-coded when the last interface was closed.
+ */
+void ieee802154_stop_device(struct ieee802154_local *local)
+{
+ flush_workqueue(local->workqueue);
+ hrtimer_cancel(&local->ifs_timer);
+ drv_stop(local);
+}
diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig
index 17bde799c854..5c467ef97311 100644
--- a/net/mpls/Kconfig
+++ b/net/mpls/Kconfig
@@ -24,7 +24,13 @@ config NET_MPLS_GSO
config MPLS_ROUTING
tristate "MPLS: routing support"
- help
+ ---help---
Add support for forwarding of mpls packets.
+config MPLS_IPTUNNEL
+ tristate "MPLS: IP over MPLS tunnel support"
+ depends on LWTUNNEL && MPLS_ROUTING
+ ---help---
+ mpls ip tunnel support.
+
endif # MPLS
diff --git a/net/mpls/Makefile b/net/mpls/Makefile
index 65bbe68c72e6..9ca923625016 100644
--- a/net/mpls/Makefile
+++ b/net/mpls/Makefile
@@ -3,5 +3,6 @@
#
obj-$(CONFIG_NET_MPLS_GSO) += mpls_gso.o
obj-$(CONFIG_MPLS_ROUTING) += mpls_router.o
+obj-$(CONFIG_MPLS_IPTUNNEL) += mpls_iptunnel.o
mpls_router-y := af_mpls.o
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 1f93a5978f2a..8c5707db53c5 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -15,6 +15,10 @@
#include <net/ip_fib.h>
#include <net/netevent.h>
#include <net/netns/generic.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+#endif
#include "internal.h"
#define LABEL_NOT_SPECIFIED (1<<20)
@@ -23,11 +27,23 @@
/* This maximum ha length copied from the definition of struct neighbour */
#define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
+/* Payload type expected below the label stack of a route.
+ *
+ * mpls_egress() assigns the IP header's version field directly to a
+ * variable of this type when the route says MPT_UNSPEC, which is why the
+ * IPv4/IPv6 enumerators carry the values 4 and 6.
+ */
+enum mpls_payload_type {
+ MPT_UNSPEC, /* IPv4 or IPv6 */
+ MPT_IPV4 = 4,
+ MPT_IPV6 = 6,
+
+ /* Other types not implemented:
+ * - Pseudo-wire with or without control word (RFC4385)
+ * - GAL (RFC5586)
+ */
+};
+
struct mpls_route { /* next hop label forwarding entry */
struct net_device __rcu *rt_dev;
struct rcu_head rt_rcu;
u32 rt_label[MAX_NEW_LABELS];
u8 rt_protocol; /* routing protocol that set this entry */
+ u8 rt_payload_type;
u8 rt_labels;
u8 rt_via_alen;
u8 rt_via_table;
@@ -58,10 +74,11 @@ static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
return rcu_dereference_rtnl(dev->mpls_ptr);
}
-static bool mpls_output_possible(const struct net_device *dev)
+bool mpls_output_possible(const struct net_device *dev)
{
return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
}
+EXPORT_SYMBOL_GPL(mpls_output_possible);
static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
{
@@ -69,13 +86,14 @@ static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
return rt->rt_labels * sizeof(struct mpls_shim_hdr);
}
-static unsigned int mpls_dev_mtu(const struct net_device *dev)
+unsigned int mpls_dev_mtu(const struct net_device *dev)
{
/* The amount of data the layer 2 frame can hold */
return dev->mtu;
}
+EXPORT_SYMBOL_GPL(mpls_dev_mtu);
-static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
if (skb->len <= mtu)
return false;
@@ -85,20 +103,13 @@ static bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
return true;
}
+EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
struct mpls_entry_decoded dec)
{
- /* RFC4385 and RFC5586 encode other packets in mpls such that
- * they don't conflict with the ip version number, making
- * decoding by examining the ip version correct in everything
- * except for the strangest cases.
- *
- * The strange cases if we choose to support them will require
- * manual configuration.
- */
- struct iphdr *hdr4;
- bool success = true;
+ enum mpls_payload_type payload_type;
+ bool success = false;
/* The IPv4 code below accesses through the IPv4 header
* checksum, which is 12 bytes into the packet.
@@ -113,23 +124,32 @@ static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
if (!pskb_may_pull(skb, 12))
return false;
- /* Use ip_hdr to find the ip protocol version */
- hdr4 = ip_hdr(skb);
- if (hdr4->version == 4) {
+ payload_type = rt->rt_payload_type;
+ if (payload_type == MPT_UNSPEC)
+ payload_type = ip_hdr(skb)->version;
+
+ switch (payload_type) {
+ case MPT_IPV4: {
+ struct iphdr *hdr4 = ip_hdr(skb);
skb->protocol = htons(ETH_P_IP);
csum_replace2(&hdr4->check,
htons(hdr4->ttl << 8),
htons(dec.ttl << 8));
hdr4->ttl = dec.ttl;
+ success = true;
+ break;
}
- else if (hdr4->version == 6) {
+ case MPT_IPV6: {
struct ipv6hdr *hdr6 = ipv6_hdr(skb);
skb->protocol = htons(ETH_P_IPV6);
hdr6->hop_limit = dec.ttl;
+ success = true;
+ break;
+ }
+ case MPT_UNSPEC:
+ break;
}
- else
- /* version 0 and version 1 are used by pseudo wires */
- success = false;
+
return success;
}
@@ -248,16 +268,17 @@ static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
};
struct mpls_route_config {
- u32 rc_protocol;
- u32 rc_ifindex;
- u16 rc_via_table;
- u16 rc_via_alen;
- u8 rc_via[MAX_VIA_ALEN];
- u32 rc_label;
- u32 rc_output_labels;
- u32 rc_output_label[MAX_NEW_LABELS];
- u32 rc_nlflags;
- struct nl_info rc_nlinfo;
+ u32 rc_protocol;
+ u32 rc_ifindex;
+ u16 rc_via_table;
+ u16 rc_via_alen;
+ u8 rc_via[MAX_VIA_ALEN];
+ u32 rc_label;
+ u32 rc_output_labels;
+ u32 rc_output_label[MAX_NEW_LABELS];
+ u32 rc_nlflags;
+ enum mpls_payload_type rc_payload_type;
+ struct nl_info rc_nlinfo;
};
static struct mpls_route *mpls_rt_alloc(size_t alen)
@@ -286,7 +307,7 @@ static void mpls_notify_route(struct net *net, unsigned index,
struct mpls_route *rt = new ? new : old;
unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
/* Ignore reserved labels for now */
- if (rt && (index >= 16))
+ if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
}
@@ -320,13 +341,96 @@ static unsigned find_free_label(struct net *net)
platform_label = rtnl_dereference(net->mpls.platform_label);
platform_labels = net->mpls.platform_labels;
- for (index = 16; index < platform_labels; index++) {
+ for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels;
+ index++) {
if (!rtnl_dereference(platform_label[index]))
return index;
}
return LABEL_NOT_SPECIFIED;
}
+#if IS_ENABLED(CONFIG_INET)
+/* Look up the output device for an IPv4 via address.
+ *
+ * @addr points at the raw IPv4 address bytes. On success the device of the
+ * resulting route is returned with a reference taken via dev_hold(); on
+ * failure the route lookup error is propagated as an ERR_PTR().
+ */
+static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
+{
+	struct in_addr via;
+	struct net_device *out;
+	struct rtable *route;
+
+	memcpy(&via, addr, sizeof(struct in_addr));
+	route = ip_route_output(net, via.s_addr, 0, 0, 0);
+	if (!IS_ERR(route)) {
+		out = route->dst.dev;
+		/* pin the device before dropping the route that references it */
+		dev_hold(out);
+		ip_rt_put(route);
+		return out;
+	}
+
+	return ERR_CAST(route);
+}
+#else
+/* IPv4 support is not built in: no lookup is possible. */
+static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
+{
+	return ERR_PTR(-EAFNOSUPPORT);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+/* Look up the output device for an IPv6 via address.
+ *
+ * @addr points at the raw IPv6 address bytes. The lookup goes through
+ * ipv6_stub; when the stub is not set -EAFNOSUPPORT is returned. On success
+ * the device of the resulting dst is returned with a reference taken via
+ * dev_hold(); a lookup error is propagated as an ERR_PTR().
+ */
+static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
+{
+	struct dst_entry *route;
+	struct net_device *out;
+	struct flowi6 flow;
+	int rc;
+
+	if (!ipv6_stub)
+		return ERR_PTR(-EAFNOSUPPORT);
+
+	/* only the destination address is filled in; the rest stays zero */
+	memset(&flow, 0, sizeof(flow));
+	memcpy(&flow.daddr, addr, sizeof(struct in6_addr));
+
+	rc = ipv6_stub->ipv6_dst_lookup(net, NULL, &route, &flow);
+	if (rc)
+		return ERR_PTR(rc);
+
+	out = route->dev;
+	/* pin the device before dropping the dst that references it */
+	dev_hold(out);
+	dst_release(route);
+
+	return out;
+}
+#else
+/* IPv6 support is not built in: no lookup is possible. */
+static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
+{
+	return ERR_PTR(-EAFNOSUPPORT);
+}
+#endif
+
+/* Resolve the output device for a route configuration.
+ *
+ * A non-zero rc_ifindex selects the device directly. Otherwise the device
+ * is found by an IPv4 or IPv6 route lookup on the via address, chosen by
+ * rc_via_table. Returns the device, an ERR_PTR() propagated from the
+ * lookup helper, or ERR_PTR(-ENODEV) when nothing was found.
+ */
+static struct net_device *find_outdev(struct net *net,
+				      struct mpls_route_config *cfg)
+{
+	struct net_device *out = NULL;
+
+	if (cfg->rc_ifindex)
+		out = dev_get_by_index(net, cfg->rc_ifindex);
+	else if (cfg->rc_via_table == NEIGH_ARP_TABLE)
+		out = inet_fib_lookup_dev(net, cfg->rc_via);
+	else if (cfg->rc_via_table == NEIGH_ND_TABLE)
+		out = inet6_fib_lookup_dev(net, cfg->rc_via);
+	/* NEIGH_LINK_TABLE or any other table: no lookup, out stays NULL */
+
+	return out ? out : ERR_PTR(-ENODEV);
+}
+
static int mpls_route_add(struct mpls_route_config *cfg)
{
struct mpls_route __rcu **platform_label;
@@ -345,8 +449,8 @@ static int mpls_route_add(struct mpls_route_config *cfg)
index = find_free_label(net);
}
- /* The first 16 labels are reserved, and may not be set */
- if (index < 16)
+ /* Reserved labels may not be set */
+ if (index < MPLS_LABEL_FIRST_UNRESERVED)
goto errout;
/* The full 20 bit range may not be supported. */
@@ -357,10 +461,12 @@ static int mpls_route_add(struct mpls_route_config *cfg)
if (cfg->rc_output_labels > MAX_NEW_LABELS)
goto errout;
- err = -ENODEV;
- dev = dev_get_by_index(net, cfg->rc_ifindex);
- if (!dev)
+ dev = find_outdev(net, cfg);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ dev = NULL;
goto errout;
+ }
/* Ensure this is a supported device */
err = -EINVAL;
@@ -401,6 +507,7 @@ static int mpls_route_add(struct mpls_route_config *cfg)
rt->rt_label[i] = cfg->rc_output_label[i];
rt->rt_protocol = cfg->rc_protocol;
RCU_INIT_POINTER(rt->rt_dev, dev);
+ rt->rt_payload_type = cfg->rc_payload_type;
rt->rt_via_table = cfg->rc_via_table;
memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
@@ -423,8 +530,8 @@ static int mpls_route_del(struct mpls_route_config *cfg)
index = cfg->rc_label;
- /* The first 16 labels are reserved, and may not be removed */
- if (index < 16)
+ /* Reserved labels may not be removed */
+ if (index < MPLS_LABEL_FIRST_UNRESERVED)
goto errout;
/* The full 20 bit range may not be supported */
@@ -626,6 +733,7 @@ int nla_put_labels(struct sk_buff *skb, int attrtype,
return 0;
}
+EXPORT_SYMBOL_GPL(nla_put_labels);
int nla_get_labels(const struct nlattr *nla,
u32 max_labels, u32 *labels, u32 label[])
@@ -671,6 +779,7 @@ int nla_get_labels(const struct nlattr *nla,
*labels = nla_labels;
return 0;
}
+EXPORT_SYMBOL_GPL(nla_get_labels);
static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
struct mpls_route_config *cfg)
@@ -740,8 +849,8 @@ static int rtm_to_route_config(struct sk_buff *skb, struct nlmsghdr *nlh,
&cfg->rc_label))
goto errout;
- /* The first 16 labels are reserved, and may not be set */
- if (cfg->rc_label < 16)
+ /* Reserved labels may not be set */
+ if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
goto errout;
break;
@@ -866,8 +975,8 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
ASSERT_RTNL();
index = cb->args[0];
- if (index < 16)
- index = 16;
+ if (index < MPLS_LABEL_FIRST_UNRESERVED)
+ index = MPLS_LABEL_FIRST_UNRESERVED;
platform_label = rtnl_dereference(net->mpls.platform_label);
platform_labels = net->mpls.platform_labels;
@@ -953,6 +1062,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
goto nort0;
RCU_INIT_POINTER(rt0->rt_dev, lo);
rt0->rt_protocol = RTPROT_KERNEL;
+ rt0->rt_payload_type = MPT_IPV4;
rt0->rt_via_table = NEIGH_LINK_TABLE;
memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
}
@@ -963,6 +1073,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
goto nort2;
RCU_INIT_POINTER(rt2->rt_dev, lo);
rt2->rt_protocol = RTPROT_KERNEL;
+ rt2->rt_payload_type = MPT_IPV6;
rt2->rt_via_table = NEIGH_LINK_TABLE;
memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
}
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 8cabeb5a1cb9..2681a4ba6c37 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -50,7 +50,12 @@ static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *
return result;
}
-int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels, const u32 label[]);
-int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels, u32 label[]);
+int nla_put_labels(struct sk_buff *skb, int attrtype, u8 labels,
+ const u32 label[]);
+int nla_get_labels(const struct nlattr *nla, u32 max_labels, u32 *labels,
+ u32 label[]);
+bool mpls_output_possible(const struct net_device *dev);
+unsigned int mpls_dev_mtu(const struct net_device *dev);
+bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu);
#endif /* MPLS_INTERNAL_H */
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
new file mode 100644
index 000000000000..3da5ca3ba563
--- /dev/null
+++ b/net/mpls/mpls_iptunnel.c
@@ -0,0 +1,230 @@
+/*
+ * mpls tunnels: An implementation of MPLS tunnels using the lightweight
+ * tunnel infrastructure
+ *
+ * Authors: Roopa Prabhu, <roopa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/module.h>
+#include <linux/mpls.h>
+#include <linux/vmalloc.h>
+#include <net/ip.h>
+#include <net/dst.h>
+#include <net/lwtunnel.h>
+#include <net/netevent.h>
+#include <net/netns/generic.h>
+#include <net/ip6_fib.h>
+#include <net/route.h>
+#include <net/mpls_iptunnel.h>
+#include <linux/mpls_iptunnel.h>
+#include "internal.h"
+
+/* Netlink policy used by mpls_build_state() when parsing the nested encap
+ * attributes; MPLS_IPTUNNEL_DST is the only attribute with a policy entry.
+ */
+static const struct nla_policy mpls_iptunnel_policy[MPLS_IPTUNNEL_MAX + 1] = {
+ [MPLS_IPTUNNEL_DST] = { .type = NLA_U32 },
+};
+
+/* Number of bytes the label stack of @en occupies on the wire:
+ * one struct mpls_shim_hdr per label.
+ */
+static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
+{
+	return sizeof(struct mpls_shim_hdr) * en->labels;
+}
+
+/* lwtunnel output handler: push the route's MPLS label stack onto an IPv4
+ * or IPv6 packet and hand it to the neighbour layer.
+ *
+ * The TTL / hop limit of the inner IP header is copied into every pushed
+ * label. Packets of any other protocol, packets whose output device is not
+ * usable, packets without lwtunnel state, LRO packets, packets that no
+ * longer fit the device MTU once the labels are added, and packets for
+ * which headroom cannot be obtained are dropped.
+ *
+ * Returns 0 once the packet has been passed to neigh_xmit() (a neigh_xmit()
+ * error is only logged), or -EINVAL after freeing the skb on a drop path.
+ */
+int mpls_output(struct sock *sk, struct sk_buff *skb)
+{
+ struct mpls_iptunnel_encap *tun_encap_info;
+ struct mpls_shim_hdr *hdr;
+ struct net_device *out_dev;
+ unsigned int hh_len;
+ unsigned int new_header_size;
+ unsigned int mtu;
+ struct dst_entry *dst = skb_dst(skb);
+ struct rtable *rt = NULL;
+ struct rt6_info *rt6 = NULL;
+ int err = 0;
+ bool bos;
+ int i;
+ unsigned int ttl;
+
+ /* Obtain the ttl */
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ttl = ip_hdr(skb)->ttl;
+ rt = (struct rtable *)dst;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ ttl = ipv6_hdr(skb)->hop_limit;
+ rt6 = (struct rt6_info *)dst;
+ } else {
+ goto drop;
+ }
+
+ skb_orphan(skb);
+
+ /* Find the output device */
+ out_dev = dst->dev;
+ if (!mpls_output_possible(out_dev) ||
+ !dst->lwtstate || skb_warn_if_lro(skb))
+ goto drop;
+
+ skb_forward_csum(skb);
+
+ tun_encap_info = mpls_lwtunnel_encap(dst->lwtstate);
+
+ /* Verify the destination can hold the packet */
+ new_header_size = mpls_encap_size(tun_encap_info);
+ mtu = mpls_dev_mtu(out_dev);
+ if (mpls_pkt_too_big(skb, mtu - new_header_size))
+ goto drop;
+
+ /* No link-layer headroom is reserved for devices without header_ops */
+ hh_len = LL_RESERVED_SPACE(out_dev);
+ if (!out_dev->header_ops)
+ hh_len = 0;
+
+ /* Ensure there is enough space for the headers in the skb */
+ if (skb_cow(skb, hh_len + new_header_size))
+ goto drop;
+
+ skb_push(skb, new_header_size);
+ skb_reset_network_header(skb);
+
+ skb->dev = out_dev;
+ skb->protocol = htons(ETH_P_MPLS_UC);
+
+ /* Push the new labels */
+ /* Filled from the last entry backwards so that only the last label
+ * carries the bottom-of-stack flag.
+ */
+ hdr = mpls_hdr(skb);
+ bos = true;
+ for (i = tun_encap_info->labels - 1; i >= 0; i--) {
+ hdr[i] = mpls_entry_encode(tun_encap_info->label[i],
+ ttl, 0, bos);
+ bos = false;
+ }
+
+ /* Exactly one of rt/rt6 was set above; resolve via its gateway */
+ if (rt)
+ err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway,
+ skb);
+ else if (rt6)
+ err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway,
+ skb);
+ if (err)
+ net_dbg_ratelimited("%s: packet transmission failed: %d\n",
+ __func__, err);
+
+ return 0;
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* Build the lwtunnel state for an MPLS encap from the nested netlink
+ * attribute @nla.
+ *
+ * MPLS_IPTUNNEL_DST is mandatory and supplies the label stack (at most
+ * MAX_NEW_LABELS labels). On success *ts receives the new state and 0 is
+ * returned. A parse error, -EINVAL (attribute missing) or -ENOMEM is
+ * returned without touching *ts; if decoding the labels fails the state is
+ * freed, *ts is set to NULL and the decode error is returned.
+ */
+static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
+			    struct lwtunnel_state **ts)
+{
+	struct nlattr *attrs[MPLS_IPTUNNEL_MAX + 1];
+	struct mpls_iptunnel_encap *encap;
+	struct lwtunnel_state *state;
+	int encap_len = sizeof(*encap);
+	int err;
+
+	err = nla_parse_nested(attrs, MPLS_IPTUNNEL_MAX, nla,
+			       mpls_iptunnel_policy);
+	if (err < 0)
+		return err;
+
+	if (!attrs[MPLS_IPTUNNEL_DST])
+		return -EINVAL;
+
+	state = lwtunnel_state_alloc(encap_len);
+	if (!state)
+		return -ENOMEM;
+
+	state->len = encap_len;
+	encap = mpls_lwtunnel_encap(state);
+
+	err = nla_get_labels(attrs[MPLS_IPTUNNEL_DST], MAX_NEW_LABELS,
+			     &encap->labels, encap->label);
+	if (err) {
+		kfree(state);
+		*ts = NULL;
+		return err;
+	}
+
+	state->type = LWTUNNEL_ENCAP_MPLS;
+	state->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+	*ts = state;
+
+	return 0;
+}
+
+/* Dump the label stack of @lwtstate into @skb as an MPLS_IPTUNNEL_DST
+ * attribute. Returns 0 on success, -EMSGSIZE if nla_put_labels() fails.
+ */
+static int mpls_fill_encap_info(struct sk_buff *skb,
+				struct lwtunnel_state *lwtstate)
+{
+	struct mpls_iptunnel_encap *encap = mpls_lwtunnel_encap(lwtstate);
+
+	if (nla_put_labels(skb, MPLS_IPTUNNEL_DST, encap->labels,
+			   encap->label))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+/* Netlink attribute size needed to dump the label stack of @lwtstate:
+ * four bytes per label, wrapped by nla_total_size().
+ */
+static int mpls_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+	return nla_total_size(mpls_lwtunnel_encap(lwtstate)->labels * 4);
+}
+
+/* Compare the MPLS encap of two lwtunnel states.
+ *
+ * Returns 0 when both carry the same number of labels and the same label
+ * values, 1 otherwise. Only the first ->labels entries are compared: slots
+ * beyond the label count are not part of the encapsulation and must not
+ * influence the result.
+ */
+static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+	struct mpls_iptunnel_encap *a_hdr = mpls_lwtunnel_encap(a);
+	struct mpls_iptunnel_encap *b_hdr = mpls_lwtunnel_encap(b);
+	unsigned int l;
+
+	if (a_hdr->labels != b_hdr->labels)
+		return 1;
+
+	for (l = 0; l < a_hdr->labels; l++)
+		if (a_hdr->label[l] != b_hdr->label[l])
+			return 1;
+	return 0;
+}
+
+/* lwtunnel callbacks for the MPLS encap type; registered under
+ * LWTUNNEL_ENCAP_MPLS by mpls_iptunnel_init().
+ */
+static const struct lwtunnel_encap_ops mpls_iptun_ops = {
+ .build_state = mpls_build_state,
+ .output = mpls_output,
+ .fill_encap = mpls_fill_encap_info,
+ .get_encap_size = mpls_encap_nlsize,
+ .cmp_encap = mpls_encap_cmp,
+};
+
+/* Module init: register the MPLS encap ops with the lwtunnel core and
+ * return whatever lwtunnel_encap_add_ops() reports.
+ */
+static int __init mpls_iptunnel_init(void)
+{
+ return lwtunnel_encap_add_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS);
+}
+module_init(mpls_iptunnel_init);
+
+/* Module exit: remove the MPLS encap ops registered by
+ * mpls_iptunnel_init().
+ */
+static void __exit mpls_iptunnel_exit(void)
+{
+ lwtunnel_encap_del_ops(&mpls_iptun_ops, LWTUNNEL_ENCAP_MPLS);
+}
+module_exit(mpls_iptunnel_exit);
+
+MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels");
+MODULE_LICENSE("GPL v2");
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6eae69a698ed..3e1b4abf1897 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -867,6 +867,8 @@ config NETFILTER_XT_TARGET_TEE
depends on NETFILTER_ADVANCED
depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK
+ select NF_DUP_IPV4
+ select NF_DUP_IPV6 if IP6_NF_IPTABLES
---help---
This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop.
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index a0e54974e2c9..2a5a0704245c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -34,6 +34,9 @@ EXPORT_SYMBOL(nf_afinfo);
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
+DEFINE_PER_CPU(bool, nf_skb_duplicated);
+EXPORT_SYMBOL_GPL(nf_skb_duplicated);
+
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
mutex_lock(&afinfo_mutex);
@@ -52,9 +55,6 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
}
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
-struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
-EXPORT_SYMBOL(nf_hooks);
-
#ifdef HAVE_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
@@ -62,63 +62,166 @@ EXPORT_SYMBOL(nf_hooks_needed);
static DEFINE_MUTEX(nf_hook_mutex);
-int nf_register_hook(struct nf_hook_ops *reg)
+static struct list_head *nf_find_hook_list(struct net *net,
+ const struct nf_hook_ops *reg)
{
- struct list_head *nf_hook_list;
- struct nf_hook_ops *elem;
+ struct list_head *hook_list = NULL;
- mutex_lock(&nf_hook_mutex);
- switch (reg->pf) {
- case NFPROTO_NETDEV:
+ if (reg->pf != NFPROTO_NETDEV)
+ hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
+ else if (reg->hooknum == NF_NETDEV_INGRESS) {
#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- BUG_ON(reg->dev == NULL);
- nf_hook_list = &reg->dev->nf_hooks_ingress;
- net_inc_ingress_queue();
- break;
- }
+ if (reg->dev && dev_net(reg->dev) == net)
+ hook_list = &reg->dev->nf_hooks_ingress;
#endif
- /* Fall through. */
- default:
- nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
- break;
+ }
+ return hook_list;
+}
+
+/* One registration of a hook in one hook list.
+ *
+ * nf_register_net_hook() copies the caller's nf_hook_ops into @ops and
+ * links that copy into the list; @orig_ops remembers the caller's pointer
+ * so nf_unregister_net_hook() can find the matching entry again.
+ */
+struct nf_hook_entry {
+ const struct nf_hook_ops *orig_ops;
+ struct nf_hook_ops ops;
+};
+
+int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
+{
+ struct list_head *hook_list;
+ struct nf_hook_entry *entry;
+ struct nf_hook_ops *elem;
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->orig_ops = reg;
+ entry->ops = *reg;
+
+ hook_list = nf_find_hook_list(net, reg);
+ if (!hook_list) {
+ kfree(entry);
+ return -ENOENT;
}
- list_for_each_entry(elem, nf_hook_list, list) {
+ mutex_lock(&nf_hook_mutex);
+ list_for_each_entry(elem, hook_list, list) {
if (reg->priority < elem->priority)
break;
}
- list_add_rcu(&reg->list, elem->list.prev);
+ list_add_rcu(&entry->ops.list, elem->list.prev);
mutex_unlock(&nf_hook_mutex);
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ net_inc_ingress_queue();
+#endif
#ifdef HAVE_JUMP_LABEL
static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
return 0;
}
-EXPORT_SYMBOL(nf_register_hook);
+EXPORT_SYMBOL(nf_register_net_hook);
-void nf_unregister_hook(struct nf_hook_ops *reg)
+void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
+ struct list_head *hook_list;
+ struct nf_hook_entry *entry;
+ struct nf_hook_ops *elem;
+
+ hook_list = nf_find_hook_list(net, reg);
+ if (!hook_list)
+ return;
+
mutex_lock(&nf_hook_mutex);
- list_del_rcu(&reg->list);
- mutex_unlock(&nf_hook_mutex);
- switch (reg->pf) {
- case NFPROTO_NETDEV:
-#ifdef CONFIG_NETFILTER_INGRESS
- if (reg->hooknum == NF_NETDEV_INGRESS) {
- net_dec_ingress_queue();
+ list_for_each_entry(elem, hook_list, list) {
+ entry = container_of(elem, struct nf_hook_entry, ops);
+ if (entry->orig_ops == reg) {
+ list_del_rcu(&entry->ops.list);
break;
}
- break;
-#endif
- default:
- break;
}
+ mutex_unlock(&nf_hook_mutex);
+ if (&elem->list == hook_list) {
+ WARN(1, "nf_unregister_net_hook: hook not found!\n");
+ return;
+ }
+#ifdef CONFIG_NETFILTER_INGRESS
+ if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
+ net_dec_ingress_queue();
+#endif
#ifdef HAVE_JUMP_LABEL
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
- nf_queue_nf_hook_drop(reg);
+ nf_queue_nf_hook_drop(net, &entry->ops);
+ kfree(entry);
+}
+EXPORT_SYMBOL(nf_unregister_net_hook);
+
+/* Register the @n hooks in @reg for @net, in array order.
+ *
+ * If any registration fails, the hooks registered so far are unregistered
+ * again and the error is returned. Returns 0 when all hooks were
+ * registered (including the n == 0 case).
+ */
+int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+			  unsigned int n)
+{
+	unsigned int done;
+	int rc;
+
+	for (done = 0; done < n; done++) {
+		rc = nf_register_net_hook(net, &reg[done]);
+		if (!rc)
+			continue;
+
+		/* roll back everything registered before the failure */
+		if (done > 0)
+			nf_unregister_net_hooks(net, reg, done);
+		return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(nf_register_net_hooks);
+
+/* Unregister the first @n hooks of @reg from @net, last one first. */
+void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
+			     unsigned int n)
+{
+	for (; n > 0; n--)
+		nf_unregister_net_hook(net, &reg[n - 1]);
+}
+EXPORT_SYMBOL(nf_unregister_net_hooks);
+
+static LIST_HEAD(nf_hook_list);
+
+/* Register @reg in every network namespace and remember it on
+ * nf_hook_list.
+ *
+ * -ENOENT from nf_register_net_hook() is tolerated and the namespace is
+ * simply skipped. Any other error unregisters the hook from the namespaces
+ * visited before the failing one and is returned. Runs under rtnl_lock().
+ */
+int nf_register_hook(struct nf_hook_ops *reg)
+{
+	struct net *net, *failed = NULL;
+	int ret = 0;
+
+	rtnl_lock();
+	for_each_net(net) {
+		ret = nf_register_net_hook(net, reg);
+		if (ret && ret != -ENOENT) {
+			failed = net;
+			break;
+		}
+	}
+
+	if (!failed) {
+		list_add_tail(&reg->list, &nf_hook_list);
+		rtnl_unlock();
+		return 0;
+	}
+
+	/* undo the namespaces handled before the one that failed */
+	for_each_net(net) {
+		if (net == failed)
+			break;
+		nf_unregister_net_hook(net, reg);
+	}
+	rtnl_unlock();
+
+	return ret;
+}
+EXPORT_SYMBOL(nf_register_hook);
+
+void nf_unregister_hook(struct nf_hook_ops *reg)
+{
+ struct net *net;
+
+ rtnl_lock();
+ list_del(&reg->list);
+ for_each_net(net)
+ nf_unregister_net_hook(net, reg);
+ rtnl_unlock();
}
EXPORT_SYMBOL(nf_unregister_hook);
@@ -295,8 +398,46 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
+/* Register every hook on nf_hook_list in @net.
+ *
+ * -ENOENT from nf_register_net_hook() is tolerated. On any other error the
+ * hooks that precede the failing one on the list are unregistered from
+ * @net again and the error is returned. Runs under rtnl_lock().
+ */
+static int nf_register_hook_list(struct net *net)
+{
+	struct nf_hook_ops *ops;
+	int err = 0;
+
+	rtnl_lock();
+	list_for_each_entry(ops, &nf_hook_list, list) {
+		err = nf_register_net_hook(net, ops);
+		if (err == -ENOENT)
+			err = 0;
+		if (err)
+			break;
+	}
+
+	if (err) {
+		/* walk back from the failing entry, undoing earlier ones */
+		list_for_each_entry_continue_reverse(ops, &nf_hook_list, list)
+			nf_unregister_net_hook(net, ops);
+	}
+	rtnl_unlock();
+
+	return err;
+}
+
+/* Unregister every hook on nf_hook_list from @net, under rtnl_lock().
+ * Called from netfilter_net_exit().
+ */
+static void nf_unregister_hook_list(struct net *net)
+{
+ struct nf_hook_ops *elem;
+
+ rtnl_lock();
+ list_for_each_entry(elem, &nf_hook_list, list)
+ nf_unregister_net_hook(net, elem);
+ rtnl_unlock();
+}
+
static int __net_init netfilter_net_init(struct net *net)
{
+ int i, h, ret;
+
+ for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
+ for (h = 0; h < NF_MAX_HOOKS; h++)
+ INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+ }
+
#ifdef CONFIG_PROC_FS
net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
net->proc_net);
@@ -307,11 +448,16 @@ static int __net_init netfilter_net_init(struct net *net)
return -ENOMEM;
}
#endif
- return 0;
+ ret = nf_register_hook_list(net);
+ if (ret)
+ remove_proc_entry("netfilter", net->proc_net);
+
+ return ret;
}
static void __net_exit netfilter_net_exit(struct net *net)
{
+ nf_unregister_hook_list(net);
remove_proc_entry("netfilter", net->proc_net);
}
@@ -322,12 +468,7 @@ static struct pernet_operations netfilter_net_ops = {
int __init netfilter_init(void)
{
- int i, h, ret;
-
- for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) {
- for (h = 0; h < NF_MAX_HOOKS; h++)
- INIT_LIST_HEAD(&nf_hooks[i][h]);
- }
+ int ret;
ret = register_pernet_subsys(&netfilter_net_ops);
if (ret < 0)
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 5882bbfd198c..136184572fc9 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -274,7 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+ h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt,
&tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 7e8141647943..a2ff7d746ebf 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -137,7 +137,7 @@ struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name)
void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
{
- if (scheduler && scheduler->module)
+ if (scheduler)
module_put(scheduler->module);
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3c20d02aee73..ac3be9b0629b 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -126,7 +126,7 @@ EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
unsigned int nf_conntrack_hash_rnd __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);
-static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
+static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple)
{
unsigned int n;
@@ -135,7 +135,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)
* three bytes manually.
*/
n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- return jhash2((u32 *)tuple, n, zone ^ nf_conntrack_hash_rnd ^
+ return jhash2((u32 *)tuple, n, nf_conntrack_hash_rnd ^
(((__force __u16)tuple->dst.u.all << 16) |
tuple->dst.protonum));
}
@@ -151,15 +151,15 @@ static u32 hash_bucket(u32 hash, const struct net *net)
}
static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
- u16 zone, unsigned int size)
+ unsigned int size)
{
- return __hash_bucket(hash_conntrack_raw(tuple, zone), size);
+ return __hash_bucket(hash_conntrack_raw(tuple), size);
}
-static inline u_int32_t hash_conntrack(const struct net *net, u16 zone,
+static inline u_int32_t hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple)
{
- return __hash_conntrack(tuple, zone, net->ct.htable_size);
+ return __hash_conntrack(tuple, net->ct.htable_size);
}
bool
@@ -288,7 +288,9 @@ static void nf_ct_del_from_dying_or_unconfirmed_list(struct nf_conn *ct)
}
/* Released via destroy_conntrack() */
-struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
+struct nf_conn *nf_ct_tmpl_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
+ gfp_t flags)
{
struct nf_conn *tmpl;
@@ -299,24 +301,15 @@ struct nf_conn *nf_ct_tmpl_alloc(struct net *net, u16 zone, gfp_t flags)
tmpl->status = IPS_TEMPLATE;
write_pnet(&tmpl->ct_net, net);
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- if (zone) {
- struct nf_conntrack_zone *nf_ct_zone;
+ if (nf_ct_zone_add(tmpl, flags, zone) < 0)
+ goto out_free;
- nf_ct_zone = nf_ct_ext_add(tmpl, NF_CT_EXT_ZONE, flags);
- if (!nf_ct_zone)
- goto out_free;
- nf_ct_zone->id = zone;
- }
-#endif
atomic_set(&tmpl->ct_general.use, 0);
return tmpl;
-#ifdef CONFIG_NF_CONNTRACK_ZONES
out_free:
kfree(tmpl);
return NULL;
-#endif
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_alloc);
@@ -373,7 +366,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
unsigned int hash, reply_hash;
- u16 zone = nf_ct_zone(ct);
unsigned int sequence;
nf_ct_helper_destroy(ct);
@@ -381,9 +373,9 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
local_bh_disable();
do {
sequence = read_seqcount_begin(&net->ct.generation);
- hash = hash_conntrack(net, zone,
+ hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- reply_hash = hash_conntrack(net, zone,
+ reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
@@ -431,8 +423,8 @@ static void death_by_timeout(unsigned long ul_conntrack)
static inline bool
nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
- const struct nf_conntrack_tuple *tuple,
- u16 zone)
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone)
{
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -440,8 +432,8 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
* so we need to check that the conntrack is confirmed
*/
return nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone(ct) == zone &&
- nf_ct_is_confirmed(ct);
+ nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h)) &&
+ nf_ct_is_confirmed(ct);
}
/*
@@ -450,7 +442,7 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
* and recheck nf_ct_tuple_equal(tuple, &h->tuple)
*/
static struct nf_conntrack_tuple_hash *
-____nf_conntrack_find(struct net *net, u16 zone,
+____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
@@ -486,7 +478,7 @@ begin:
/* Find a connection corresponding to a tuple. */
static struct nf_conntrack_tuple_hash *
-__nf_conntrack_find_get(struct net *net, u16 zone,
+__nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple, u32 hash)
{
struct nf_conntrack_tuple_hash *h;
@@ -513,11 +505,11 @@ begin:
}
struct nf_conntrack_tuple_hash *
-nf_conntrack_find_get(struct net *net, u16 zone,
+nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, zone));
+ hash_conntrack_raw(tuple));
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -536,11 +528,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
int
nf_conntrack_hash_check_insert(struct nf_conn *ct)
{
+ const struct nf_conntrack_zone *zone;
struct net *net = nf_ct_net(ct);
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- u16 zone;
unsigned int sequence;
zone = nf_ct_zone(ct);
@@ -548,9 +540,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
local_bh_disable();
do {
sequence = read_seqcount_begin(&net->ct.generation);
- hash = hash_conntrack(net, zone,
+ hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- reply_hash = hash_conntrack(net, zone,
+ reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
@@ -558,12 +550,14 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
add_timer(&ct->timeout);
@@ -588,6 +582,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
+ const struct nf_conntrack_zone *zone;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -596,7 +591,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
- u16 zone;
unsigned int sequence;
ct = nf_ct_get(skb, &ctinfo);
@@ -617,7 +611,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* reuse the hash saved before */
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = hash_bucket(hash, net);
- reply_hash = hash_conntrack(net, zone,
+ reply_hash = hash_conntrack(net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
@@ -649,12 +643,14 @@ __nf_conntrack_confirm(struct sk_buff *skb)
hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
hlist_nulls_for_each_entry(h, n, &net->ct.hash[reply_hash], hnnode)
if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
&h->tuple) &&
- zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)))
+ nf_ct_zone_equal(nf_ct_tuplehash_to_ctrack(h), zone,
+ NF_CT_DIRECTION(h)))
goto out;
/* Timer relative to confirmation time, not original
@@ -707,11 +703,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack)
{
struct net *net = nf_ct_net(ignored_conntrack);
+ const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
struct nf_conn *ct;
- u16 zone = nf_ct_zone(ignored_conntrack);
- unsigned int hash = hash_conntrack(net, zone, tuple);
+ unsigned int hash;
+
+ zone = nf_ct_zone(ignored_conntrack);
+ hash = hash_conntrack(net, tuple);
/* Disable BHs the entire time since we need to disable them at
* least once for the stats anyway.
@@ -721,7 +720,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
ct = nf_ct_tuplehash_to_ctrack(h);
if (ct != ignored_conntrack &&
nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone(ct) == zone) {
+ nf_ct_zone_equal(ct, zone, NF_CT_DIRECTION(h))) {
NF_CT_STAT_INC(net, found);
rcu_read_unlock_bh();
return 1;
@@ -810,7 +809,8 @@ void init_nf_conntrack_hash_rnd(void)
}
static struct nf_conn *
-__nf_conntrack_alloc(struct net *net, u16 zone,
+__nf_conntrack_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp, u32 hash)
@@ -820,7 +820,7 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
if (unlikely(!nf_conntrack_hash_rnd)) {
init_nf_conntrack_hash_rnd();
/* recompute the hash as nf_conntrack_hash_rnd is initialized */
- hash = hash_conntrack_raw(orig, zone);
+ hash = hash_conntrack_raw(orig);
}
/* We don't want any race condition at early drop stage */
@@ -840,10 +840,9 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
* SLAB_DESTROY_BY_RCU.
*/
ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
- if (ct == NULL) {
- atomic_dec(&net->ct.count);
- return ERR_PTR(-ENOMEM);
- }
+ if (ct == NULL)
+ goto out;
+
spin_lock_init(&ct->lock);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
@@ -857,31 +856,24 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
memset(&ct->__nfct_init_offset[0], 0,
offsetof(struct nf_conn, proto) -
offsetof(struct nf_conn, __nfct_init_offset[0]));
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- if (zone) {
- struct nf_conntrack_zone *nf_ct_zone;
- nf_ct_zone = nf_ct_ext_add(ct, NF_CT_EXT_ZONE, GFP_ATOMIC);
- if (!nf_ct_zone)
- goto out_free;
- nf_ct_zone->id = zone;
- }
-#endif
+ if (zone && nf_ct_zone_add(ct, GFP_ATOMIC, zone) < 0)
+ goto out_free;
+
/* Because we use RCU lookups, we set ct_general.use to zero before
* this is inserted in any list.
*/
atomic_set(&ct->ct_general.use, 0);
return ct;
-
-#ifdef CONFIG_NF_CONNTRACK_ZONES
out_free:
- atomic_dec(&net->ct.count);
kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
+out:
+ atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
-#endif
}
-struct nf_conn *nf_conntrack_alloc(struct net *net, u16 zone,
+struct nf_conn *nf_conntrack_alloc(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *orig,
const struct nf_conntrack_tuple *repl,
gfp_t gfp)
@@ -923,8 +915,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_tuple repl_tuple;
struct nf_conntrack_ecache *ecache;
struct nf_conntrack_expect *exp = NULL;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
+ const struct nf_conntrack_zone *zone;
struct nf_conn_timeout *timeout_ext;
+ struct nf_conntrack_zone tmp;
unsigned int *timeouts;
if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, l4proto)) {
@@ -932,6 +925,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
return NULL;
}
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
hash);
if (IS_ERR(ct))
@@ -1026,10 +1020,11 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
int *set_reply,
enum ip_conntrack_info *ctinfo)
{
+ const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
+ struct nf_conntrack_zone tmp;
struct nf_conn *ct;
- u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
@@ -1040,7 +1035,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
}
/* look for tuple match */
- hash = hash_conntrack_raw(&tuple, zone);
+ zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
+ hash = hash_conntrack_raw(&tuple);
h = __nf_conntrack_find_get(net, zone, &tuple, hash);
if (!h) {
h = init_conntrack(net, tmpl, &tuple, l3proto, l4proto,
@@ -1290,6 +1286,13 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
}
EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+/* Built-in default zone used e.g. by modules. */
+const struct nf_conntrack_zone nf_ct_zone_dflt = {
+ .id = NF_CT_DEFAULT_ZONE_ID,
+ .dir = NF_CT_DEFAULT_ZONE_DIR,
+};
+EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
+
#ifdef CONFIG_NF_CONNTRACK_ZONES
static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {
.len = sizeof(struct nf_conntrack_zone),
@@ -1596,8 +1599,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
- bucket = __hash_conntrack(&h->tuple, nf_ct_zone(ct),
- hashsize);
+ bucket = __hash_conntrack(&h->tuple, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index b45a4223cb05..acf5c7b3f378 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -88,7 +88,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple
}
struct nf_conntrack_expect *
-__nf_ct_expect_find(struct net *net, u16 zone,
+__nf_ct_expect_find(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
@@ -100,7 +101,7 @@ __nf_ct_expect_find(struct net *net, u16 zone,
h = nf_ct_expect_dst_hash(tuple);
hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone(i->master) == zone)
+ nf_ct_zone_equal_any(i->master, zone))
return i;
}
return NULL;
@@ -109,7 +110,8 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
/* Just find a expectation corresponding to a tuple. */
struct nf_conntrack_expect *
-nf_ct_expect_find_get(struct net *net, u16 zone,
+nf_ct_expect_find_get(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i;
@@ -127,7 +129,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
/* If an expectation for this connection is found, it gets delete from
* global list then returned. */
struct nf_conntrack_expect *
-nf_ct_find_expectation(struct net *net, u16 zone,
+nf_ct_find_expectation(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_expect *i, *exp = NULL;
@@ -140,7 +143,7 @@ nf_ct_find_expectation(struct net *net, u16 zone,
hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
- nf_ct_zone(i->master) == zone) {
+ nf_ct_zone_equal_any(i->master, zone)) {
exp = i;
break;
}
@@ -220,16 +223,16 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
}
return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
- nf_ct_zone(a->master) == nf_ct_zone(b->master);
+ nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
return a->master == b->master && a->class == b->class &&
- nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
- nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
- nf_ct_zone(a->master) == nf_ct_zone(b->master);
+ nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+ nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
+ nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
/* Generally a bad idea to call this: could have matched already. */
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6b8b0abbfab4..94a66541e0b7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -128,6 +128,20 @@ ctnetlink_dump_tuples(struct sk_buff *skb,
}
static inline int
+ctnetlink_dump_zone_id(struct sk_buff *skb, int attrtype,
+ const struct nf_conntrack_zone *zone, int dir)
+{
+ if (zone->id == NF_CT_DEFAULT_ZONE_ID || zone->dir != dir)
+ return 0;
+ if (nla_put_be16(skb, attrtype, htons(zone->id)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static inline int
ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
{
if (nla_put_be32(skb, CTA_STATUS, htonl(ct->status)))
@@ -458,6 +472,7 @@ static int
ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
struct nf_conn *ct)
{
+ const struct nf_conntrack_zone *zone;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
@@ -473,11 +488,16 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
+ zone = nf_ct_zone(ct);
+
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -485,10 +505,13 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
- if (nf_ct_zone(ct) &&
- nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
if (ctnetlink_dump_status(skb, ct) < 0 ||
@@ -598,7 +621,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
- + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
#endif
+ ctnetlink_proto_size(ct)
+ ctnetlink_label_size(ct)
@@ -609,6 +632,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
static int
ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
{
+ const struct nf_conntrack_zone *zone;
struct net *net;
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
@@ -655,11 +679,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
nfmsg->res_id = 0;
rcu_read_lock();
+ zone = nf_ct_zone(ct);
+
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -667,10 +696,13 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
- if (nf_ct_zone(ct) &&
- nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
goto nla_put_failure;
if (ctnetlink_dump_id(skb, ct) < 0)
@@ -920,15 +952,54 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr,
return ret;
}
+static int
+ctnetlink_parse_zone(const struct nlattr *attr,
+ struct nf_conntrack_zone *zone)
+{
+ nf_ct_zone_init(zone, NF_CT_DEFAULT_ZONE_ID,
+ NF_CT_DEFAULT_ZONE_DIR, 0);
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+ if (attr)
+ zone->id = ntohs(nla_get_be16(attr));
+#else
+ if (attr)
+ return -EOPNOTSUPP;
+#endif
+ return 0;
+}
+
+static int
+ctnetlink_parse_tuple_zone(struct nlattr *attr, enum ctattr_type type,
+ struct nf_conntrack_zone *zone)
+{
+ int ret;
+
+ if (zone->id != NF_CT_DEFAULT_ZONE_ID)
+ return -EINVAL;
+
+ ret = ctnetlink_parse_zone(attr, zone);
+ if (ret < 0)
+ return ret;
+
+ if (type == CTA_TUPLE_REPLY)
+ zone->dir = NF_CT_ZONE_DIR_REPL;
+ else
+ zone->dir = NF_CT_ZONE_DIR_ORIG;
+
+ return 0;
+}
+
static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
[CTA_TUPLE_IP] = { .type = NLA_NESTED },
[CTA_TUPLE_PROTO] = { .type = NLA_NESTED },
+ [CTA_TUPLE_ZONE] = { .type = NLA_U16 },
};
static int
ctnetlink_parse_tuple(const struct nlattr * const cda[],
struct nf_conntrack_tuple *tuple,
- enum ctattr_type type, u_int8_t l3num)
+ enum ctattr_type type, u_int8_t l3num,
+ struct nf_conntrack_zone *zone)
{
struct nlattr *tb[CTA_TUPLE_MAX+1];
int err;
@@ -955,6 +1026,16 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
if (err < 0)
return err;
+ if (tb[CTA_TUPLE_ZONE]) {
+ if (!zone)
+ return -EINVAL;
+
+ err = ctnetlink_parse_tuple_zone(tb[CTA_TUPLE_ZONE],
+ type, zone);
+ if (err < 0)
+ return err;
+ }
+
/* orig and expect tuples get DIR_ORIGINAL */
if (type == CTA_TUPLE_REPLY)
tuple->dst.dir = IP_CT_DIR_REPLY;
@@ -964,21 +1045,6 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
return 0;
}
-static int
-ctnetlink_parse_zone(const struct nlattr *attr, u16 *zone)
-{
- if (attr)
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- *zone = ntohs(nla_get_be16(attr));
-#else
- return -EOPNOTSUPP;
-#endif
- else
- *zone = 0;
-
- return 0;
-}
-
static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
[CTA_HELP_NAME] = { .type = NLA_NUL_STRING,
.len = NF_CT_HELPER_NAME_LEN - 1 },
@@ -1058,7 +1124,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nf_conn *ct;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1066,9 +1132,11 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_TUPLE_ORIG])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+ u3, &zone);
else if (cda[CTA_TUPLE_REPLY])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+ u3, &zone);
else {
return ctnetlink_flush_conntrack(net, cda,
NETLINK_CB(skb).portid,
@@ -1078,7 +1146,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- h = nf_conntrack_find_get(net, zone, &tuple);
+ h = nf_conntrack_find_get(net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -1112,7 +1180,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct sk_buff *skb2 = NULL;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -1138,16 +1206,18 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_TUPLE_ORIG])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG,
+ u3, &zone);
else if (cda[CTA_TUPLE_REPLY])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY,
+ u3, &zone);
else
return -EINVAL;
if (err < 0)
return err;
- h = nf_conntrack_find_get(net, zone, &tuple);
+ h = nf_conntrack_find_get(net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -1645,7 +1715,8 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
}
static struct nf_conn *
-ctnetlink_create_conntrack(struct net *net, u16 zone,
+ctnetlink_create_conntrack(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nlattr * const cda[],
struct nf_conntrack_tuple *otuple,
struct nf_conntrack_tuple *rtuple,
@@ -1761,7 +1832,8 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
struct nf_conntrack_tuple_hash *master_h;
struct nf_conn *master_ct;
- err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER,
+ u3, NULL);
if (err < 0)
goto err2;
@@ -1804,7 +1876,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct nf_conn *ct;
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone);
@@ -1812,21 +1884,23 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_TUPLE_ORIG]) {
- err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
+ err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG,
+ u3, &zone);
if (err < 0)
return err;
}
if (cda[CTA_TUPLE_REPLY]) {
- err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3);
+ err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY,
+ u3, &zone);
if (err < 0)
return err;
}
if (cda[CTA_TUPLE_ORIG])
- h = nf_conntrack_find_get(net, zone, &otuple);
+ h = nf_conntrack_find_get(net, &zone, &otuple);
else if (cda[CTA_TUPLE_REPLY])
- h = nf_conntrack_find_get(net, zone, &rtuple);
+ h = nf_conntrack_find_get(net, &zone, &rtuple);
if (h == NULL) {
err = -ENOENT;
@@ -1836,7 +1910,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY])
return -EINVAL;
- ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,
+ ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple,
&rtuple, u3);
if (IS_ERR(ct))
return PTR_ERR(ct);
@@ -2082,7 +2156,7 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
+ nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
- + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE */
+ + nla_total_size(sizeof(u_int16_t)) /* CTA_ZONE|CTA_TUPLE_ZONE */
#endif
+ ctnetlink_proto_size(ct)
;
@@ -2091,14 +2165,20 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
static int
ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
{
+ const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
rcu_read_lock();
+ zone = nf_ct_zone(ct);
+
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_ORIG) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
@@ -2106,12 +2186,14 @@ ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
goto nla_put_failure;
if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
+ if (ctnetlink_dump_zone_id(skb, CTA_TUPLE_ZONE, zone,
+ NF_CT_ZONE_DIR_REPL) < 0)
+ goto nla_put_failure;
nla_nest_end(skb, nest_parms);
- if (nf_ct_zone(ct)) {
- if (nla_put_be16(skb, CTA_ZONE, htons(nf_ct_zone(ct))))
- goto nla_put_failure;
- }
+ if (ctnetlink_dump_zone_id(skb, CTA_ZONE, zone,
+ NF_CT_DEFAULT_ZONE_DIR) < 0)
+ goto nla_put_failure;
if (ctnetlink_dump_id(skb, ct) < 0)
goto nla_put_failure;
@@ -2218,12 +2300,12 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
int err;
err = ctnetlink_parse_tuple(cda, tuple, CTA_EXPECT_TUPLE,
- nf_ct_l3num(ct));
+ nf_ct_l3num(ct), NULL);
if (err < 0)
return err;
return ctnetlink_parse_tuple(cda, mask, CTA_EXPECT_MASK,
- nf_ct_l3num(ct));
+ nf_ct_l3num(ct), NULL);
}
static int
@@ -2612,23 +2694,22 @@ static int ctnetlink_dump_exp_ct(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- u16 zone = 0;
+ struct nf_conntrack_zone zone;
struct netlink_dump_control c = {
.dump = ctnetlink_exp_ct_dump_table,
.done = ctnetlink_exp_done,
};
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
if (err < 0)
return err;
- if (cda[CTA_EXPECT_ZONE]) {
- err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
- if (err < 0)
- return err;
- }
+ err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone);
+ if (err < 0)
+ return err;
- h = nf_conntrack_find_get(net, zone, &tuple);
+ h = nf_conntrack_find_get(net, &zone, &tuple);
if (!h)
return -ENOENT;
@@ -2652,7 +2733,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
struct sk_buff *skb2;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
@@ -2672,16 +2753,18 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
return err;
if (cda[CTA_EXPECT_TUPLE])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
else if (cda[CTA_EXPECT_MASTER])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
else
return -EINVAL;
if (err < 0)
return err;
- exp = nf_ct_expect_find_get(net, zone, &tuple);
+ exp = nf_ct_expect_find_get(net, &zone, &tuple);
if (!exp)
return -ENOENT;
@@ -2732,8 +2815,8 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct hlist_node *next;
u_int8_t u3 = nfmsg->nfgen_family;
+ struct nf_conntrack_zone zone;
unsigned int i;
- u16 zone;
int err;
if (cda[CTA_EXPECT_TUPLE]) {
@@ -2742,12 +2825,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
/* bump usage count to 2 */
- exp = nf_ct_expect_find_get(net, zone, &tuple);
+ exp = nf_ct_expect_find_get(net, &zone, &tuple);
if (!exp)
return -ENOENT;
@@ -2849,7 +2933,8 @@ ctnetlink_parse_expect_nat(const struct nlattr *attr,
return -EINVAL;
err = ctnetlink_parse_tuple((const struct nlattr * const *)tb,
- &nat_tuple, CTA_EXPECT_NAT_TUPLE, u3);
+ &nat_tuple, CTA_EXPECT_NAT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
@@ -2937,7 +3022,8 @@ err_out:
}
static int
-ctnetlink_create_expect(struct net *net, u16 zone,
+ctnetlink_create_expect(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nlattr * const cda[],
u_int8_t u3, u32 portid, int report)
{
@@ -2949,13 +3035,16 @@ ctnetlink_create_expect(struct net *net, u16 zone,
int err;
/* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+ err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK,
+ u3, NULL);
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+ err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER,
+ u3, NULL);
if (err < 0)
return err;
@@ -3011,7 +3100,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_expect *exp;
struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
- u16 zone;
+ struct nf_conntrack_zone zone;
int err;
if (!cda[CTA_EXPECT_TUPLE]
@@ -3023,19 +3112,18 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
if (err < 0)
return err;
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE,
+ u3, NULL);
if (err < 0)
return err;
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = __nf_ct_expect_find(net, zone, &tuple);
-
+ exp = __nf_ct_expect_find(net, &zone, &tuple);
if (!exp) {
spin_unlock_bh(&nf_conntrack_expect_lock);
err = -ENOENT;
if (nlh->nlmsg_flags & NLM_F_CREATE) {
- err = ctnetlink_create_expect(net, zone, cda,
- u3,
+ err = ctnetlink_create_expect(net, &zone, cda, u3,
NETLINK_CB(skb).portid,
nlmsg_report(nlh));
}
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 825c3e3f8305..5588c7ae1ac2 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -143,13 +143,14 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct,
const struct nf_conntrack_tuple *t)
{
const struct nf_conntrack_tuple_hash *h;
+ const struct nf_conntrack_zone *zone;
struct nf_conntrack_expect *exp;
struct nf_conn *sibling;
- u16 zone = nf_ct_zone(ct);
pr_debug("trying to timeout ct or exp for tuple ");
nf_ct_dump_tuple(t);
+ zone = nf_ct_zone(ct);
h = nf_conntrack_find_get(net, zone, t);
if (h) {
sibling = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b45da90fad32..67197731eb68 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -42,6 +42,8 @@ static const char *const sctp_conntrack_names[] = {
"SHUTDOWN_SENT",
"SHUTDOWN_RECD",
"SHUTDOWN_ACK_SENT",
+ "HEARTBEAT_SENT",
+ "HEARTBEAT_ACKED",
};
#define SECS * HZ
@@ -57,6 +59,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
+ [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
};
#define sNO SCTP_CONNTRACK_NONE
@@ -67,6 +71,8 @@ static unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] __read_mostly = {
#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
+#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
+#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
#define sIV SCTP_CONNTRACK_MAX
/*
@@ -88,6 +94,10 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
to that of the SHUTDOWN chunk.
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
+HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
+HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to
+ that of the HEARTBEAT chunk. Secondary connection is
+ established.
*/
/* TODO
@@ -97,36 +107,40 @@ CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
- Check the error type in the reply dir before transitioning from
cookie echoed to closed.
- Sec 5.2.4 of RFC 2960
- - Multi Homing support.
+ - Full Multi Homing support.
*/
/* SCTP conntrack state transitions */
-static const u8 sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}
},
{
/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA}
}
};
@@ -278,9 +292,16 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8;
break;
+ case SCTP_CID_HEARTBEAT:
+ pr_debug("SCTP_CID_HEARTBEAT\n");
+ i = 9; /* "heartbeat" row of the sctp_conntracks table */
+ break;
+ case SCTP_CID_HEARTBEAT_ACK:
+ pr_debug("SCTP_CID_HEARTBEAT_ACK\n");
+ i = 10; /* "heartbeat_ack" row of the sctp_conntracks table */
+ break;
default:
- /* Other chunks like DATA, SACK, HEARTBEAT and
- its ACK do not cause a change in state */
+ /* Other chunks like DATA or SACK do not change the state */
pr_debug("Unknown chunk type, Will stay in %s\n",
sctp_conntrack_names[cur_state]);
return cur_state;
@@ -329,6 +350,8 @@ static int sctp_packet(struct nf_conn *ct,
!test_bit(SCTP_CID_COOKIE_ECHO, map) &&
!test_bit(SCTP_CID_ABORT, map) &&
!test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
sh->vtag != ct->proto.sctp.vtag[dir]) {
pr_debug("Verification tag check failed\n");
goto out;
@@ -357,6 +380,16 @@ static int sctp_packet(struct nf_conn *ct,
/* Sec 8.5.1 (D) */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
+ } else if (sch->type == SCTP_CID_HEARTBEAT ||
+ sch->type == SCTP_CID_HEARTBEAT_ACK) {
+ if (ct->proto.sctp.vtag[dir] == 0) {
+ pr_debug("Setting vtag %x for dir %d\n",
+ sh->vtag, dir);
+ ct->proto.sctp.vtag[dir] = sh->vtag;
+ } else if (sh->vtag != ct->proto.sctp.vtag[dir]) {
+ pr_debug("Verification tag check failed\n");
+ goto out_unlock;
+ }
}
old_state = ct->proto.sctp.state;
@@ -466,6 +499,10 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
/* Sec 8.5.1 (A) */
return false;
}
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
+ pr_debug("Setting vtag %x for secondary conntrack\n",
+ sh->vtag);
+ ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
}
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
@@ -610,6 +647,8 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
[CTA_TIMEOUT_SCTP_SHUTDOWN_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_RECD] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
+ [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
@@ -658,6 +697,18 @@ static struct ctl_table sctp_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
+ {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
{ }
};
@@ -730,6 +781,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
+ pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT];
+ pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED];
#endif
return 0;
}
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index ce3e840c8704..dff0f0cc59e4 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -103,9 +103,9 @@ static void nf_ct_sack_block_adjust(struct sk_buff *skb,
ntohl(sack->end_seq), ntohl(new_end_seq));
inet_proto_csum_replace4(&tcph->check, skb,
- sack->start_seq, new_start_seq, 0);
+ sack->start_seq, new_start_seq, false);
inet_proto_csum_replace4(&tcph->check, skb,
- sack->end_seq, new_end_seq, 0);
+ sack->end_seq, new_end_seq, false);
sack->start_seq = new_start_seq;
sack->end_seq = new_end_seq;
sackoff += sizeof(*sack);
@@ -193,8 +193,9 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
newseq = htonl(ntohl(tcph->seq) + seqoff);
newack = htonl(ntohl(tcph->ack_seq) - ackoff);
- inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
- inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
+ false);
pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index fc823fa5dcf5..1fb3cacc04e1 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -140,6 +140,35 @@ static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
}
#endif
+#ifdef CONFIG_NF_CONNTRACK_ZONES
+static void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+ int dir)
+{
+ const struct nf_conntrack_zone *zone = nf_ct_zone(ct);
+
+ if (zone->dir != dir)
+ return;
+ switch (zone->dir) {
+ case NF_CT_DEFAULT_ZONE_DIR:
+ seq_printf(s, "zone=%u ", zone->id);
+ break;
+ case NF_CT_ZONE_DIR_ORIG:
+ seq_printf(s, "zone-orig=%u ", zone->id);
+ break;
+ case NF_CT_ZONE_DIR_REPL:
+ seq_printf(s, "zone-reply=%u ", zone->id);
+ break;
+ default:
+ break;
+ }
+}
+#else
+static inline void ct_show_zone(struct seq_file *s, const struct nf_conn *ct,
+ int dir)
+{
+}
+#endif
+
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
static void ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct)
{
@@ -202,6 +231,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto);
+ ct_show_zone(s, ct, NF_CT_ZONE_DIR_ORIG);
+
if (seq_has_overflowed(s))
goto release;
@@ -214,6 +245,8 @@ static int ct_seq_show(struct seq_file *s, void *v)
print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto);
+ ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
+
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
goto release;
@@ -228,11 +261,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
#endif
ct_show_secctx(s, ct);
-
-#ifdef CONFIG_NF_CONNTRACK_ZONES
- seq_printf(s, "zone=%u ", nf_ct_zone(ct));
-#endif
-
+ ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
ct_show_delta_time(s, ct);
seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 399210693c2a..065522564ac6 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -19,7 +19,7 @@ unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
/* nf_queue.c */
int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
struct nf_hook_state *state, unsigned int queuenum);
-void nf_queue_nf_hook_drop(struct nf_hook_ops *ops);
+void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 4e0b47831d43..5113dfd39df9 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -118,14 +118,13 @@ EXPORT_SYMBOL(nf_xfrm_me_harder);
/* We keep an extra hash for each conntrack, for fast searching. */
static inline unsigned int
-hash_by_src(const struct net *net, u16 zone,
- const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
/* Original src, to ensure we map it consistently if poss. */
hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ zone ^ nf_conntrack_hash_rnd);
+ tuple->dst.protonum ^ nf_conntrack_hash_rnd);
return reciprocal_scale(hash, net->ct.nat_htable_size);
}
@@ -185,20 +184,22 @@ same_src(const struct nf_conn *ct,
/* Only called for SRC manip */
static int
-find_appropriate_src(struct net *net, u16 zone,
+find_appropriate_src(struct net *net,
+ const struct nf_conntrack_zone *zone,
const struct nf_nat_l3proto *l3proto,
const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
- unsigned int h = hash_by_src(net, zone, tuple);
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn_nat *nat;
const struct nf_conn *ct;
hlist_for_each_entry_rcu(nat, &net->ct.nat_bysource[h], bysource) {
ct = nat->ct;
- if (same_src(ct, tuple) && nf_ct_zone(ct) == zone) {
+ if (same_src(ct, tuple) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
nf_ct_invert_tuplepr(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
@@ -218,7 +219,8 @@ find_appropriate_src(struct net *net, u16 zone,
* the ip with the lowest src-ip/dst-ip/proto usage.
*/
static void
-find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
+find_best_ips_proto(const struct nf_conntrack_zone *zone,
+ struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
const struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
@@ -258,7 +260,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
*/
j = jhash2((u32 *)&tuple->src.u3, sizeof(tuple->src.u3) / sizeof(u32),
range->flags & NF_NAT_RANGE_PERSISTENT ?
- 0 : (__force u32)tuple->dst.u3.all[max] ^ zone);
+ 0 : (__force u32)tuple->dst.u3.all[max] ^ zone->id);
full_range = false;
for (i = 0; i <= max; i++) {
@@ -297,10 +299,12 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
struct nf_conn *ct,
enum nf_nat_manip_type maniptype)
{
+ const struct nf_conntrack_zone *zone;
const struct nf_nat_l3proto *l3proto;
const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
- u16 zone = nf_ct_zone(ct);
+
+ zone = nf_ct_zone(ct);
rcu_read_lock();
l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
@@ -420,7 +424,7 @@ nf_nat_setup_info(struct nf_conn *ct,
if (maniptype == NF_NAT_MANIP_SRC) {
unsigned int srchash;
- srchash = hash_by_src(net, nf_ct_zone(ct),
+ srchash = hash_by_src(net,
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
spin_lock_bh(&nf_nat_lock);
/* nf_conntrack_alter_reply might re-allocate extension aera */
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
index b8067b53ff3a..15c47b246d0d 100644
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ b/net/netfilter/nf_nat_proto_dccp.c
@@ -69,7 +69,7 @@ dccp_manip_pkt(struct sk_buff *skb,
l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
- 0);
+ false);
return true;
}
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
index 37f5505f4529..4f8820fc5148 100644
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ b/net/netfilter/nf_nat_proto_tcp.c
@@ -70,7 +70,7 @@ tcp_manip_pkt(struct sk_buff *skb,
return true;
l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
- inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
+ inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
return true;
}
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
index b0ede2f0d8bc..b1e627227b6e 100644
--- a/net/netfilter/nf_nat_proto_udp.c
+++ b/net/netfilter/nf_nat_proto_udp.c
@@ -57,7 +57,7 @@ udp_manip_pkt(struct sk_buff *skb,
l3proto->csum_update(skb, iphdroff, &hdr->check,
tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
- 0);
+ false);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
}
diff --git a/net/netfilter/nf_nat_proto_udplite.c b/net/netfilter/nf_nat_proto_udplite.c
index 368f14e01e75..58340c97bd83 100644
--- a/net/netfilter/nf_nat_proto_udplite.c
+++ b/net/netfilter/nf_nat_proto_udplite.c
@@ -56,7 +56,7 @@ udplite_manip_pkt(struct sk_buff *skb,
}
l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
- inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
+ inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, false);
if (!hdr->check)
hdr->check = CSUM_MANGLED_0;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 8a8b2abc35ff..96777f9a9350 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -105,21 +105,15 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-void nf_queue_nf_hook_drop(struct nf_hook_ops *ops)
+void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
{
const struct nf_queue_handler *qh;
- struct net *net;
- rtnl_lock();
rcu_read_lock();
qh = rcu_dereference(queue_handler);
- if (qh) {
- for_each_net(net) {
- qh->nf_hook_drop(net, ops);
- }
- }
+ if (qh)
+ qh->nf_hook_drop(net, ops);
rcu_read_unlock();
- rtnl_unlock();
}
/*
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index d7f168527903..8fbbdb09826e 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -17,10 +17,12 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_tcpudp.h>
#include <linux/netfilter/xt_SYNPROXY.h>
+
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_zones.h>
int synproxy_net_id;
EXPORT_SYMBOL_GPL(synproxy_net_id);
@@ -225,7 +227,7 @@ unsigned int synproxy_tstamp_adjust(struct sk_buff *skb,
synproxy->tsoff);
}
inet_proto_csum_replace4(&th->check, skb,
- old, *ptr, 0);
+ old, *ptr, false);
return 1;
}
optoff += op[1];
@@ -352,7 +354,7 @@ static int __net_init synproxy_net_init(struct net *net)
struct nf_conn *ct;
int err = -ENOMEM;
- ct = nf_ct_tmpl_alloc(net, 0, GFP_KERNEL);
+ ct = nf_ct_tmpl_alloc(net, &nf_ct_zone_dflt, GFP_KERNEL);
if (!ct)
goto err1;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index cfe636808541..4a41eb92bcc0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -130,20 +130,24 @@ static void nft_trans_destroy(struct nft_trans *trans)
int nft_register_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops)
{
+ struct net *net = read_pnet(&basechain->pnet);
+
if (basechain->flags & NFT_BASECHAIN_DISABLED)
return 0;
- return nf_register_hooks(basechain->ops, hook_nops);
+ return nf_register_net_hooks(net, basechain->ops, hook_nops);
}
EXPORT_SYMBOL_GPL(nft_register_basechain);
void nft_unregister_basechain(struct nft_base_chain *basechain,
unsigned int hook_nops)
{
+ struct net *net = read_pnet(&basechain->pnet);
+
if (basechain->flags & NFT_BASECHAIN_DISABLED)
return;
- nf_unregister_hooks(basechain->ops, hook_nops);
+ nf_unregister_net_hooks(net, basechain->ops, hook_nops);
}
EXPORT_SYMBOL_GPL(nft_unregister_basechain);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f77bad46ac68..05d0b03530f6 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -114,7 +114,6 @@ unsigned int
nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
{
const struct nft_chain *chain = ops->priv, *basechain = chain;
- const struct net *chain_net = read_pnet(&nft_base_chain(basechain)->pnet);
const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
@@ -125,10 +124,6 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
- /* Ignore chains that are not for the current network namespace */
- if (!net_eq(net, chain_net))
- return NF_ACCEPT;
-
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index c18af2f63eef..fefbf5f0b28d 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -27,8 +27,6 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
-static LIST_HEAD(nfnl_acct_list);
-
struct nf_acct {
atomic64_t pkts;
atomic64_t bytes;
@@ -53,6 +51,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
struct nf_acct *nfacct, *matching = NULL;
+ struct net *net = sock_net(nfnl);
char *acct_name;
unsigned int size = 0;
u32 flags = 0;
@@ -64,7 +63,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
if (strlen(acct_name) == 0)
return -EINVAL;
- list_for_each_entry(nfacct, &nfnl_acct_list, head) {
+ list_for_each_entry(nfacct, &net->nfnl_acct_list, head) {
if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
@@ -124,7 +123,7 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
}
atomic_set(&nfacct->refcnt, 1);
- list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
+ list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
return 0;
}
@@ -185,6 +184,7 @@ nla_put_failure:
static int
nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct nf_acct *cur, *last;
const struct nfacct_filter *filter = cb->data;
@@ -196,7 +196,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
cb->args[1] = 0;
rcu_read_lock();
- list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
if (last) {
if (cur != last)
continue;
@@ -257,6 +257,7 @@ static int
nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
+ struct net *net = sock_net(nfnl);
int ret = -ENOENT;
struct nf_acct *cur;
char *acct_name;
@@ -283,7 +284,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
return -EINVAL;
acct_name = nla_data(tb[NFACCT_NAME]);
- list_for_each_entry(cur, &nfnl_acct_list, head) {
+ list_for_each_entry(cur, &net->nfnl_acct_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
@@ -336,19 +337,20 @@ static int
nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
const struct nlmsghdr *nlh, const struct nlattr * const tb[])
{
+ struct net *net = sock_net(nfnl);
char *acct_name;
struct nf_acct *cur;
int ret = -ENOENT;
if (!tb[NFACCT_NAME]) {
- list_for_each_entry(cur, &nfnl_acct_list, head)
+ list_for_each_entry(cur, &net->nfnl_acct_list, head)
nfnl_acct_try_del(cur);
return 0;
}
acct_name = nla_data(tb[NFACCT_NAME]);
- list_for_each_entry(cur, &nfnl_acct_list, head) {
+ list_for_each_entry(cur, &net->nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
continue;
@@ -394,12 +396,12 @@ static const struct nfnetlink_subsystem nfnl_acct_subsys = {
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
-struct nf_acct *nfnl_acct_find_get(const char *acct_name)
+struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
{
struct nf_acct *cur, *acct = NULL;
rcu_read_lock();
- list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+ list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
continue;
@@ -422,7 +424,9 @@ EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
void nfnl_acct_put(struct nf_acct *acct)
{
- atomic_dec(&acct->refcnt);
+ if (atomic_dec_and_test(&acct->refcnt))
+ kfree_rcu(acct, rcu_head);
+
module_put(THIS_MODULE);
}
EXPORT_SYMBOL_GPL(nfnl_acct_put);
@@ -478,34 +482,59 @@ int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
}
EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
+static int __net_init nfnl_acct_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nfnl_acct_list);
+
+ return 0;
+}
+
+static void __net_exit nfnl_acct_net_exit(struct net *net)
+{
+ struct nf_acct *cur, *tmp;
+
+ list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
+ list_del_rcu(&cur->head);
+
+ if (atomic_dec_and_test(&cur->refcnt))
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+static struct pernet_operations nfnl_acct_ops = {
+ .init = nfnl_acct_net_init,
+ .exit = nfnl_acct_net_exit,
+};
+
static int __init nfnl_acct_init(void)
{
int ret;
+ ret = register_pernet_subsys(&nfnl_acct_ops);
+ if (ret < 0) {
+ pr_err("nfnl_acct_init: failed to register pernet ops\n");
+ goto err_out;
+ }
+
pr_info("nfnl_acct: registering with nfnetlink.\n");
ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
if (ret < 0) {
pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
- goto err_out;
+ goto cleanup_pernet;
}
return 0;
+
+cleanup_pernet:
+ unregister_pernet_subsys(&nfnl_acct_ops);
err_out:
return ret;
}
static void __exit nfnl_acct_exit(void)
{
- struct nf_acct *cur, *tmp;
-
pr_info("nfnl_acct: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&nfnl_acct_subsys);
-
- list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
- list_del_rcu(&cur->head);
- /* We are sure that our objects have no clients at this point,
- * it's safe to release them all without checking refcnt. */
- kfree_rcu(cur, rcu_head);
- }
+ unregister_pernet_subsys(&nfnl_acct_ops);
}
module_init(nfnl_acct_init);
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index 17591239229f..1067fb4c1ffa 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -18,39 +18,59 @@
#include <net/netfilter/nf_tables.h>
struct nft_counter {
- seqlock_t lock;
u64 bytes;
u64 packets;
};
+struct nft_counter_percpu {
+ struct nft_counter counter;
+ struct u64_stats_sync syncp;
+};
+
+struct nft_counter_percpu_priv {
+ struct nft_counter_percpu __percpu *counter;
+};
+
static void nft_counter_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
- struct nft_counter *priv = nft_expr_priv(expr);
-
- write_seqlock_bh(&priv->lock);
- priv->bytes += pkt->skb->len;
- priv->packets++;
- write_sequnlock_bh(&priv->lock);
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu *this_cpu;
+
+ local_bh_disable();
+ this_cpu = this_cpu_ptr(priv->counter);
+ u64_stats_update_begin(&this_cpu->syncp);
+ this_cpu->counter.bytes += pkt->skb->len;
+ this_cpu->counter.packets++;
+ u64_stats_update_end(&this_cpu->syncp);
+ local_bh_enable();
}
static int nft_counter_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
- struct nft_counter *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu *cpu_stats;
+ struct nft_counter total;
+ u64 bytes, packets;
unsigned int seq;
- u64 bytes;
- u64 packets;
-
- do {
- seq = read_seqbegin(&priv->lock);
- bytes = priv->bytes;
- packets = priv->packets;
- } while (read_seqretry(&priv->lock, seq));
-
- if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(bytes)))
- goto nla_put_failure;
- if (nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(packets)))
+ int cpu;
+
+ memset(&total, 0, sizeof(total));
+ for_each_possible_cpu(cpu) {
+ cpu_stats = per_cpu_ptr(priv->counter, cpu);
+ do {
+ seq = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ bytes = cpu_stats->counter.bytes;
+ packets = cpu_stats->counter.packets;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, seq));
+
+ total.packets += packets;
+ total.bytes += bytes;
+ }
+
+ if (nla_put_be64(skb, NFTA_COUNTER_BYTES, cpu_to_be64(total.bytes)) ||
+ nla_put_be64(skb, NFTA_COUNTER_PACKETS, cpu_to_be64(total.packets)))
goto nla_put_failure;
return 0;
@@ -67,23 +87,44 @@ static int nft_counter_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- struct nft_counter *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
+ struct nft_counter_percpu __percpu *cpu_stats;
+ struct nft_counter_percpu *this_cpu;
+
+ cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
+ if (cpu_stats == NULL)
+ return -ENOMEM; /* failures are reported as negative errno values */
+
+ preempt_disable();
+ this_cpu = this_cpu_ptr(cpu_stats);
+ if (tb[NFTA_COUNTER_PACKETS]) {
+ this_cpu->counter.packets =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
+ }
+ if (tb[NFTA_COUNTER_BYTES]) {
+ this_cpu->counter.bytes =
+ be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+ }
+ preempt_enable();
+ priv->counter = cpu_stats;
+ return 0;
+}
- if (tb[NFTA_COUNTER_PACKETS])
- priv->packets = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_PACKETS]));
- if (tb[NFTA_COUNTER_BYTES])
- priv->bytes = be64_to_cpu(nla_get_be64(tb[NFTA_COUNTER_BYTES]));
+static void nft_counter_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_counter_percpu_priv *priv = nft_expr_priv(expr);
- seqlock_init(&priv->lock);
- return 0;
+ free_percpu(priv->counter);
}
static struct nft_expr_type nft_counter_type;
static const struct nft_expr_ops nft_counter_ops = {
.type = &nft_counter_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_counter)),
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_counter_percpu_priv)),
.eval = nft_counter_eval,
.init = nft_counter_init,
+ .destroy = nft_counter_destroy,
.dump = nft_counter_dump,
};
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 435c1ccd6c0e..5d67938f8b2f 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -20,63 +20,79 @@
static DEFINE_SPINLOCK(limit_lock);
struct nft_limit {
+ u64 last;
u64 tokens;
+ u64 tokens_max;
u64 rate;
- u64 unit;
- unsigned long stamp;
+ u64 nsecs;
+ u32 burst;
};
-static void nft_limit_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
{
- struct nft_limit *priv = nft_expr_priv(expr);
+ u64 now, tokens;
+ s64 delta;
spin_lock_bh(&limit_lock);
- if (time_after_eq(jiffies, priv->stamp)) {
- priv->tokens = priv->rate;
- priv->stamp = jiffies + priv->unit * HZ;
- }
-
- if (priv->tokens >= 1) {
- priv->tokens--;
+ now = ktime_get_ns();
+ tokens = limit->tokens + now - limit->last;
+ if (tokens > limit->tokens_max)
+ tokens = limit->tokens_max;
+
+ limit->last = now;
+ delta = tokens - cost;
+ if (delta >= 0) {
+ limit->tokens = delta;
spin_unlock_bh(&limit_lock);
- return;
+ return false;
}
+ limit->tokens = tokens;
spin_unlock_bh(&limit_lock);
-
- regs->verdict.code = NFT_BREAK;
+ return true;
}
-static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
- [NFTA_LIMIT_RATE] = { .type = NLA_U64 },
- [NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
-};
-
-static int nft_limit_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
+static int nft_limit_init(struct nft_limit *limit,
const struct nlattr * const tb[])
{
- struct nft_limit *priv = nft_expr_priv(expr);
+ u64 unit;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
- priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
- priv->unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
- priv->stamp = jiffies + priv->unit * HZ;
- priv->tokens = priv->rate;
+ limit->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
+ limit->nsecs = unit * NSEC_PER_SEC;
+ if (limit->rate == 0 || limit->nsecs < unit)
+ return -EOVERFLOW;
+ limit->tokens = limit->tokens_max = limit->nsecs;
+
+ if (tb[NFTA_LIMIT_BURST]) {
+ u64 rate;
+
+ limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+
+ rate = limit->rate + limit->burst;
+ if (rate < limit->rate)
+ return -EOVERFLOW;
+
+ limit->rate = rate;
+ }
+ limit->last = ktime_get_ns();
+
return 0;
}
-static int nft_limit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
+ enum nft_limit_type type)
{
- const struct nft_limit *priv = nft_expr_priv(expr);
+ u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
+ u64 rate = limit->rate - limit->burst;
- if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(priv->rate)))
- goto nla_put_failure;
- if (nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(priv->unit)))
+ if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate)) ||
+ nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs)) ||
+ nla_put_be32(skb, NFTA_LIMIT_BURST, htonl(limit->burst)) ||
+ nla_put_be32(skb, NFTA_LIMIT_TYPE, htonl(type)))
goto nla_put_failure;
return 0;
@@ -84,18 +100,114 @@ nla_put_failure:
return -1;
}
+struct nft_limit_pkts {
+ struct nft_limit limit;
+ u64 cost;
+};
+
+static void nft_limit_pkts_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit_pkts *priv = nft_expr_priv(expr);
+
+ if (nft_limit_eval(&priv->limit, priv->cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = {
+ [NFTA_LIMIT_RATE] = { .type = NLA_U64 },
+ [NFTA_LIMIT_UNIT] = { .type = NLA_U64 },
+ [NFTA_LIMIT_BURST] = { .type = NLA_U32 },
+ [NFTA_LIMIT_TYPE] = { .type = NLA_U32 },
+};
+
+static int nft_limit_pkts_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_limit_pkts *priv = nft_expr_priv(expr);
+ int err;
+
+ err = nft_limit_init(&priv->limit, tb);
+ if (err < 0)
+ return err;
+
+ priv->cost = div_u64(priv->limit.nsecs, priv->limit.rate);
+ return 0;
+}
+
+static int nft_limit_pkts_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_limit_pkts *priv = nft_expr_priv(expr);
+
+ return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS);
+}
+
static struct nft_expr_type nft_limit_type;
-static const struct nft_expr_ops nft_limit_ops = {
+static const struct nft_expr_ops nft_limit_pkts_ops = {
+ .type = &nft_limit_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)),
+ .eval = nft_limit_pkts_eval,
+ .init = nft_limit_pkts_init,
+ .dump = nft_limit_pkts_dump,
+};
+
+static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_limit *priv = nft_expr_priv(expr);
+ u64 cost = div_u64(priv->nsecs * pkt->skb->len, priv->rate);
+
+ if (nft_limit_eval(priv, cost))
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_limit *priv = nft_expr_priv(expr);
+
+ return nft_limit_init(priv, tb);
+}
+
+static int nft_limit_pkt_bytes_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_limit *priv = nft_expr_priv(expr);
+
+ return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES);
+}
+
+static const struct nft_expr_ops nft_limit_pkt_bytes_ops = {
.type = &nft_limit_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_limit)),
- .eval = nft_limit_eval,
- .init = nft_limit_init,
- .dump = nft_limit_dump,
+ .eval = nft_limit_pkt_bytes_eval,
+ .init = nft_limit_pkt_bytes_init,
+ .dump = nft_limit_pkt_bytes_dump,
};
+static const struct nft_expr_ops *
+nft_limit_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_LIMIT_TYPE] == NULL)
+ return &nft_limit_pkts_ops;
+
+ switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) {
+ case NFT_LIMIT_PKTS:
+ return &nft_limit_pkts_ops;
+ case NFT_LIMIT_PKT_BYTES:
+ return &nft_limit_pkt_bytes_ops;
+ }
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_limit_type __read_mostly = {
.name = "limit",
- .ops = &nft_limit_ops,
+ .select_ops = nft_limit_select_ops,
.policy = nft_limit_policy,
.maxattr = NFTA_LIMIT_MAX,
.flags = NFT_EXPR_STATEFUL,
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 52561e1c31e2..cb2f13ebb5a6 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -166,11 +166,13 @@ void nft_meta_get_eval(const struct nft_expr *expr,
goto err;
*dest = out->group;
break;
+#ifdef CONFIG_CGROUP_NET_CLASSID
case NFT_META_CGROUP:
if (skb->sk == NULL || !sk_fullsock(skb->sk))
goto err;
*dest = skb->sk->sk_classid;
break;
+#endif
default:
WARN_ON(1);
goto err;
@@ -246,7 +248,9 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_CPU:
case NFT_META_IIFGROUP:
case NFT_META_OIFGROUP:
+#ifdef CONFIG_CGROUP_NET_CLASSID
case NFT_META_CGROUP:
+#endif
len = sizeof(u32);
break;
case NFT_META_IIFNAME:
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 94fb3b27a2c5..09b4b07eb676 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -9,6 +9,7 @@
*/
#include <linux/kernel.h>
+#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
@@ -17,6 +18,53 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
+/* add vlan header into the user buffer if the tag was removed by offloads */
+static bool
+nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
+{
+ int mac_off = skb_mac_header(skb) - skb->data;
+ u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d;
+ struct vlan_ethhdr veth;
+
+ vlanh = (u8 *) &veth;
+ if (offset < ETH_HLEN) {
+ u8 ethlen = min_t(u8, len, ETH_HLEN - offset);
+
+ if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN))
+ return false;
+
+ veth.h_vlan_proto = skb->vlan_proto;
+
+ memcpy(dst_u8, vlanh + offset, ethlen);
+
+ len -= ethlen;
+ if (len == 0)
+ return true;
+
+ dst_u8 += ethlen;
+ offset = ETH_HLEN;
+ } else if (offset >= VLAN_ETH_HLEN) {
+ offset -= VLAN_HLEN;
+ goto skip;
+ }
+
+ veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth.h_vlan_encapsulated_proto = skb->protocol;
+
+ vlanh += offset;
+
+ vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset);
+ memcpy(dst_u8, vlanh, vlan_len);
+
+ len -= vlan_len;
+ if (!len)
+ return true;
+
+ dst_u8 += vlan_len;
+ skip:
+ return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
+}
+
static void nft_payload_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -26,10 +74,18 @@ static void nft_payload_eval(const struct nft_expr *expr,
u32 *dest = &regs->data[priv->dreg];
int offset;
+ dest[priv->len / NFT_REG32_SIZE] = 0;
switch (priv->base) {
case NFT_PAYLOAD_LL_HEADER:
if (!skb_mac_header_was_set(skb))
goto err;
+
+ if (skb_vlan_tag_present(skb)) {
+ if (!nft_payload_copy_vlan(dest, skb,
+ priv->offset, priv->len))
+ goto err;
+ return;
+ }
offset = skb_mac_header(skb) - skb->data;
break;
case NFT_PAYLOAD_NETWORK_HEADER:
@@ -43,7 +99,6 @@ static void nft_payload_eval(const struct nft_expr *expr,
}
offset += priv->offset;
- dest[priv->len / NFT_REG32_SIZE] = 0;
if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
goto err;
return;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d324fe71260c..9b42b5ea6dcd 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
[NFPROTO_IPV6] = "ip6",
};
-/* Allow this many total (re)entries. */
-static const unsigned int xt_jumpstack_multiplier = 2;
-
/* Registration hooks for targets. */
int xt_register_target(struct xt_target *target)
{
@@ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info)
kvfree(info->jumpstack);
}
- free_percpu(info->stackptr);
-
kvfree(info);
}
EXPORT_SYMBOL(xt_free_table_info);
@@ -732,15 +727,14 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
DEFINE_PER_CPU(seqcount_t, xt_recseq);
EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
+struct static_key xt_tee_enabled __read_mostly;
+EXPORT_SYMBOL_GPL(xt_tee_enabled);
+
static int xt_jumpstack_alloc(struct xt_table_info *i)
{
unsigned int size;
int cpu;
- i->stackptr = alloc_percpu(unsigned int);
- if (i->stackptr == NULL)
- return -ENOMEM;
-
size = sizeof(void **) * nr_cpu_ids;
if (size > PAGE_SIZE)
i->jumpstack = vzalloc(size);
@@ -749,8 +743,21 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
if (i->jumpstack == NULL)
return -ENOMEM;
- i->stacksize *= xt_jumpstack_multiplier;
- size = sizeof(void *) * i->stacksize;
+ /* ruleset without jumps -- no stack needed */
+ if (i->stacksize == 0)
+ return 0;
+
+ /* Jumpstack needs to be able to record two full callchains, one
+ * from the first rule set traversal, plus one table reentrancy
+ * via -j TEE without clobbering the callchain that brought us to
+ * TEE target.
+ *
+ * This is done by allocating two jumpstacks per cpu, on reentry
+ * the upper half of the stack is used.
+ *
+ * see the jumpstack setup in ipt_do_table() for more details.
+ */
+ size = sizeof(void *) * i->stacksize * 2u;
for_each_possible_cpu(cpu) {
if (size > PAGE_SIZE)
i->jumpstack[cpu] = vmalloc_node(size,
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 43ddeee404e9..8e524898ccea 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -181,9 +181,23 @@ out:
#endif
}
+static u16 xt_ct_flags_to_dir(const struct xt_ct_target_info_v1 *info)
+{
+ switch (info->flags & (XT_CT_ZONE_DIR_ORIG |
+ XT_CT_ZONE_DIR_REPL)) {
+ case XT_CT_ZONE_DIR_ORIG:
+ return NF_CT_ZONE_DIR_ORIG;
+ case XT_CT_ZONE_DIR_REPL:
+ return NF_CT_ZONE_DIR_REPL;
+ default:
+ return NF_CT_DEFAULT_ZONE_DIR;
+ }
+}
+
static int xt_ct_tg_check(const struct xt_tgchk_param *par,
struct xt_ct_target_info_v1 *info)
{
+ struct nf_conntrack_zone zone;
struct nf_conn *ct;
int ret = -EOPNOTSUPP;
@@ -193,7 +207,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
}
#ifndef CONFIG_NF_CONNTRACK_ZONES
- if (info->zone)
+ if (info->zone || info->flags & (XT_CT_ZONE_DIR_ORIG |
+ XT_CT_ZONE_DIR_REPL |
+ XT_CT_ZONE_MARK))
goto err1;
#endif
@@ -201,7 +217,13 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
if (ret < 0)
goto err1;
- ct = nf_ct_tmpl_alloc(par->net, info->zone, GFP_KERNEL);
+ memset(&zone, 0, sizeof(zone));
+ zone.id = info->zone;
+ zone.dir = xt_ct_flags_to_dir(info);
+ if (info->flags & XT_CT_ZONE_MARK)
+ zone.flags |= NF_CT_FLAG_MARK;
+
+ ct = nf_ct_tmpl_alloc(par->net, &zone, GFP_KERNEL);
if (!ct) {
ret = -ENOMEM;
goto err2;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8c3190e2fc6a..8c02501a530f 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -144,7 +144,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
inet_proto_csum_replace2(&tcph->check, skb,
htons(oldmss), htons(newmss),
- 0);
+ false);
return 0;
}
}
@@ -185,18 +185,18 @@ tcpmss_mangle_packet(struct sk_buff *skb,
memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr));
inet_proto_csum_replace2(&tcph->check, skb,
- htons(len), htons(len + TCPOLEN_MSS), 1);
+ htons(len), htons(len + TCPOLEN_MSS), true);
opt[0] = TCPOPT_MSS;
opt[1] = TCPOLEN_MSS;
opt[2] = (newmss & 0xff00) >> 8;
opt[3] = newmss & 0x00ff;
- inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
+ inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false);
oldval = ((__be16 *)tcph)[6];
tcph->doff += TCPOLEN_MSS/4;
inet_proto_csum_replace2(&tcph->check, skb,
- oldval, ((__be16 *)tcph)[6], 0);
+ oldval, ((__be16 *)tcph)[6], false);
return TCPOLEN_MSS;
}
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 625fa1d636a0..eb92bffff11c 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -80,7 +80,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
n <<= 8;
}
inet_proto_csum_replace2(&tcph->check, skb, htons(o),
- htons(n), 0);
+ htons(n), false);
}
memset(opt + i, TCPOPT_NOP, optl);
}
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index a747eb475b68..49fee6aa2c0a 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -10,26 +10,15 @@
* modify it under the terms of the GNU General Public License
* version 2 or later, as published by the Free Software Foundation.
*/
-#include <linux/ip.h>
#include <linux/module.h>
-#include <linux/percpu.h>
-#include <linux/route.h>
#include <linux/skbuff.h>
-#include <linux/notifier.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/ip6_route.h>
-#include <net/route.h>
+#include <linux/route.h>
#include <linux/netfilter/x_tables.h>
+#include <net/route.h>
+#include <net/netfilter/ipv4/nf_dup_ipv4.h>
+#include <net/netfilter/ipv6/nf_dup_ipv6.h>
#include <linux/netfilter/xt_TEE.h>
-#if IS_ENABLED(CONFIG_NF_CONNTRACK)
-# define WITH_CONNTRACK 1
-# include <net/netfilter/nf_conntrack.h>
-#endif
-
struct xt_tee_priv {
struct notifier_block notifier;
struct xt_tee_tginfo *tginfo;
@@ -37,163 +26,25 @@ struct xt_tee_priv {
};
static const union nf_inet_addr tee_zero_address;
-static DEFINE_PER_CPU(bool, tee_active);
-
-static struct net *pick_net(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_NS
- const struct dst_entry *dst;
-
- if (skb->dev != NULL)
- return dev_net(skb->dev);
- dst = skb_dst(skb);
- if (dst != NULL && dst->dev != NULL)
- return dev_net(dst->dev);
-#endif
- return &init_net;
-}
-
-static bool
-tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct net *net = pick_net(skb);
- struct rtable *rt;
- struct flowi4 fl4;
-
- memset(&fl4, 0, sizeof(fl4));
- if (info->priv) {
- if (info->priv->oif == -1)
- return false;
- fl4.flowi4_oif = info->priv->oif;
- }
- fl4.daddr = info->gw.ip;
- fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
- fl4.flowi4_flags = FLOWI_FLAG_KNOWN_NH;
- rt = ip_route_output_key(net, &fl4);
- if (IS_ERR(rt))
- return false;
-
- skb_dst_drop(skb);
- skb_dst_set(skb, &rt->dst);
- skb->dev = rt->dst.dev;
- skb->protocol = htons(ETH_P_IP);
- return true;
-}
static unsigned int
tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
- struct iphdr *iph;
- if (__this_cpu_read(tee_active))
- return XT_CONTINUE;
- /*
- * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
- * the original skb, which should continue on its way as if nothing has
- * happened. The copy should be independently delivered to the TEE
- * --gateway.
- */
- skb = pskb_copy(skb, GFP_ATOMIC);
- if (skb == NULL)
- return XT_CONTINUE;
-
-#ifdef WITH_CONNTRACK
- /* Avoid counting cloned packets towards the original connection. */
- nf_conntrack_put(skb->nfct);
- skb->nfct = &nf_ct_untracked_get()->ct_general;
- skb->nfctinfo = IP_CT_NEW;
- nf_conntrack_get(skb->nfct);
-#endif
- /*
- * If we are in PREROUTING/INPUT, the checksum must be recalculated
- * since the length could have changed as a result of defragmentation.
- *
- * We also decrease the TTL to mitigate potential TEE loops
- * between two hosts.
- *
- * Set %IP_DF so that the original source is notified of a potentially
- * decreased MTU on the clone route. IPv6 does this too.
- */
- iph = ip_hdr(skb);
- iph->frag_off |= htons(IP_DF);
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_IN)
- --iph->ttl;
- ip_send_check(iph);
+ nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif);
- if (tee_tg_route4(skb, info)) {
- __this_cpu_write(tee_active, true);
- ip_local_out(skb);
- __this_cpu_write(tee_active, false);
- } else {
- kfree_skb(skb);
- }
return XT_CONTINUE;
}
#if IS_ENABLED(CONFIG_IPV6)
-static bool
-tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
-{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct net *net = pick_net(skb);
- struct dst_entry *dst;
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
- if (info->priv) {
- if (info->priv->oif == -1)
- return false;
- fl6.flowi6_oif = info->priv->oif;
- }
- fl6.daddr = info->gw.in6;
- fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
- (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
- fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
- dst = ip6_route_output(net, NULL, &fl6);
- if (dst->error) {
- dst_release(dst);
- return false;
- }
- skb_dst_drop(skb);
- skb_dst_set(skb, dst);
- skb->dev = dst->dev;
- skb->protocol = htons(ETH_P_IPV6);
- return true;
-}
-
static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
- if (__this_cpu_read(tee_active))
- return XT_CONTINUE;
- skb = pskb_copy(skb, GFP_ATOMIC);
- if (skb == NULL)
- return XT_CONTINUE;
+ nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif);
-#ifdef WITH_CONNTRACK
- nf_conntrack_put(skb->nfct);
- skb->nfct = &nf_ct_untracked_get()->ct_general;
- skb->nfctinfo = IP_CT_NEW;
- nf_conntrack_get(skb->nfct);
-#endif
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_IN) {
- struct ipv6hdr *iph = ipv6_hdr(skb);
- --iph->hop_limit;
- }
- if (tee_tg_route6(skb, info)) {
- __this_cpu_write(tee_active, true);
- ip6_local_out(skb);
- __this_cpu_write(tee_active, false);
- } else {
- kfree_skb(skb);
- }
return XT_CONTINUE;
}
#endif
@@ -252,6 +103,7 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
} else
info->priv = NULL;
+ static_key_slow_inc(&xt_tee_enabled);
return 0;
}
@@ -263,6 +115,7 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par)
unregister_netdevice_notifier(&info->priv->notifier);
kfree(info->priv);
}
+ static_key_slow_dec(&xt_tee_enabled);
}
static struct xt_target tee_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index cca96cec1b68..d0c96c5ae29a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -272,8 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk));
- inet_twsk_put(inet_twsk(sk));
+ inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
@@ -437,8 +436,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk));
- inet_twsk_put(inet_twsk(sk));
+ inet_twsk_deschedule_put(inet_twsk(sk));
sk = sk2;
}
}
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 29ba6218a820..075d89d94d28 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -134,7 +134,7 @@ static bool add_hlist(struct hlist_head *head,
static unsigned int check_hlist(struct net *net,
struct hlist_head *head,
const struct nf_conntrack_tuple *tuple,
- u16 zone,
+ const struct nf_conntrack_zone *zone,
bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
@@ -201,7 +201,7 @@ static unsigned int
count_tree(struct net *net, struct rb_root *root,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr, const union nf_inet_addr *mask,
- u8 family, u16 zone)
+ u8 family, const struct nf_conntrack_zone *zone)
{
struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
struct rb_node **rbnode, *parent;
@@ -290,7 +290,8 @@ static int count_them(struct net *net,
const struct nf_conntrack_tuple *tuple,
const union nf_inet_addr *addr,
const union nf_inet_addr *mask,
- u_int8_t family, u16 zone)
+ u_int8_t family,
+ const struct nf_conntrack_zone *zone)
{
struct rb_root *root;
int count;
@@ -321,10 +322,10 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int connections;
- u16 zone = NF_CT_DEFAULT_ZONE;
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) {
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
index 8c646ed9c921..3048a7e3a90a 100644
--- a/net/netfilter/xt_nfacct.c
+++ b/net/netfilter/xt_nfacct.c
@@ -37,7 +37,7 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par)
struct xt_nfacct_match_info *info = par->matchinfo;
struct nf_acct *nfacct;
- nfacct = nfnl_acct_find_get(info->name);
+ nfacct = nfnl_acct_find_get(par->net, info->name);
if (nfacct == NULL) {
pr_info("xt_nfacct: accounting object with name `%s' "
"does not exists\n", info->name);
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 15840401a2ce..422dc0567de9 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -34,7 +34,7 @@ config OPENVSWITCH
config OPENVSWITCH_GRE
tristate "Open vSwitch GRE tunneling support"
depends on OPENVSWITCH
- depends on NET_IPGRE_DEMUX
+ depends on NET_IPGRE
default OPENVSWITCH
---help---
If you say Y here, then the Open vSwitch will be able create GRE
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 91b9478413ef..6e1701de04d8 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -15,6 +15,6 @@ openvswitch-y := \
vport-internal_dev.o \
vport-netdev.o
+obj-$(CONFIG_OPENVSWITCH_VXLAN)+= vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GENEVE)+= vport-geneve.o
-obj-$(CONFIG_OPENVSWITCH_VXLAN) += vport-vxlan.o
obj-$(CONFIG_OPENVSWITCH_GRE) += vport-gre.o
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index ee34f474ad14..4f4200717bef 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -284,14 +284,14 @@ static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh,
if (nh->protocol == IPPROTO_TCP) {
if (likely(transport_len >= sizeof(struct tcphdr)))
inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
} else if (nh->protocol == IPPROTO_UDP) {
if (likely(transport_len >= sizeof(struct udphdr))) {
struct udphdr *uh = udp_hdr(skb);
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&uh->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -316,14 +316,14 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
if (l4_proto == NEXTHDR_TCP) {
if (likely(transport_len >= sizeof(struct tcphdr)))
inet_proto_csum_replace16(&tcp_hdr(skb)->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
} else if (l4_proto == NEXTHDR_UDP) {
if (likely(transport_len >= sizeof(struct udphdr))) {
struct udphdr *uh = udp_hdr(skb);
if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace16(&uh->check, skb,
- addr, new_addr, 1);
+ addr, new_addr, true);
if (!uh->check)
uh->check = CSUM_MANGLED_0;
}
@@ -331,7 +331,7 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
} else if (l4_proto == NEXTHDR_ICMP) {
if (likely(transport_len >= sizeof(struct icmp6hdr)))
inet_proto_csum_replace16(&icmp6_hdr(skb)->icmp6_cksum,
- skb, addr, new_addr, 1);
+ skb, addr, new_addr, true);
}
}
@@ -498,7 +498,7 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
- inet_proto_csum_replace2(check, skb, *port, new_port, 0);
+ inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
@@ -619,7 +619,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
struct sw_flow_key *key, const struct nlattr *attr,
const struct nlattr *actions, int actions_len)
{
- struct ovs_tunnel_info info;
+ struct ip_tunnel_info info;
struct dp_upcall_info upcall;
const struct nlattr *a;
int rem;
@@ -677,9 +677,12 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
a = nla_next(a, &rem)) {
+ u32 probability;
+
switch (nla_type(a)) {
case OVS_SAMPLE_ATTR_PROBABILITY:
- if (prandom_u32() >= nla_get_u32(a))
+ probability = nla_get_u32(a);
+ if (!probability || prandom_u32() > probability)
return 0;
break;
@@ -741,7 +744,15 @@ static int execute_set_action(struct sk_buff *skb,
{
/* Only tunnel set execution is supported without a mask. */
if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
- OVS_CB(skb)->egress_tun_info = nla_data(a);
+ struct ovs_tunnel_info *tun = nla_data(a);
+
+ skb_dst_drop(skb);
+ dst_hold((struct dst_entry *)tun->tun_dst);
+ skb_dst_set(skb, (struct dst_entry *)tun->tun_dst);
+
+ /* FIXME: Remove when all vports have been converted */
+ OVS_CB(skb)->egress_tun_info = &tun->tun_dst->u.tun_info;
+
return 0;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index ff8c4a4c1609..ffe984f5b95c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -176,7 +176,7 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex)
const char *ovs_dp_name(const struct datapath *dp)
{
struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
- return vport->ops->get_name(vport);
+ return ovs_vport_name(vport);
}
static int get_dpifindex(const struct datapath *dp)
@@ -188,7 +188,7 @@ static int get_dpifindex(const struct datapath *dp)
local = ovs_vport_rcu(dp, OVSP_LOCAL);
if (local)
- ifindex = netdev_vport_priv(local)->dev->ifindex;
+ ifindex = local->dev->ifindex;
else
ifindex = 0;
@@ -1018,7 +1018,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
}
ovs_unlock();
- ovs_nla_free_flow_actions(old_acts);
+ ovs_nla_free_flow_actions_rcu(old_acts);
ovs_flow_free(new_flow, false);
}
@@ -1030,7 +1030,7 @@ err_unlock_ovs:
ovs_unlock();
kfree_skb(reply);
err_kfree_acts:
- kfree(acts);
+ ovs_nla_free_flow_actions(acts);
err_kfree_flow:
ovs_flow_free(new_flow, false);
error:
@@ -1157,7 +1157,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
if (reply)
ovs_notify(&dp_flow_genl_family, reply, info);
if (old_acts)
- ovs_nla_free_flow_actions(old_acts);
+ ovs_nla_free_flow_actions_rcu(old_acts);
return 0;
@@ -1165,7 +1165,7 @@ err_unlock_ovs:
ovs_unlock();
kfree_skb(reply);
err_kfree_acts:
- kfree(acts);
+ ovs_nla_free_flow_actions(acts);
error:
return error;
}
@@ -1800,7 +1800,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
nla_put_string(skb, OVS_VPORT_ATTR_NAME,
- vport->ops->get_name(vport)))
+ ovs_vport_name(vport)))
goto nla_put_failure;
ovs_vport_get_stats(vport, &vport_stats);
@@ -2219,13 +2219,10 @@ static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
struct vport *vport;
hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
- struct netdev_vport *netdev_vport;
-
if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
continue;
- netdev_vport = netdev_vport_priv(vport);
- if (dev_net(netdev_vport->dev) == dnet)
+ if (dev_net(vport->dev) == dnet)
list_add(&vport->detach_list, head);
}
}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index cd691e935e08..6b28c5cedb23 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -25,6 +25,7 @@
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/u64_stats_sync.h>
+#include <net/ip_tunnels.h>
#include "flow.h"
#include "flow_table.h"
@@ -98,7 +99,7 @@ struct datapath {
* when a packet is received by OVS.
*/
struct ovs_skb_cb {
- struct ovs_tunnel_info *egress_tun_info;
+ struct ip_tunnel_info *egress_tun_info;
struct vport *input_vport;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
@@ -114,7 +115,7 @@ struct ovs_skb_cb {
* @egress_tun_info: If nonnull, becomes %OVS_PACKET_ATTR_EGRESS_TUN_KEY.
*/
struct dp_upcall_info {
- const struct ovs_tunnel_info *egress_tun_info;
+ const struct ip_tunnel_info *egress_tun_info;
const struct nlattr *userdata;
const struct nlattr *actions;
int actions_len;
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 2c631fe76be1..a7a80a6b77b0 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,13 +58,10 @@ void ovs_dp_notify_wq(struct work_struct *work)
struct hlist_node *n;
hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
- struct netdev_vport *netdev_vport;
-
if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
continue;
- netdev_vport = netdev_vport_priv(vport);
- if (!(netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH))
+ if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
dp_detach_port_notify(vport);
}
}
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index bc7b0aba994a..8db22ef73626 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -682,12 +682,12 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key)
return key_extract(skb, key);
}
-int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
+int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb, struct sw_flow_key *key)
{
/* Extract metadata from packet. */
if (tun_info) {
- memcpy(&key->tun_key, &tun_info->tunnel, sizeof(key->tun_key));
+ memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
if (tun_info->options) {
BUILD_BUG_ON((1 << (sizeof(tun_info->options_len) *
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index a076e445ccc2..b62cdb3e3589 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -32,31 +32,11 @@
#include <linux/time.h>
#include <linux/flex_array.h>
#include <net/inet_ecn.h>
+#include <net/ip_tunnels.h>
+#include <net/dst_metadata.h>
struct sk_buff;
-/* Used to memset ovs_key_ipv4_tunnel padding. */
-#define OVS_TUNNEL_KEY_SIZE \
- (offsetof(struct ovs_key_ipv4_tunnel, tp_dst) + \
- FIELD_SIZEOF(struct ovs_key_ipv4_tunnel, tp_dst))
-
-struct ovs_key_ipv4_tunnel {
- __be64 tun_id;
- __be32 ipv4_src;
- __be32 ipv4_dst;
- __be16 tun_flags;
- u8 ipv4_tos;
- u8 ipv4_ttl;
- __be16 tp_src;
- __be16 tp_dst;
-} __packed __aligned(4); /* Minimize padding. */
-
-struct ovs_tunnel_info {
- struct ovs_key_ipv4_tunnel tunnel;
- const void *options;
- u8 options_len;
-};
-
/* Store options at the end of the array if they are less than the
* maximum size. This allows us to get the benefits of variable length
* matching for small options.
@@ -66,54 +46,9 @@ struct ovs_tunnel_info {
#define TUN_METADATA_OPTS(flow_key, opt_len) \
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
-static inline void __ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
- __be32 saddr, __be32 daddr,
- u8 tos, u8 ttl,
- __be16 tp_src,
- __be16 tp_dst,
- __be64 tun_id,
- __be16 tun_flags,
- const void *opts,
- u8 opts_len)
-{
- tun_info->tunnel.tun_id = tun_id;
- tun_info->tunnel.ipv4_src = saddr;
- tun_info->tunnel.ipv4_dst = daddr;
- tun_info->tunnel.ipv4_tos = tos;
- tun_info->tunnel.ipv4_ttl = ttl;
- tun_info->tunnel.tun_flags = tun_flags;
-
- /* For the tunnel types on the top of IPsec, the tp_src and tp_dst of
- * the upper tunnel are used.
- * E.g: GRE over IPSEC, the tp_src and tp_port are zero.
- */
- tun_info->tunnel.tp_src = tp_src;
- tun_info->tunnel.tp_dst = tp_dst;
-
- /* Clear struct padding. */
- if (sizeof(tun_info->tunnel) != OVS_TUNNEL_KEY_SIZE)
- memset((unsigned char *)&tun_info->tunnel + OVS_TUNNEL_KEY_SIZE,
- 0, sizeof(tun_info->tunnel) - OVS_TUNNEL_KEY_SIZE);
-
- tun_info->options = opts;
- tun_info->options_len = opts_len;
-}
-
-static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
- const struct iphdr *iph,
- __be16 tp_src,
- __be16 tp_dst,
- __be64 tun_id,
- __be16 tun_flags,
- const void *opts,
- u8 opts_len)
-{
- __ovs_flow_tun_info_init(tun_info, iph->saddr, iph->daddr,
- iph->tos, iph->ttl,
- tp_src, tp_dst,
- tun_id, tun_flags,
- opts, opts_len);
-}
+struct ovs_tunnel_info {
+ struct metadata_dst *tun_dst;
+};
#define OVS_SW_FLOW_KEY_METADATA_SIZE \
(offsetof(struct sw_flow_key, recirc_id) + \
@@ -122,7 +57,7 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info,
struct sw_flow_key {
u8 tun_opts[255];
u8 tun_opts_len;
- struct ovs_key_ipv4_tunnel tun_key; /* Encapsulating tunnel key. */
+ struct ip_tunnel_key tun_key; /* Encapsulating tunnel key. */
struct {
u32 priority; /* Packet QoS priority. */
u32 skb_mark; /* SKB mark. */
@@ -273,7 +208,7 @@ void ovs_flow_stats_clear(struct sw_flow *);
u64 ovs_flow_used_time(unsigned long flow_jiffies);
int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key);
-int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info,
+int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
struct sk_buff *skb,
struct sw_flow_key *key);
/* Extract key from packet coming from userspace. */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 624e41c4267f..4e7a3f7facc2 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -47,9 +47,9 @@
#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/mpls.h>
+#include <net/vxlan.h>
#include "flow_netlink.h"
-#include "vport-vxlan.h"
struct ovs_len_tbl {
int len;
@@ -475,7 +475,7 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *a,
{
struct nlattr *tb[OVS_VXLAN_EXT_MAX+1];
unsigned long opt_key_offset;
- struct ovs_vxlan_opts opts;
+ struct vxlan_metadata opts;
int err;
BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
@@ -534,19 +534,19 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
tun_flags |= TUNNEL_KEY;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_src,
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_dst,
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
nla_get_in_addr(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_tos,
+ SW_FLOW_KEY_PUT(match, tun_key.tos,
nla_get_u8(a), is_mask);
break;
case OVS_TUNNEL_KEY_ATTR_TTL:
- SW_FLOW_KEY_PUT(match, tun_key.ipv4_ttl,
+ SW_FLOW_KEY_PUT(match, tun_key.ttl,
nla_get_u8(a), is_mask);
ttl = true;
break;
@@ -609,7 +609,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
}
if (!is_mask) {
- if (!match->key->tun_key.ipv4_dst) {
+ if (!match->key->tun_key.u.ipv4.dst) {
OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
@@ -626,7 +626,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
static int vxlan_opt_to_nlattr(struct sk_buff *skb,
const void *tun_opts, int swkey_tun_opts_len)
{
- const struct ovs_vxlan_opts *opts = tun_opts;
+ const struct vxlan_metadata *opts = tun_opts;
struct nlattr *nla;
nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
@@ -641,24 +641,24 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb,
}
static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *output,
+ const struct ip_tunnel_key *output,
const void *tun_opts, int swkey_tun_opts_len)
{
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (output->ipv4_src &&
+ if (output->u.ipv4.src &&
nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
- output->ipv4_src))
+ output->u.ipv4.src))
return -EMSGSIZE;
- if (output->ipv4_dst &&
+ if (output->u.ipv4.dst &&
nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
- output->ipv4_dst))
+ output->u.ipv4.dst))
return -EMSGSIZE;
- if (output->ipv4_tos &&
- nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->ipv4_tos))
+ if (output->tos &&
+ nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
return -EMSGSIZE;
- if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl))
+ if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
return -EMSGSIZE;
if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
@@ -689,7 +689,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
}
static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ovs_key_ipv4_tunnel *output,
+ const struct ip_tunnel_key *output,
const void *tun_opts, int swkey_tun_opts_len)
{
struct nlattr *nla;
@@ -708,9 +708,9 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
}
int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
- const struct ovs_tunnel_info *egress_tun_info)
+ const struct ip_tunnel_info *egress_tun_info)
{
- return __ipv4_tun_to_nlattr(skb, &egress_tun_info->tunnel,
+ return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key,
egress_tun_info->options,
egress_tun_info->options_len);
}
@@ -1116,7 +1116,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
- if (match->key->tun_key.ipv4_dst)
+ if (match->key->tun_key.u.ipv4.dst)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
@@ -1287,7 +1287,7 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.ipv4_dst || is_mask)) {
+ if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
@@ -1548,11 +1548,48 @@ static struct sw_flow_actions *nla_alloc_flow_actions(int size, bool log)
return sfa;
}
+static void ovs_nla_free_set_action(const struct nlattr *a)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ struct ovs_tunnel_info *ovs_tun;
+
+ switch (nla_type(ovs_key)) {
+ case OVS_KEY_ATTR_TUNNEL_INFO:
+ ovs_tun = nla_data(ovs_key);
+ dst_release((struct dst_entry *)ovs_tun->tun_dst);
+ break;
+ }
+}
+
+void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+{
+ const struct nlattr *a;
+ int rem;
+
+ if (!sf_acts)
+ return;
+
+ nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
+ switch (nla_type(a)) {
+ case OVS_ACTION_ATTR_SET:
+ ovs_nla_free_set_action(a);
+ break;
+ }
+ }
+
+ kfree(sf_acts);
+}
+
+static void __ovs_nla_free_flow_actions(struct rcu_head *head)
+{
+ ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
+}
+
/* Schedules 'sf_acts' to be freed after the next RCU grace period.
* The caller must hold rcu_read_lock for this to be sensible. */
-void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
+void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
{
- kfree_rcu(sf_acts, rcu);
+ call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
}
static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
@@ -1746,7 +1783,9 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
{
struct sw_flow_match match;
struct sw_flow_key key;
- struct ovs_tunnel_info *tun_info;
+ struct metadata_dst *tun_dst;
+ struct ip_tunnel_info *tun_info;
+ struct ovs_tunnel_info *ovs_tun;
struct nlattr *a;
int err = 0, start, opts_type;
@@ -1771,13 +1810,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
if (start < 0)
return start;
+ tun_dst = metadata_dst_alloc(key.tun_opts_len, GFP_KERNEL);
+ if (!tun_dst)
+ return -ENOMEM;
+
a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
- sizeof(*tun_info) + key.tun_opts_len, log);
- if (IS_ERR(a))
+ sizeof(*ovs_tun), log);
+ if (IS_ERR(a)) {
+ dst_release((struct dst_entry *)tun_dst);
return PTR_ERR(a);
+ }
+
+ ovs_tun = nla_data(a);
+ ovs_tun->tun_dst = tun_dst;
- tun_info = nla_data(a);
- tun_info->tunnel = key.tun_key;
+ tun_info = &tun_dst->u.tun_info;
+ tun_info->mode = IP_TUNNEL_INFO_TX;
+ tun_info->key = key.tun_key;
tun_info->options_len = key.tun_opts_len;
if (tun_info->options_len) {
@@ -2177,7 +2226,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr,
err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type,
key->eth.tci, log);
if (err)
- kfree(*sfa);
+ ovs_nla_free_flow_actions(*sfa);
return err;
}
@@ -2227,13 +2276,14 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
switch (key_type) {
case OVS_KEY_ATTR_TUNNEL_INFO: {
- struct ovs_tunnel_info *tun_info = nla_data(ovs_key);
+ struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
+ struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
if (!start)
return -EMSGSIZE;
- err = ipv4_tun_to_nlattr(skb, &tun_info->tunnel,
+ err = ipv4_tun_to_nlattr(skb, &tun_info->key,
tun_info->options_len ?
tun_info->options : NULL,
tun_info->options_len);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 5c3d75bff310..acd074408f0a 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -55,7 +55,7 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key,
const struct nlattr *mask, bool log);
int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
- const struct ovs_tunnel_info *);
+ const struct ip_tunnel_info *);
bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
@@ -69,5 +69,6 @@ int ovs_nla_put_actions(const struct nlattr *attr,
int len, struct sk_buff *skb);
void ovs_nla_free_flow_actions(struct sw_flow_actions *);
+void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *);
#endif /* flow_netlink.h */
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 65523948fb95..d22d8e948d0f 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -18,6 +18,7 @@
#include "flow.h"
#include "datapath.h"
+#include "flow_netlink.h"
#include <linux/uaccess.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
@@ -143,7 +144,8 @@ static void flow_free(struct sw_flow *flow)
if (ovs_identifier_is_key(&flow->id))
kfree(flow->id.unmasked_key);
- kfree((struct sw_flow_actions __force *)flow->sf_acts);
+ if (flow->sf_acts)
+ ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts);
for_each_node(node)
if (flow->stats[node])
kmem_cache_free(flow_stats_cache,
@@ -424,7 +426,7 @@ static u32 flow_hash(const struct sw_flow_key *key,
static int flow_key_start(const struct sw_flow_key *key)
{
- if (key->tun_key.ipv4_dst)
+ if (key->tun_key.u.ipv4.dst)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 208c576bd1b6..d01bd6360970 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -77,7 +77,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
struct vport *vport = gs->rcv_data;
struct genevehdr *geneveh = geneve_hdr(skb);
int opts_len;
- struct ovs_tunnel_info tun_info;
+ struct ip_tunnel_info tun_info;
__be64 key;
__be16 flags;
@@ -90,10 +90,9 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb)
key = vni_to_tunnel_id(geneveh->vni);
- ovs_flow_tun_info_init(&tun_info, ip_hdr(skb),
- udp_hdr(skb)->source, udp_hdr(skb)->dest,
- key, flags,
- geneveh->options, opts_len);
+ ip_tunnel_info_init(&tun_info, ip_hdr(skb),
+ udp_hdr(skb)->source, udp_hdr(skb)->dest,
+ key, flags, geneveh->options, opts_len);
ovs_vport_receive(vport, skb, &tun_info);
}
@@ -165,8 +164,8 @@ error:
static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
{
- const struct ovs_key_ipv4_tunnel *tun_key;
- struct ovs_tunnel_info *tun_info;
+ const struct ip_tunnel_key *tun_key;
+ struct ip_tunnel_info *tun_info;
struct net *net = ovs_dp_get_net(vport->dp);
struct geneve_port *geneve_port = geneve_vport(vport);
__be16 dport = inet_sk(geneve_port->gs->sock->sk)->inet_sport;
@@ -183,7 +182,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
goto error;
}
- tun_key = &tun_info->tunnel;
+ tun_key = &tun_info->key;
rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
@@ -204,8 +203,8 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb)
}
err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr,
- tun_key->ipv4_dst, tun_key->ipv4_tos,
- tun_key->ipv4_ttl, df, sport, dport,
+ tun_key->u.ipv4.dst, tun_key->tos,
+ tun_key->ttl, df, sport, dport,
tun_key->tun_flags, vni, opts_len, opts,
!!(tun_key->tun_flags & TUNNEL_CSUM), false);
if (err < 0)
@@ -225,7 +224,7 @@ static const char *geneve_get_name(const struct vport *vport)
}
static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *egress_tun_info)
+ struct ip_tunnel_info *egress_tun_info)
{
struct geneve_port *geneve_port = geneve_vport(vport);
struct net *net = ovs_dp_get_net(vport->dp);
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index f17ac9642f4e..871801d2ac23 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -45,239 +45,47 @@
#include "datapath.h"
#include "vport.h"
+#include "vport-netdev.h"
static struct vport_ops ovs_gre_vport_ops;
-/* Returns the least-significant 32 bits of a __be64. */
-static __be32 be64_get_low32(__be64 x)
+static struct vport *gre_tnl_create(const struct vport_parms *parms)
{
-#ifdef __BIG_ENDIAN
- return (__force __be32)x;
-#else
- return (__force __be32)((__force u64)x >> 32);
-#endif
-}
-
-static __be16 filter_tnl_flags(__be16 flags)
-{
- return flags & (TUNNEL_CSUM | TUNNEL_KEY);
-}
-
-static struct sk_buff *__build_header(struct sk_buff *skb,
- int tunnel_hlen)
-{
- struct tnl_ptk_info tpi;
- const struct ovs_key_ipv4_tunnel *tun_key;
-
- tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
-
- skb = gre_handle_offloads(skb, !!(tun_key->tun_flags & TUNNEL_CSUM));
- if (IS_ERR(skb))
- return skb;
-
- tpi.flags = filter_tnl_flags(tun_key->tun_flags);
- tpi.proto = htons(ETH_P_TEB);
- tpi.key = be64_get_low32(tun_key->tun_id);
- tpi.seq = 0;
- gre_build_header(skb, &tpi, tunnel_hlen);
-
- return skb;
-}
-
-static __be64 key_to_tunnel_id(__be32 key, __be32 seq)
-{
-#ifdef __BIG_ENDIAN
- return (__force __be64)((__force u64)seq << 32 | (__force u32)key);
-#else
- return (__force __be64)((__force u64)key << 32 | (__force u32)seq);
-#endif
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_rcv(struct sk_buff *skb,
- const struct tnl_ptk_info *tpi)
-{
- struct ovs_tunnel_info tun_info;
- struct ovs_net *ovs_net;
- struct vport *vport;
- __be64 key;
-
- ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
- vport = rcu_dereference(ovs_net->vport_net.gre_vport);
- if (unlikely(!vport))
- return PACKET_REJECT;
-
- key = key_to_tunnel_id(tpi->key, tpi->seq);
- ovs_flow_tun_info_init(&tun_info, ip_hdr(skb), 0, 0, key,
- filter_tnl_flags(tpi->flags), NULL, 0);
-
- ovs_vport_receive(vport, skb, &tun_info);
- return PACKET_RCVD;
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static int gre_err(struct sk_buff *skb, u32 info,
- const struct tnl_ptk_info *tpi)
-{
- struct ovs_net *ovs_net;
+ struct net *net = ovs_dp_get_net(parms->dp);
+ struct net_device *dev;
struct vport *vport;
- ovs_net = net_generic(dev_net(skb->dev), ovs_net_id);
- vport = rcu_dereference(ovs_net->vport_net.gre_vport);
-
- if (unlikely(!vport))
- return PACKET_REJECT;
- else
- return PACKET_RCVD;
-}
-
-static int gre_tnl_send(struct vport *vport, struct sk_buff *skb)
-{
- struct net *net = ovs_dp_get_net(vport->dp);
- const struct ovs_key_ipv4_tunnel *tun_key;
- struct flowi4 fl;
- struct rtable *rt;
- int min_headroom;
- int tunnel_hlen;
- __be16 df;
- int err;
-
- if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
- err = -EINVAL;
- goto err_free_skb;
- }
-
- tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
- rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_GRE);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto err_free_skb;
- }
-
- tunnel_hlen = ip_gre_calc_hlen(tun_key->tun_flags);
-
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + tunnel_hlen + sizeof(struct iphdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
- if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) {
- int head_delta = SKB_DATA_ALIGN(min_headroom -
- skb_headroom(skb) +
- 16);
- err = pskb_expand_head(skb, max_t(int, head_delta, 0),
- 0, GFP_ATOMIC);
- if (unlikely(err))
- goto err_free_rt;
- }
-
- skb = vlan_hwaccel_push_inside(skb);
- if (unlikely(!skb)) {
- err = -ENOMEM;
- goto err_free_rt;
- }
-
- /* Push Tunnel header. */
- skb = __build_header(skb, tunnel_hlen);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- skb = NULL;
- goto err_free_rt;
+ vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ rtnl_lock();
+ dev = gretap_fb_dev_create(net, parms->name, NET_NAME_USER);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
+ ovs_vport_free(vport);
+ return ERR_CAST(dev);
}
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
- htons(IP_DF) : 0;
-
- skb->ignore_df = 1;
-
- return iptunnel_xmit(skb->sk, rt, skb, fl.saddr,
- tun_key->ipv4_dst, IPPROTO_GRE,
- tun_key->ipv4_tos, tun_key->ipv4_ttl, df, false);
-err_free_rt:
- ip_rt_put(rt);
-err_free_skb:
- kfree_skb(skb);
- return err;
-}
-
-static struct gre_cisco_protocol gre_protocol = {
- .handler = gre_rcv,
- .err_handler = gre_err,
- .priority = 1,
-};
-
-static int gre_ports;
-static int gre_init(void)
-{
- int err;
-
- gre_ports++;
- if (gre_ports > 1)
- return 0;
-
- err = gre_cisco_register(&gre_protocol);
- if (err)
- pr_warn("cannot register gre protocol handler\n");
-
- return err;
-}
-
-static void gre_exit(void)
-{
- gre_ports--;
- if (gre_ports > 0)
- return;
-
- gre_cisco_unregister(&gre_protocol);
-}
+ dev_change_flags(dev, dev->flags | IFF_UP);
+ rtnl_unlock();
-static const char *gre_get_name(const struct vport *vport)
-{
- return vport_priv(vport);
+ return vport;
}
static struct vport *gre_create(const struct vport_parms *parms)
{
- struct net *net = ovs_dp_get_net(parms->dp);
- struct ovs_net *ovs_net;
struct vport *vport;
- int err;
-
- err = gre_init();
- if (err)
- return ERR_PTR(err);
-
- ovs_net = net_generic(net, ovs_net_id);
- if (ovsl_dereference(ovs_net->vport_net.gre_vport)) {
- vport = ERR_PTR(-EEXIST);
- goto error;
- }
- vport = ovs_vport_alloc(IFNAMSIZ, &ovs_gre_vport_ops, parms);
+ vport = gre_tnl_create(parms);
if (IS_ERR(vport))
- goto error;
-
- strncpy(vport_priv(vport), parms->name, IFNAMSIZ);
- rcu_assign_pointer(ovs_net->vport_net.gre_vport, vport);
- return vport;
-
-error:
- gre_exit();
- return vport;
-}
-
-static void gre_tnl_destroy(struct vport *vport)
-{
- struct net *net = ovs_dp_get_net(vport->dp);
- struct ovs_net *ovs_net;
-
- ovs_net = net_generic(net, ovs_net_id);
+ return vport;
- RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
- ovs_vport_deferred_free(vport);
- gre_exit();
+ return ovs_netdev_link(vport, parms->name);
}
static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *egress_tun_info)
+ struct ip_tunnel_info *egress_tun_info)
{
return ovs_tunnel_get_egress_info(egress_tun_info,
ovs_dp_get_net(vport->dp),
@@ -288,10 +96,9 @@ static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
static struct vport_ops ovs_gre_vport_ops = {
.type = OVS_VPORT_TYPE_GRE,
.create = gre_create,
- .destroy = gre_tnl_destroy,
- .get_name = gre_get_name,
- .send = gre_tnl_send,
+ .send = ovs_netdev_send,
.get_egress_tun_info = gre_get_egress_tun_info,
+ .destroy = ovs_netdev_tunnel_destroy,
.owner = THIS_MODULE,
};
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index 6a55f7105505..c058bbf876c3 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -156,49 +156,44 @@ static void do_setup(struct net_device *netdev)
static struct vport *internal_dev_create(const struct vport_parms *parms)
{
struct vport *vport;
- struct netdev_vport *netdev_vport;
struct internal_dev *internal_dev;
int err;
- vport = ovs_vport_alloc(sizeof(struct netdev_vport),
- &ovs_internal_vport_ops, parms);
+ vport = ovs_vport_alloc(0, &ovs_internal_vport_ops, parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto error;
}
- netdev_vport = netdev_vport_priv(vport);
-
- netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
- parms->name, NET_NAME_UNKNOWN,
- do_setup);
- if (!netdev_vport->dev) {
+ vport->dev = alloc_netdev(sizeof(struct internal_dev),
+ parms->name, NET_NAME_UNKNOWN, do_setup);
+ if (!vport->dev) {
err = -ENOMEM;
goto error_free_vport;
}
- dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp));
- internal_dev = internal_dev_priv(netdev_vport->dev);
+ dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
+ internal_dev = internal_dev_priv(vport->dev);
internal_dev->vport = vport;
/* Restrict bridge port to current netns. */
if (vport->port_no == OVSP_LOCAL)
- netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+ vport->dev->features |= NETIF_F_NETNS_LOCAL;
rtnl_lock();
- err = register_netdevice(netdev_vport->dev);
+ err = register_netdevice(vport->dev);
if (err)
goto error_free_netdev;
- dev_set_promiscuity(netdev_vport->dev, 1);
+ dev_set_promiscuity(vport->dev, 1);
rtnl_unlock();
- netif_start_queue(netdev_vport->dev);
+ netif_start_queue(vport->dev);
return vport;
error_free_netdev:
rtnl_unlock();
- free_netdev(netdev_vport->dev);
+ free_netdev(vport->dev);
error_free_vport:
ovs_vport_free(vport);
error:
@@ -207,21 +202,19 @@ error:
static void internal_dev_destroy(struct vport *vport)
{
- struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-
- netif_stop_queue(netdev_vport->dev);
+ netif_stop_queue(vport->dev);
rtnl_lock();
- dev_set_promiscuity(netdev_vport->dev, -1);
+ dev_set_promiscuity(vport->dev, -1);
/* unregister_netdevice() waits for an RCU grace period. */
- unregister_netdevice(netdev_vport->dev);
+ unregister_netdevice(vport->dev);
rtnl_unlock();
}
static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
{
- struct net_device *netdev = netdev_vport_priv(vport)->dev;
+ struct net_device *netdev = vport->dev;
int len;
if (unlikely(!(netdev->flags & IFF_UP))) {
@@ -249,7 +242,6 @@ static struct vport_ops ovs_internal_vport_ops = {
.type = OVS_VPORT_TYPE_INTERNAL,
.create = internal_dev_create,
.destroy = internal_dev_destroy,
- .get_name = ovs_netdev_get_name,
.send = internal_dev_recv,
};
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 33e6d6e2908f..a75011505039 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -26,10 +26,13 @@
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/openvswitch.h>
+#include <linux/export.h>
-#include <net/llc.h>
+#include <net/ip_tunnels.h>
+#include <net/rtnetlink.h>
#include "datapath.h"
+#include "vport.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -54,7 +57,7 @@ static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
skb_push(skb, ETH_HLEN);
ovs_skb_postpush_rcsum(skb, skb->data, ETH_HLEN);
- ovs_vport_receive(vport, skb, NULL);
+ ovs_vport_receive(vport, skb, skb_tunnel_info(skb));
return;
error:
@@ -83,105 +86,112 @@ static struct net_device *get_dpdev(const struct datapath *dp)
local = ovs_vport_ovsl(dp, OVSP_LOCAL);
BUG_ON(!local);
- return netdev_vport_priv(local)->dev;
+ return local->dev;
}
-static struct vport *netdev_create(const struct vport_parms *parms)
+struct vport *ovs_netdev_link(struct vport *vport, const char *name)
{
- struct vport *vport;
- struct netdev_vport *netdev_vport;
int err;
- vport = ovs_vport_alloc(sizeof(struct netdev_vport),
- &ovs_netdev_vport_ops, parms);
- if (IS_ERR(vport)) {
- err = PTR_ERR(vport);
- goto error;
- }
-
- netdev_vport = netdev_vport_priv(vport);
-
- netdev_vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), parms->name);
- if (!netdev_vport->dev) {
+ vport->dev = dev_get_by_name(ovs_dp_get_net(vport->dp), name);
+ if (!vport->dev) {
err = -ENODEV;
goto error_free_vport;
}
- if (netdev_vport->dev->flags & IFF_LOOPBACK ||
- netdev_vport->dev->type != ARPHRD_ETHER ||
- ovs_is_internal_dev(netdev_vport->dev)) {
+ if (vport->dev->flags & IFF_LOOPBACK ||
+ vport->dev->type != ARPHRD_ETHER ||
+ ovs_is_internal_dev(vport->dev)) {
err = -EINVAL;
goto error_put;
}
rtnl_lock();
- err = netdev_master_upper_dev_link(netdev_vport->dev,
+ err = netdev_master_upper_dev_link(vport->dev,
get_dpdev(vport->dp));
if (err)
goto error_unlock;
- err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
+ err = netdev_rx_handler_register(vport->dev, netdev_frame_hook,
vport);
if (err)
goto error_master_upper_dev_unlink;
- dev_disable_lro(netdev_vport->dev);
- dev_set_promiscuity(netdev_vport->dev, 1);
- netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+ dev_disable_lro(vport->dev);
+ dev_set_promiscuity(vport->dev, 1);
+ vport->dev->priv_flags |= IFF_OVS_DATAPATH;
rtnl_unlock();
return vport;
error_master_upper_dev_unlink:
- netdev_upper_dev_unlink(netdev_vport->dev, get_dpdev(vport->dp));
+ netdev_upper_dev_unlink(vport->dev, get_dpdev(vport->dp));
error_unlock:
rtnl_unlock();
error_put:
- dev_put(netdev_vport->dev);
+ dev_put(vport->dev);
error_free_vport:
ovs_vport_free(vport);
-error:
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(ovs_netdev_link);
-static void free_port_rcu(struct rcu_head *rcu)
+static struct vport *netdev_create(const struct vport_parms *parms)
{
- struct netdev_vport *netdev_vport = container_of(rcu,
- struct netdev_vport, rcu);
+ struct vport *vport;
- dev_put(netdev_vport->dev);
- ovs_vport_free(vport_from_priv(netdev_vport));
+ vport = ovs_vport_alloc(0, &ovs_netdev_vport_ops, parms);
+ if (IS_ERR(vport))
+ return vport;
+
+ return ovs_netdev_link(vport, parms->name);
}
-void ovs_netdev_detach_dev(struct vport *vport)
+static void vport_netdev_free(struct rcu_head *rcu)
{
- struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+ struct vport *vport = container_of(rcu, struct vport, rcu);
+ if (vport->dev)
+ dev_put(vport->dev);
+ ovs_vport_free(vport);
+}
+
+void ovs_netdev_detach_dev(struct vport *vport)
+{
ASSERT_RTNL();
- netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
- netdev_rx_handler_unregister(netdev_vport->dev);
- netdev_upper_dev_unlink(netdev_vport->dev,
- netdev_master_upper_dev_get(netdev_vport->dev));
- dev_set_promiscuity(netdev_vport->dev, -1);
+ vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
+ netdev_rx_handler_unregister(vport->dev);
+ netdev_upper_dev_unlink(vport->dev,
+ netdev_master_upper_dev_get(vport->dev));
+ dev_set_promiscuity(vport->dev, -1);
}
+EXPORT_SYMBOL_GPL(ovs_netdev_detach_dev);
static void netdev_destroy(struct vport *vport)
{
- struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
-
rtnl_lock();
- if (netdev_vport->dev->priv_flags & IFF_OVS_DATAPATH)
+ if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
ovs_netdev_detach_dev(vport);
rtnl_unlock();
- call_rcu(&netdev_vport->rcu, free_port_rcu);
+ call_rcu(&vport->rcu, vport_netdev_free);
}
-const char *ovs_netdev_get_name(const struct vport *vport)
+void ovs_netdev_tunnel_destroy(struct vport *vport)
{
- const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
- return netdev_vport->dev->name;
+ rtnl_lock();
+ if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
+ ovs_netdev_detach_dev(vport);
+
+ /* Early release so we can unregister the device */
+ dev_put(vport->dev);
+ rtnl_delete_link(vport->dev);
+ vport->dev = NULL;
+ rtnl_unlock();
+
+ call_rcu(&vport->rcu, vport_netdev_free);
}
+EXPORT_SYMBOL_GPL(ovs_netdev_tunnel_destroy);
static unsigned int packet_length(const struct sk_buff *skb)
{
@@ -193,20 +203,19 @@ static unsigned int packet_length(const struct sk_buff *skb)
return length;
}
-static int netdev_send(struct vport *vport, struct sk_buff *skb)
+int ovs_netdev_send(struct vport *vport, struct sk_buff *skb)
{
- struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
- int mtu = netdev_vport->dev->mtu;
+ int mtu = vport->dev->mtu;
int len;
if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
net_warn_ratelimited("%s: dropped over-mtu packet: %d > %d\n",
- netdev_vport->dev->name,
+ vport->dev->name,
packet_length(skb), mtu);
goto drop;
}
- skb->dev = netdev_vport->dev;
+ skb->dev = vport->dev;
len = skb->len;
dev_queue_xmit(skb);
@@ -216,6 +225,7 @@ drop:
kfree_skb(skb);
return 0;
}
+EXPORT_SYMBOL_GPL(ovs_netdev_send);
/* Returns null if this device is not attached to a datapath. */
struct vport *ovs_netdev_get_vport(struct net_device *dev)
@@ -231,8 +241,7 @@ static struct vport_ops ovs_netdev_vport_ops = {
.type = OVS_VPORT_TYPE_NETDEV,
.create = netdev_create,
.destroy = netdev_destroy,
- .get_name = ovs_netdev_get_name,
- .send = netdev_send,
+ .send = ovs_netdev_send,
};
int __init ovs_netdev_init(void)
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
index 6f7038e79c52..497cc81f1aca 100644
--- a/net/openvswitch/vport-netdev.h
+++ b/net/openvswitch/vport-netdev.h
@@ -26,22 +26,12 @@
struct vport *ovs_netdev_get_vport(struct net_device *dev);
-struct netdev_vport {
- struct rcu_head rcu;
-
- struct net_device *dev;
-};
-
-static inline struct netdev_vport *
-netdev_vport_priv(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
-const char *ovs_netdev_get_name(const struct vport *);
+struct vport *ovs_netdev_link(struct vport *vport, const char *name);
+int ovs_netdev_send(struct vport *vport, struct sk_buff *skb);
void ovs_netdev_detach_dev(struct vport *);
int __init ovs_netdev_init(void);
void ovs_netdev_exit(void);
+void ovs_netdev_tunnel_destroy(struct vport *vport);
#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index 6d39766e7828..1e8b00a23a23 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -17,94 +17,37 @@
* 02110-1301, USA
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/net.h>
-#include <linux/rculist.h>
-#include <linux/udp.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/openvswitch.h>
#include <linux/module.h>
-
-#include <net/icmp.h>
-#include <net/ip.h>
#include <net/udp.h>
#include <net/ip_tunnels.h>
#include <net/rtnetlink.h>
-#include <net/route.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
#include <net/vxlan.h>
#include "datapath.h"
#include "vport.h"
-#include "vport-vxlan.h"
-
-/**
- * struct vxlan_port - Keeps track of open UDP ports
- * @vs: vxlan_sock created for the port.
- * @name: vport name.
- */
-struct vxlan_port {
- struct vxlan_sock *vs;
- char name[IFNAMSIZ];
- u32 exts; /* VXLAN_F_* in <net/vxlan.h> */
-};
-
-static struct vport_ops ovs_vxlan_vport_ops;
+#include "vport-netdev.h"
-static inline struct vxlan_port *vxlan_vport(const struct vport *vport)
-{
- return vport_priv(vport);
-}
-
-/* Called with rcu_read_lock and BH disabled. */
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
- struct vxlan_metadata *md)
-{
- struct ovs_tunnel_info tun_info;
- struct vxlan_port *vxlan_port;
- struct vport *vport = vs->data;
- struct iphdr *iph;
- struct ovs_vxlan_opts opts = {
- .gbp = md->gbp,
- };
- __be64 key;
- __be16 flags;
-
- flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0);
- vxlan_port = vxlan_vport(vport);
- if (vxlan_port->exts & VXLAN_F_GBP && md->gbp)
- flags |= TUNNEL_VXLAN_OPT;
-
- /* Save outer tunnel values */
- iph = ip_hdr(skb);
- key = cpu_to_be64(ntohl(md->vni) >> 8);
- ovs_flow_tun_info_init(&tun_info, iph,
- udp_hdr(skb)->source, udp_hdr(skb)->dest,
- key, flags, &opts, sizeof(opts));
-
- ovs_vport_receive(vport, skb, &tun_info);
-}
+static struct vport_ops ovs_vxlan_netdev_vport_ops;
static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
{
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ struct vxlan_dev *vxlan = netdev_priv(vport->dev);
+ __be16 dst_port = vxlan->cfg.dst_port;
if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port)))
return -EMSGSIZE;
- if (vxlan_port->exts) {
+ if (vxlan->flags & VXLAN_F_GBP) {
struct nlattr *exts;
exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION);
if (!exts)
return -EMSGSIZE;
- if (vxlan_port->exts & VXLAN_F_GBP &&
+ if (vxlan->flags & VXLAN_F_GBP &&
nla_put_flag(skb, OVS_VXLAN_EXT_GBP))
return -EMSGSIZE;
@@ -114,23 +57,14 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb)
return 0;
}
-static void vxlan_tnl_destroy(struct vport *vport)
-{
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
-
- vxlan_sock_release(vxlan_port->vs);
-
- ovs_vport_deferred_free(vport);
-}
-
-static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = {
+static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, },
};
-static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
+static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr,
+ struct vxlan_config *conf)
{
- struct nlattr *exts[OVS_VXLAN_EXT_MAX+1];
- struct vxlan_port *vxlan_port;
+ struct nlattr *exts[OVS_VXLAN_EXT_MAX + 1];
int err;
if (nla_len(attr) < sizeof(struct nlattr))
@@ -140,10 +74,8 @@ static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr)
if (err < 0)
return err;
- vxlan_port = vxlan_vport(vport);
-
if (exts[OVS_VXLAN_EXT_GBP])
- vxlan_port->exts |= VXLAN_F_GBP;
+ conf->flags |= VXLAN_F_GBP;
return 0;
}
@@ -152,128 +84,74 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
{
struct net *net = ovs_dp_get_net(parms->dp);
struct nlattr *options = parms->options;
- struct vxlan_port *vxlan_port;
- struct vxlan_sock *vs;
+ struct net_device *dev;
struct vport *vport;
struct nlattr *a;
- u16 dst_port;
int err;
+ struct vxlan_config conf = {
+ .no_share = true,
+ .flags = VXLAN_F_COLLECT_METADATA,
+ };
if (!options) {
err = -EINVAL;
goto error;
}
+
a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT);
if (a && nla_len(a) == sizeof(u16)) {
- dst_port = nla_get_u16(a);
+ conf.dst_port = htons(nla_get_u16(a));
} else {
/* Require destination port from userspace. */
err = -EINVAL;
goto error;
}
- vport = ovs_vport_alloc(sizeof(struct vxlan_port),
- &ovs_vxlan_vport_ops, parms);
+ vport = ovs_vport_alloc(0, &ovs_vxlan_netdev_vport_ops, parms);
if (IS_ERR(vport))
return vport;
- vxlan_port = vxlan_vport(vport);
- strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
-
a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION);
if (a) {
- err = vxlan_configure_exts(vport, a);
+ err = vxlan_configure_exts(vport, a, &conf);
if (err) {
ovs_vport_free(vport);
goto error;
}
}
- vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true,
- vxlan_port->exts);
- if (IS_ERR(vs)) {
+ rtnl_lock();
+ dev = vxlan_dev_create(net, parms->name, NET_NAME_USER, &conf);
+ if (IS_ERR(dev)) {
+ rtnl_unlock();
ovs_vport_free(vport);
- return (void *)vs;
+ return ERR_CAST(dev);
}
- vxlan_port->vs = vs;
+ dev_change_flags(dev, dev->flags | IFF_UP);
+ rtnl_unlock();
return vport;
-
error:
return ERR_PTR(err);
}
-static int vxlan_ext_gbp(struct sk_buff *skb)
+static struct vport *vxlan_create(const struct vport_parms *parms)
{
- const struct ovs_tunnel_info *tun_info;
- const struct ovs_vxlan_opts *opts;
-
- tun_info = OVS_CB(skb)->egress_tun_info;
- opts = tun_info->options;
-
- if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT &&
- tun_info->options_len >= sizeof(*opts))
- return opts->gbp;
- else
- return 0;
-}
-
-static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb)
-{
- struct net *net = ovs_dp_get_net(vport->dp);
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- struct sock *sk = vxlan_port->vs->sock->sk;
- __be16 dst_port = inet_sk(sk)->inet_sport;
- const struct ovs_key_ipv4_tunnel *tun_key;
- struct vxlan_metadata md = {0};
- struct rtable *rt;
- struct flowi4 fl;
- __be16 src_port;
- __be16 df;
- int err;
- u32 vxflags;
-
- if (unlikely(!OVS_CB(skb)->egress_tun_info)) {
- err = -EINVAL;
- goto error;
- }
-
- tun_key = &OVS_CB(skb)->egress_tun_info->tunnel;
- rt = ovs_tunnel_route_lookup(net, tun_key, skb->mark, &fl, IPPROTO_UDP);
- if (IS_ERR(rt)) {
- err = PTR_ERR(rt);
- goto error;
- }
-
- df = tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ?
- htons(IP_DF) : 0;
+ struct vport *vport;
- skb->ignore_df = 1;
+ vport = vxlan_tnl_create(parms);
+ if (IS_ERR(vport))
+ return vport;
- src_port = udp_flow_src_port(net, skb, 0, 0, true);
- md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8);
- md.gbp = vxlan_ext_gbp(skb);
- vxflags = vxlan_port->exts |
- (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0);
-
- err = vxlan_xmit_skb(rt, sk, skb, fl.saddr, tun_key->ipv4_dst,
- tun_key->ipv4_tos, tun_key->ipv4_ttl, df,
- src_port, dst_port,
- &md, false, vxflags);
- if (err < 0)
- ip_rt_put(rt);
- return err;
-error:
- kfree_skb(skb);
- return err;
+ return ovs_netdev_link(vport, parms->name);
}
static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *egress_tun_info)
+ struct ip_tunnel_info *egress_tun_info)
{
+ struct vxlan_dev *vxlan = netdev_priv(vport->dev);
struct net *net = ovs_dp_get_net(vport->dp);
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport;
+ __be16 dst_port = vxlan_dev_dst_port(vxlan);
__be16 src_port;
int port_min;
int port_max;
@@ -287,31 +165,23 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
src_port, dst_port);
}
-static const char *vxlan_get_name(const struct vport *vport)
-{
- struct vxlan_port *vxlan_port = vxlan_vport(vport);
- return vxlan_port->name;
-}
-
-static struct vport_ops ovs_vxlan_vport_ops = {
- .type = OVS_VPORT_TYPE_VXLAN,
- .create = vxlan_tnl_create,
- .destroy = vxlan_tnl_destroy,
- .get_name = vxlan_get_name,
- .get_options = vxlan_get_options,
- .send = vxlan_tnl_send,
+static struct vport_ops ovs_vxlan_netdev_vport_ops = {
+ .type = OVS_VPORT_TYPE_VXLAN,
+ .create = vxlan_create,
+ .destroy = ovs_netdev_tunnel_destroy,
+ .get_options = vxlan_get_options,
+ .send = ovs_netdev_send,
.get_egress_tun_info = vxlan_get_egress_tun_info,
- .owner = THIS_MODULE,
};
static int __init ovs_vxlan_tnl_init(void)
{
- return ovs_vport_ops_register(&ovs_vxlan_vport_ops);
+ return ovs_vport_ops_register(&ovs_vxlan_netdev_vport_ops);
}
static void __exit ovs_vxlan_tnl_exit(void)
{
- ovs_vport_ops_unregister(&ovs_vxlan_vport_ops);
+ ovs_vport_ops_unregister(&ovs_vxlan_netdev_vport_ops);
}
module_init(ovs_vxlan_tnl_init);
diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h
deleted file mode 100644
index 4b08233e73d5..000000000000
--- a/net/openvswitch/vport-vxlan.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef VPORT_VXLAN_H
-#define VPORT_VXLAN_H 1
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-
-struct ovs_vxlan_opts {
- __u32 gbp;
-};
-
-#endif
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 067a3fff1d2c..d73e5a16e7ca 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -113,7 +113,7 @@ struct vport *ovs_vport_locate(const struct net *net, const char *name)
struct vport *vport;
hlist_for_each_entry_rcu(vport, bucket, hash_node)
- if (!strcmp(name, vport->ops->get_name(vport)) &&
+ if (!strcmp(name, ovs_vport_name(vport)) &&
net_eq(ovs_dp_get_net(vport->dp), net))
return vport;
@@ -226,7 +226,7 @@ struct vport *ovs_vport_add(const struct vport_parms *parms)
}
bucket = hash_bucket(ovs_dp_get_net(vport->dp),
- vport->ops->get_name(vport));
+ ovs_vport_name(vport));
hlist_add_head_rcu(&vport->hash_node, bucket);
return vport;
}
@@ -469,7 +469,7 @@ u32 ovs_vport_find_upcall_portid(const struct vport *vport, struct sk_buff *skb)
* skb->data should point to the Ethernet header.
*/
void ovs_vport_receive(struct vport *vport, struct sk_buff *skb,
- const struct ovs_tunnel_info *tun_info)
+ const struct ip_tunnel_info *tun_info)
{
struct pcpu_sw_netstats *stats;
struct sw_flow_key key;
@@ -572,22 +572,22 @@ void ovs_vport_deferred_free(struct vport *vport)
}
EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
-int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info,
struct net *net,
- const struct ovs_tunnel_info *tun_info,
+ const struct ip_tunnel_info *tun_info,
u8 ipproto,
u32 skb_mark,
__be16 tp_src,
__be16 tp_dst)
{
- const struct ovs_key_ipv4_tunnel *tun_key;
+ const struct ip_tunnel_key *tun_key;
struct rtable *rt;
struct flowi4 fl;
if (unlikely(!tun_info))
return -EINVAL;
- tun_key = &tun_info->tunnel;
+ tun_key = &tun_info->key;
/* Route lookup to get srouce IP address.
* The process may need to be changed if the corresponding process
@@ -602,22 +602,22 @@ int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
/* Generate egress_tun_info based on tun_info,
* saddr, tp_src and tp_dst
*/
- __ovs_flow_tun_info_init(egress_tun_info,
- fl.saddr, tun_key->ipv4_dst,
- tun_key->ipv4_tos,
- tun_key->ipv4_ttl,
- tp_src, tp_dst,
- tun_key->tun_id,
- tun_key->tun_flags,
- tun_info->options,
- tun_info->options_len);
+ __ip_tunnel_info_init(egress_tun_info,
+ fl.saddr, tun_key->u.ipv4.dst,
+ tun_key->tos,
+ tun_key->ttl,
+ tp_src, tp_dst,
+ tun_key->tun_id,
+ tun_key->tun_flags,
+ tun_info->options,
+ tun_info->options_len);
return 0;
}
EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *info)
+ struct ip_tunnel_info *info)
{
/* get_egress_tun_info() is only implemented on tunnel ports. */
if (unlikely(!vport->ops->get_egress_tun_info))
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index bc85331a6c60..b88b3ee86f07 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -27,6 +27,7 @@
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/u64_stats_sync.h>
+#include <net/route.h>
#include "datapath.h"
@@ -58,15 +59,15 @@ u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
int ovs_vport_send(struct vport *, struct sk_buff *);
-int ovs_tunnel_get_egress_info(struct ovs_tunnel_info *egress_tun_info,
+int ovs_tunnel_get_egress_info(struct ip_tunnel_info *egress_tun_info,
struct net *net,
- const struct ovs_tunnel_info *tun_info,
+ const struct ip_tunnel_info *tun_info,
u8 ipproto,
u32 skb_mark,
__be16 tp_src,
__be16 tp_dst);
int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
- struct ovs_tunnel_info *info);
+ struct ip_tunnel_info *info);
/* The following definitions are for implementers of vport devices: */
@@ -106,7 +107,7 @@ struct vport_portids {
* @detach_list: list used for detaching vport in net-exit call.
*/
struct vport {
- struct rcu_head rcu;
+ struct net_device *dev;
struct datapath *dp;
struct vport_portids __rcu *upcall_portids;
u16 port_no;
@@ -119,6 +120,7 @@ struct vport {
struct vport_err_stats err_stats;
struct list_head detach_list;
+ struct rcu_head rcu;
};
/**
@@ -176,7 +178,7 @@ struct vport_ops {
int (*send)(struct vport *, struct sk_buff *);
int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
- struct ovs_tunnel_info *);
+ struct ip_tunnel_info *);
struct module *owner;
struct list_head list;
@@ -226,7 +228,7 @@ static inline struct vport *vport_from_priv(void *priv)
}
void ovs_vport_receive(struct vport *, struct sk_buff *,
- const struct ovs_tunnel_info *);
+ const struct ip_tunnel_info *);
static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
const void *start, unsigned int len)
@@ -235,11 +237,16 @@ static inline void ovs_skb_postpush_rcsum(struct sk_buff *skb,
skb->csum = csum_add(skb->csum, csum_partial(start, len, 0));
}
+static inline const char *ovs_vport_name(struct vport *vport)
+{
+ return vport->dev ? vport->dev->name : vport->ops->get_name(vport);
+}
+
int ovs_vport_ops_register(struct vport_ops *ops);
void ovs_vport_ops_unregister(struct vport_ops *ops);
static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
- const struct ovs_key_ipv4_tunnel *key,
+ const struct ip_tunnel_key *key,
u32 mark,
struct flowi4 *fl,
u8 protocol)
@@ -247,9 +254,9 @@ static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
struct rtable *rt;
memset(fl, 0, sizeof(*fl));
- fl->daddr = key->ipv4_dst;
- fl->saddr = key->ipv4_src;
- fl->flowi4_tos = RT_TOS(key->ipv4_tos);
+ fl->daddr = key->u.ipv4.dst;
+ fl->saddr = key->u.ipv4.src;
+ fl->flowi4_tos = RT_TOS(key->tos);
fl->flowi4_mark = mark;
fl->flowi4_proto = protocol;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index ed458b315ef4..7b8e39a22387 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -92,6 +92,7 @@
#ifdef CONFIG_INET
#include <net/inet_common.h>
#endif
+#include <linux/bpf.h>
#include "internal.h"
@@ -518,13 +519,11 @@ static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
}
static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
- int tx_ring,
struct sk_buff_head *rb_queue)
{
struct tpacket_kbdq_core *pkc;
- pkc = tx_ring ? GET_PBDQC_FROM_RB(&po->tx_ring) :
- GET_PBDQC_FROM_RB(&po->rx_ring);
+ pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
spin_lock_bh(&rb_queue->lock);
pkc->delete_blk_timer = 1;
@@ -1412,6 +1411,22 @@ static unsigned int fanout_demux_qm(struct packet_fanout *f,
return skb_get_queue_mapping(skb) % num;
}
+static unsigned int fanout_demux_bpf(struct packet_fanout *f,
+ struct sk_buff *skb,
+ unsigned int num)
+{
+ struct bpf_prog *prog;
+ unsigned int ret = 0;
+
+ rcu_read_lock();
+ prog = rcu_dereference(f->bpf_prog);
+ if (prog)
+ ret = BPF_PROG_RUN(prog, skb) % num;
+ rcu_read_unlock();
+
+ return ret;
+}
+
static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
{
return f->flags & (flag >> 8);
@@ -1456,6 +1471,10 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
case PACKET_FANOUT_ROLLOVER:
idx = fanout_demux_rollover(f, skb, 0, false, num);
break;
+ case PACKET_FANOUT_CBPF:
+ case PACKET_FANOUT_EBPF:
+ idx = fanout_demux_bpf(f, skb, num);
+ break;
}
if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
@@ -1504,6 +1523,103 @@ static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
return false;
}
+static void fanout_init_data(struct packet_fanout *f)
+{
+ switch (f->type) {
+ case PACKET_FANOUT_LB:
+ atomic_set(&f->rr_cur, 0);
+ break;
+ case PACKET_FANOUT_CBPF:
+ case PACKET_FANOUT_EBPF:
+ RCU_INIT_POINTER(f->bpf_prog, NULL);
+ break;
+ }
+}
+
+static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
+{
+ struct bpf_prog *old;
+
+ spin_lock(&f->lock);
+ old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
+ rcu_assign_pointer(f->bpf_prog, new);
+ spin_unlock(&f->lock);
+
+ if (old) {
+ synchronize_net();
+ bpf_prog_destroy(old);
+ }
+}
+
+static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
+ unsigned int len)
+{
+ struct bpf_prog *new;
+ struct sock_fprog fprog;
+ int ret;
+
+ if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
+ return -EPERM;
+ if (len != sizeof(fprog))
+ return -EINVAL;
+ if (copy_from_user(&fprog, data, len))
+ return -EFAULT;
+
+ ret = bpf_prog_create_from_user(&new, &fprog, NULL);
+ if (ret)
+ return ret;
+
+ __fanout_set_data_bpf(po->fanout, new);
+ return 0;
+}
+
+static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
+ unsigned int len)
+{
+ struct bpf_prog *new;
+ u32 fd;
+
+ if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
+ return -EPERM;
+ if (len != sizeof(fd))
+ return -EINVAL;
+ if (copy_from_user(&fd, data, len))
+ return -EFAULT;
+
+ new = bpf_prog_get(fd);
+ if (IS_ERR(new))
+ return PTR_ERR(new);
+ if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) {
+ bpf_prog_put(new);
+ return -EINVAL;
+ }
+
+ __fanout_set_data_bpf(po->fanout, new);
+ return 0;
+}
+
+static int fanout_set_data(struct packet_sock *po, char __user *data,
+ unsigned int len)
+{
+ switch (po->fanout->type) {
+ case PACKET_FANOUT_CBPF:
+ return fanout_set_data_cbpf(po, data, len);
+ case PACKET_FANOUT_EBPF:
+ return fanout_set_data_ebpf(po, data, len);
+ default:
+ return -EINVAL;
+ };
+}
+
+static void fanout_release_data(struct packet_fanout *f)
+{
+ switch (f->type) {
+ case PACKET_FANOUT_CBPF:
+ case PACKET_FANOUT_EBPF:
+ __fanout_set_data_bpf(f, NULL);
+ };
+}
+
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
struct packet_sock *po = pkt_sk(sk);
@@ -1521,6 +1637,8 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
case PACKET_FANOUT_CPU:
case PACKET_FANOUT_RND:
case PACKET_FANOUT_QM:
+ case PACKET_FANOUT_CBPF:
+ case PACKET_FANOUT_EBPF:
break;
default:
return -EINVAL;
@@ -1563,10 +1681,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
match->id = id;
match->type = type;
match->flags = flags;
- atomic_set(&match->rr_cur, 0);
INIT_LIST_HEAD(&match->list);
spin_lock_init(&match->lock);
atomic_set(&match->sk_ref, 0);
+ fanout_init_data(match);
match->prot_hook.type = po->prot_hook.type;
match->prot_hook.dev = po->prot_hook.dev;
match->prot_hook.func = packet_rcv_fanout;
@@ -1612,6 +1730,7 @@ static void fanout_release(struct sock *sk)
if (atomic_dec_and_test(&f->sk_ref)) {
list_del(&f->list);
dev_remove_pack(&f->prot_hook);
+ fanout_release_data(f);
kfree(f);
}
mutex_unlock(&fanout_mutex);
@@ -3531,6 +3650,13 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
return fanout_add(sk, val & 0xffff, val >> 16);
}
+ case PACKET_FANOUT_DATA:
+ {
+ if (!po->fanout)
+ return -EINVAL;
+
+ return fanout_set_data(po, optval, optlen);
+ }
case PACKET_TX_HAS_OFF:
{
unsigned int val;
@@ -4043,7 +4169,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
if (closing && (po->tp_version > TPACKET_V2)) {
/* Because we don't support block-based V3 on tx-ring */
if (!tx_ring)
- prb_shutdown_retire_blk_timer(po, tx_ring, rb_queue);
+ prb_shutdown_retire_blk_timer(po, rb_queue);
}
release_sock(sk);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e20b3e8829b8..9ee46314b7d7 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -79,7 +79,10 @@ struct packet_fanout {
u16 id;
u8 type;
u8 flags;
- atomic_t rr_cur;
+ union {
+ atomic_t rr_cur;
+ struct bpf_prog __rcu *bpf_prog;
+ };
struct list_head list;
struct sock *arr[PACKET_FANOUT_MAX];
spinlock_t lock;
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 4ebd29c128b6..dd666fb9b4e1 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -185,7 +185,8 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
ret = 0;
goto out;
}
- trans = rds_trans_get_preferred(sin->sin_addr.s_addr);
+ trans = rds_trans_get_preferred(sock_net(sock->sk),
+ sin->sin_addr.s_addr);
if (!trans) {
ret = -EADDRNOTAVAIL;
rds_remove_bound(rs);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index da6da57e5f36..d4fecb21ca25 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -117,7 +117,8 @@ static void rds_conn_reset(struct rds_connection *conn)
* For now they are not garbage collected once they're created. They
* are torn down as the module is removed, if ever.
*/
-static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
+static struct rds_connection *__rds_conn_create(struct net *net,
+ __be32 laddr, __be32 faddr,
struct rds_transport *trans, gfp_t gfp,
int is_outgoing)
{
@@ -157,6 +158,7 @@ new_conn:
conn->c_faddr = faddr;
spin_lock_init(&conn->c_lock);
conn->c_next_tx_seq = 1;
+ rds_conn_net_set(conn, net);
init_waitqueue_head(&conn->c_waitq);
INIT_LIST_HEAD(&conn->c_send_queue);
@@ -174,7 +176,7 @@ new_conn:
* can bind to the destination address then we'd rather the messages
* flow through loopback rather than either transport.
*/
- loop_trans = rds_trans_get_preferred(faddr);
+ loop_trans = rds_trans_get_preferred(net, faddr);
if (loop_trans) {
rds_trans_put(loop_trans);
conn->c_loopback = 1;
@@ -260,17 +262,19 @@ out:
return conn;
}
-struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create(struct net *net,
+ __be32 laddr, __be32 faddr,
struct rds_transport *trans, gfp_t gfp)
{
- return __rds_conn_create(laddr, faddr, trans, gfp, 0);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, 0);
}
EXPORT_SYMBOL_GPL(rds_conn_create);
-struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create_outgoing(struct net *net,
+ __be32 laddr, __be32 faddr,
struct rds_transport *trans, gfp_t gfp)
{
- return __rds_conn_create(laddr, faddr, trans, gfp, 1);
+ return __rds_conn_create(net, laddr, faddr, trans, gfp, 1);
}
EXPORT_SYMBOL_GPL(rds_conn_create_outgoing);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index ba2dffeff608..13814227b3b2 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -317,7 +317,7 @@ static void rds_ib_ic_info(struct socket *sock, unsigned int len,
* allowed to influence which paths have priority. We could call userspace
* asserting this policy "routing".
*/
-static int rds_ib_laddr_check(__be32 addr)
+static int rds_ib_laddr_check(struct net *net, __be32 addr)
{
int ret;
struct rdma_cm_id *cm_id;
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 0da2a45b33bd..f40d8f52b753 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -448,8 +448,9 @@ int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id,
(unsigned long long)be64_to_cpu(lguid),
(unsigned long long)be64_to_cpu(fguid));
- conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport,
- GFP_KERNEL);
+ /* RDS/IB is not currently netns aware, thus init_net */
+ conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
+ &rds_ib_transport, GFP_KERNEL);
if (IS_ERR(conn)) {
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
conn = NULL;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 589935661d66..5d5a9d258658 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -218,7 +218,7 @@ static void rds_iw_ic_info(struct socket *sock, unsigned int len,
* allowed to influence which paths have priority. We could call userspace
* asserting this policy "routing".
*/
-static int rds_iw_laddr_check(__be32 addr)
+static int rds_iw_laddr_check(struct net *net, __be32 addr)
{
int ret;
struct rdma_cm_id *cm_id;
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 8f486fa32079..a6553a6fb2bc 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -398,8 +398,9 @@ int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id,
&dp->dp_saddr, &dp->dp_daddr,
RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version));
- conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport,
- GFP_KERNEL);
+ /* RDS/IW is not currently netns aware, thus init_net */
+ conn = rds_conn_create(&init_net, dp->dp_daddr, dp->dp_saddr,
+ &rds_iw_transport, GFP_KERNEL);
if (IS_ERR(conn)) {
rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn));
conn = NULL;
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 2260c1e434b1..9005fb0586f6 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -128,8 +128,21 @@ struct rds_connection {
/* Protocol version */
unsigned int c_version;
+ possible_net_t c_net;
};
+static inline
+struct net *rds_conn_net(struct rds_connection *conn)
+{
+ return read_pnet(&conn->c_net);
+}
+
+static inline
+void rds_conn_net_set(struct rds_connection *conn, struct net *net)
+{
+ write_pnet(&conn->c_net, net);
+}
+
#define RDS_FLAG_CONG_BITMAP 0x01
#define RDS_FLAG_ACK_REQUIRED 0x02
#define RDS_FLAG_RETRANSMITTED 0x04
@@ -417,7 +430,7 @@ struct rds_transport {
unsigned int t_prefer_loopback:1;
unsigned int t_type;
- int (*laddr_check)(__be32 addr);
+ int (*laddr_check)(struct net *net, __be32 addr);
int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp);
void (*conn_free)(void *data);
int (*conn_connect)(struct rds_connection *conn);
@@ -608,9 +621,11 @@ struct rds_message *rds_cong_update_alloc(struct rds_connection *conn);
/* conn.c */
int rds_conn_init(void);
void rds_conn_exit(void);
-struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create(struct net *net,
+ __be32 laddr, __be32 faddr,
struct rds_transport *trans, gfp_t gfp);
-struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr,
+struct rds_connection *rds_conn_create_outgoing(struct net *net,
+ __be32 laddr, __be32 faddr,
struct rds_transport *trans, gfp_t gfp);
void rds_conn_shutdown(struct rds_connection *conn);
void rds_conn_destroy(struct rds_connection *conn);
@@ -795,7 +810,7 @@ void rds_connect_complete(struct rds_connection *conn);
/* transport.c */
int rds_trans_register(struct rds_transport *trans);
void rds_trans_unregister(struct rds_transport *trans);
-struct rds_transport *rds_trans_get_preferred(__be32 addr);
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
void rds_trans_put(struct rds_transport *trans);
unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter,
unsigned int avail);
diff --git a/net/rds/send.c b/net/rds/send.c
index e9430f537f9c..2581b8e3dbe7 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1023,7 +1023,8 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
if (rs->rs_conn && rs->rs_conn->c_faddr == daddr)
conn = rs->rs_conn;
else {
- conn = rds_conn_create_outgoing(rs->rs_bound_addr, daddr,
+ conn = rds_conn_create_outgoing(sock_net(sock->sk),
+ rs->rs_bound_addr, daddr,
rs->rs_transport,
sock->sk->sk_allocation);
if (IS_ERR(conn)) {
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index edac9ef2bc8b..c42b60bf4c68 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -35,6 +35,9 @@
#include <linux/in.h>
#include <linux/module.h>
#include <net/tcp.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/tcp.h>
#include "rds.h"
#include "tcp.h"
@@ -189,9 +192,9 @@ out:
spin_unlock_irqrestore(&rds_tcp_tc_list_lock, flags);
}
-static int rds_tcp_laddr_check(__be32 addr)
+static int rds_tcp_laddr_check(struct net *net, __be32 addr)
{
- if (inet_addr_type(&init_net, addr) == RTN_LOCAL)
+ if (inet_addr_type(net, addr) == RTN_LOCAL)
return 0;
return -EADDRNOTAVAIL;
}
@@ -250,16 +253,7 @@ static void rds_tcp_destroy_conns(void)
}
}
-static void rds_tcp_exit(void)
-{
- rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
- rds_tcp_listen_stop();
- rds_tcp_destroy_conns();
- rds_trans_unregister(&rds_tcp_transport);
- rds_tcp_recv_exit();
- kmem_cache_destroy(rds_tcp_conn_slab);
-}
-module_exit(rds_tcp_exit);
+static void rds_tcp_exit(void);
struct rds_transport rds_tcp_transport = {
.laddr_check = rds_tcp_laddr_check,
@@ -281,6 +275,136 @@ struct rds_transport rds_tcp_transport = {
.t_prefer_loopback = 1,
};
+static int rds_tcp_netid;
+
+/* per-network namespace private data for this module */
+struct rds_tcp_net {
+ struct socket *rds_tcp_listen_sock;
+ struct work_struct rds_tcp_accept_w;
+};
+
+static void rds_tcp_accept_worker(struct work_struct *work)
+{
+ struct rds_tcp_net *rtn = container_of(work,
+ struct rds_tcp_net,
+ rds_tcp_accept_w);
+
+ while (rds_tcp_accept_one(rtn->rds_tcp_listen_sock) == 0)
+ cond_resched();
+}
+
+void rds_tcp_accept_work(struct sock *sk)
+{
+ struct net *net = sock_net(sk);
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+ queue_work(rds_wq, &rtn->rds_tcp_accept_w);
+}
+
+static __net_init int rds_tcp_init_net(struct net *net)
+{
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+ rtn->rds_tcp_listen_sock = rds_tcp_listen_init(net);
+ if (!rtn->rds_tcp_listen_sock) {
+ pr_warn("could not set up listen sock\n");
+ return -EAFNOSUPPORT;
+ }
+ INIT_WORK(&rtn->rds_tcp_accept_w, rds_tcp_accept_worker);
+ return 0;
+}
+
+static void __net_exit rds_tcp_exit_net(struct net *net)
+{
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+ /* If rds_tcp_exit_net() is called as a result of netns deletion,
+ * the rds_tcp_kill_sock() device notifier would already have cleaned
+ * up the listen socket, thus there is no work to do in this function.
+ *
+ * If rds_tcp_exit_net() is called as a result of module unload,
+ * i.e., due to rds_tcp_exit() -> unregister_pernet_subsys(), then
+ * we do need to clean up the listen socket here.
+ */
+ if (rtn->rds_tcp_listen_sock) {
+ rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+ rtn->rds_tcp_listen_sock = NULL;
+ flush_work(&rtn->rds_tcp_accept_w);
+ }
+}
+
+static struct pernet_operations rds_tcp_net_ops = {
+ .init = rds_tcp_init_net,
+ .exit = rds_tcp_exit_net,
+ .id = &rds_tcp_netid,
+ .size = sizeof(struct rds_tcp_net),
+};
+
+static void rds_tcp_kill_sock(struct net *net)
+{
+ struct rds_tcp_connection *tc, *_tc;
+ struct sock *sk;
+ LIST_HEAD(tmp_list);
+ struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+
+ rds_tcp_listen_stop(rtn->rds_tcp_listen_sock);
+ rtn->rds_tcp_listen_sock = NULL;
+ flush_work(&rtn->rds_tcp_accept_w);
+ spin_lock_irq(&rds_tcp_conn_lock);
+ list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
+ struct net *c_net = read_pnet(&tc->conn->c_net);
+
+ if (net != c_net || !tc->t_sock)
+ continue;
+ list_move_tail(&tc->t_tcp_node, &tmp_list);
+ }
+ spin_unlock_irq(&rds_tcp_conn_lock);
+ list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
+ sk = tc->t_sock->sk;
+ sk->sk_prot->disconnect(sk, 0);
+ tcp_done(sk);
+ if (tc->conn->c_passive)
+ rds_conn_destroy(tc->conn->c_passive);
+ rds_conn_destroy(tc->conn);
+ }
+}
+
+static int rds_tcp_dev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+ /* rds-tcp registers as a pernet subsys, so the ->exit will only
+ * get invoked after network activity has quiesced. We need to
+ * clean up all sockets to quiesce network activity, and use
+ * the unregistration of the per-net loopback device as a trigger
+ * to start that cleanup.
+ */
+ if (event == NETDEV_UNREGISTER_FINAL &&
+ dev->ifindex == LOOPBACK_IFINDEX)
+ rds_tcp_kill_sock(dev_net(dev));
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block rds_tcp_dev_notifier = {
+ .notifier_call = rds_tcp_dev_event,
+ .priority = -10, /* must be called after other network notifiers */
+};
+
+static void rds_tcp_exit(void)
+{
+ rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
+ unregister_pernet_subsys(&rds_tcp_net_ops);
+ if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
+ pr_warn("could not unregister rds_tcp_dev_notifier\n");
+ rds_tcp_destroy_conns();
+ rds_trans_unregister(&rds_tcp_transport);
+ rds_tcp_recv_exit();
+ kmem_cache_destroy(rds_tcp_conn_slab);
+}
+module_exit(rds_tcp_exit);
+
static int rds_tcp_init(void)
{
int ret;
@@ -293,6 +417,16 @@ static int rds_tcp_init(void)
goto out;
}
+ ret = register_netdevice_notifier(&rds_tcp_dev_notifier);
+ if (ret) {
+ pr_warn("could not register rds_tcp_dev_notifier\n");
+ goto out;
+ }
+
+ ret = register_pernet_subsys(&rds_tcp_net_ops);
+ if (ret)
+ goto out_slab;
+
ret = rds_tcp_recv_init();
if (ret)
goto out_slab;
@@ -301,19 +435,14 @@ static int rds_tcp_init(void)
if (ret)
goto out_recv;
- ret = rds_tcp_listen_init();
- if (ret)
- goto out_register;
-
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
goto out;
-out_register:
- rds_trans_unregister(&rds_tcp_transport);
out_recv:
rds_tcp_recv_exit();
out_slab:
+ unregister_pernet_subsys(&rds_tcp_net_ops);
kmem_cache_destroy(rds_tcp_conn_slab);
out:
return ret;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 0dbdd37162da..64f873c0c6b6 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -52,6 +52,7 @@ u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc);
u32 rds_tcp_snd_una(struct rds_tcp_connection *tc);
u64 rds_tcp_map_seq(struct rds_tcp_connection *tc, u32 seq);
extern struct rds_transport rds_tcp_transport;
+void rds_tcp_accept_work(struct sock *sk);
/* tcp_connect.c */
int rds_tcp_conn_connect(struct rds_connection *conn);
@@ -59,9 +60,11 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn);
void rds_tcp_state_change(struct sock *sk);
/* tcp_listen.c */
-int rds_tcp_listen_init(void);
-void rds_tcp_listen_stop(void);
+struct socket *rds_tcp_listen_init(struct net *);
+void rds_tcp_listen_stop(struct socket *);
void rds_tcp_listen_data_ready(struct sock *sk);
+int rds_tcp_accept_one(struct socket *sock);
+int rds_tcp_keepalive(struct socket *sock);
/* tcp_recv.c */
int rds_tcp_recv_init(void);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 973109c7b8e8..5cb16875c460 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -79,7 +79,8 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
struct sockaddr_in src, dest;
int ret;
- ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_kern(rds_conn_net(conn), PF_INET,
+ SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret < 0)
goto out;
@@ -111,10 +112,12 @@ int rds_tcp_conn_connect(struct rds_connection *conn)
rdsdebug("connect to address %pI4 returned %d\n", &conn->c_faddr, ret);
if (ret == -EINPROGRESS)
ret = 0;
- if (ret == 0)
+ if (ret == 0) {
+ rds_tcp_keepalive(sock);
sock = NULL;
- else
+ } else {
rds_tcp_restore_callbacks(sock, conn->c_transport_data);
+ }
out:
if (sock)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 0da49e34495f..444d78d0bd77 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -38,14 +38,7 @@
#include "rds.h"
#include "tcp.h"
-/*
- * cheesy, but simple..
- */
-static void rds_tcp_accept_worker(struct work_struct *work);
-static DECLARE_WORK(rds_tcp_listen_work, rds_tcp_accept_worker);
-static struct socket *rds_tcp_listen_sock;
-
-static int rds_tcp_keepalive(struct socket *sock)
+int rds_tcp_keepalive(struct socket *sock)
{
/* values below based on xs_udp_default_timeout */
int keepidle = 5; /* send a probe 'keepidle' secs after last data */
@@ -77,7 +70,7 @@ bail:
return ret;
}
-static int rds_tcp_accept_one(struct socket *sock)
+int rds_tcp_accept_one(struct socket *sock)
{
struct socket *new_sock = NULL;
struct rds_connection *conn;
@@ -85,8 +78,9 @@ static int rds_tcp_accept_one(struct socket *sock)
struct inet_sock *inet;
struct rds_tcp_connection *rs_tcp;
- ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
- sock->sk->sk_protocol, &new_sock);
+ ret = sock_create_kern(sock_net(sock->sk), sock->sk->sk_family,
+ sock->sk->sk_type, sock->sk->sk_protocol,
+ &new_sock);
if (ret)
goto out;
@@ -108,7 +102,8 @@ static int rds_tcp_accept_one(struct socket *sock)
&inet->inet_saddr, ntohs(inet->inet_sport),
&inet->inet_daddr, ntohs(inet->inet_dport));
- conn = rds_conn_create(inet->inet_saddr, inet->inet_daddr,
+ conn = rds_conn_create(sock_net(sock->sk),
+ inet->inet_saddr, inet->inet_daddr,
&rds_tcp_transport, GFP_KERNEL);
if (IS_ERR(conn)) {
ret = PTR_ERR(conn);
@@ -148,12 +143,6 @@ out:
return ret;
}
-static void rds_tcp_accept_worker(struct work_struct *work)
-{
- while (rds_tcp_accept_one(rds_tcp_listen_sock) == 0)
- cond_resched();
-}
-
void rds_tcp_listen_data_ready(struct sock *sk)
{
void (*ready)(struct sock *sk);
@@ -174,20 +163,20 @@ void rds_tcp_listen_data_ready(struct sock *sk)
* socket
*/
if (sk->sk_state == TCP_LISTEN)
- queue_work(rds_wq, &rds_tcp_listen_work);
+ rds_tcp_accept_work(sk);
out:
read_unlock(&sk->sk_callback_lock);
ready(sk);
}
-int rds_tcp_listen_init(void)
+struct socket *rds_tcp_listen_init(struct net *net)
{
struct sockaddr_in sin;
struct socket *sock = NULL;
int ret;
- ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
+ ret = sock_create_kern(net, PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret < 0)
goto out;
@@ -211,17 +200,15 @@ int rds_tcp_listen_init(void)
if (ret < 0)
goto out;
- rds_tcp_listen_sock = sock;
- sock = NULL;
+ return sock;
out:
if (sock)
sock_release(sock);
- return ret;
+ return NULL;
}
-void rds_tcp_listen_stop(void)
+void rds_tcp_listen_stop(struct socket *sock)
{
- struct socket *sock = rds_tcp_listen_sock;
struct sock *sk;
if (!sock)
@@ -242,5 +229,4 @@ void rds_tcp_listen_stop(void)
/* wait for accepts to stop and close the socket */
flush_workqueue(rds_wq);
sock_release(sock);
- rds_tcp_listen_sock = NULL;
}
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 83498e1c75b8..f3afd1d60d3c 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -77,7 +77,7 @@ void rds_trans_put(struct rds_transport *trans)
module_put(trans->t_owner);
}
-struct rds_transport *rds_trans_get_preferred(__be32 addr)
+struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr)
{
struct rds_transport *ret = NULL;
struct rds_transport *trans;
@@ -90,7 +90,7 @@ struct rds_transport *rds_trans_get_preferred(__be32 addr)
for (i = 0; i < RDS_TRANS_COUNT; i++) {
trans = transports[i];
- if (trans && (trans->laddr_check(addr) == 0) &&
+ if (trans && (trans->laddr_check(net, addr) == 0) &&
(!trans->t_owner || try_module_get(trans->t_owner))) {
ret = trans;
break;
diff --git a/net/rfkill/Kconfig b/net/rfkill/Kconfig
index 4c10e7e6c9f6..598d374f6a35 100644
--- a/net/rfkill/Kconfig
+++ b/net/rfkill/Kconfig
@@ -36,7 +36,8 @@ config RFKILL_REGULATOR
config RFKILL_GPIO
tristate "GPIO RFKILL driver"
- depends on RFKILL && GPIOLIB
+ depends on RFKILL
+ depends on GPIOLIB || COMPILE_TEST
default n
help
If you say yes here you get support of a generic gpio RFKILL
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index d5d58d919552..93127220cb54 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -164,7 +164,6 @@ static int rfkill_gpio_remove(struct platform_device *pdev)
#ifdef CONFIG_ACPI
static const struct acpi_device_id rfkill_acpi_match[] = {
{ "BCM2E1A", RFKILL_TYPE_BLUETOOTH },
- { "BCM2E39", RFKILL_TYPE_BLUETOOTH },
{ "BCM2E3D", RFKILL_TYPE_BLUETOOTH },
{ "BCM2E40", RFKILL_TYPE_BLUETOOTH },
{ "BCM2E64", RFKILL_TYPE_BLUETOOTH },
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 43ec92680ae8..b087087ccfa9 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -27,6 +27,15 @@
#include <net/act_api.h>
#include <net/netlink.h>
+static void free_tcf(struct rcu_head *head)
+{
+ struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu);
+
+ free_percpu(p->cpu_bstats);
+ free_percpu(p->cpu_qstats);
+ kfree(p);
+}
+
void tcf_hash_destroy(struct tc_action *a)
{
struct tcf_common *p = a->priv;
@@ -41,7 +50,7 @@ void tcf_hash_destroy(struct tc_action *a)
* gen_estimator est_timer() might access p->tcfc_lock
* or bstats, wait a RCU grace period before freeing p
*/
- kfree_rcu(p, tcfc_rcu);
+ call_rcu(&p->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_destroy);
@@ -231,15 +240,16 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
if (est)
gen_kill_estimator(&pc->tcfc_bstats,
&pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ call_rcu(&pc->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
- int size, int bind)
+ int size, int bind, bool cpustats)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+ int err = -ENOMEM;
if (unlikely(!p))
return -ENOMEM;
@@ -247,18 +257,32 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
if (bind)
p->tcfc_bindcnt = 1;
+ if (cpustats) {
+ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!p->cpu_bstats) {
+err1:
+ kfree(p);
+ return err;
+ }
+ p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!p->cpu_qstats) {
+err2:
+ free_percpu(p->cpu_bstats);
+ goto err1;
+ }
+ }
spin_lock_init(&p->tcfc_lock);
INIT_HLIST_NODE(&p->tcfc_head);
p->tcfc_index = index ? index : tcf_hash_new_index(hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
- int err = gen_new_estimator(&p->tcfc_bstats, NULL,
- &p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
+ &p->tcfc_rate_est,
+ &p->tcfc_lock, est);
if (err) {
- kfree(p);
- return err;
+ free_percpu(p->cpu_qstats);
+ goto err2;
}
}
@@ -616,10 +640,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
- gnet_stats_copy_queue(&d, NULL,
+ gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfc_qstats,
p->tcfc_qstats.qlen) < 0)
goto errout;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index d0edeb7a1950..1b97dabc621a 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -278,7 +278,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct tc_act_bpf *parm;
struct tcf_bpf *prog;
bool is_bpf, is_ebpf;
- int ret;
+ int ret, res = 0;
if (!nla)
return -EINVAL;
@@ -287,41 +287,43 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
- is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
- is_ebpf = tb[TCA_ACT_BPF_FD];
-
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
- !tb[TCA_ACT_BPF_PARMS])
+ if (!tb[TCA_ACT_BPF_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
- memset(&cfg, 0, sizeof(cfg));
-
- ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
- tcf_bpf_init_from_efd(tb, &cfg);
- if (ret < 0)
- return ret;
-
if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
- sizeof(*prog), bind);
+ sizeof(*prog), bind, false);
if (ret < 0)
- goto destroy_fp;
+ return ret;
- ret = ACT_P_CREATED;
+ res = ACT_P_CREATED;
} else {
/* Don't override defaults. */
if (bind)
- goto destroy_fp;
+ return 0;
tcf_hash_release(act, bind);
- if (!replace) {
- ret = -EEXIST;
- goto destroy_fp;
- }
+ if (!replace)
+ return -EEXIST;
}
+ is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
+ is_ebpf = tb[TCA_ACT_BPF_FD];
+
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memset(&cfg, 0, sizeof(cfg));
+
+ ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
+ tcf_bpf_init_from_efd(tb, &cfg);
+ if (ret < 0)
+ goto out;
+
prog = to_bpf(act);
spin_lock_bh(&prog->tcf_lock);
@@ -341,15 +343,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
spin_unlock_bh(&prog->tcf_lock);
- if (ret == ACT_P_CREATED)
+ if (res == ACT_P_CREATED)
tcf_hash_insert(act);
else
tcf_bpf_cfg_cleanup(&old);
- return ret;
+ return res;
+out:
+ if (res == ACT_P_CREATED)
+ tcf_hash_cleanup(act, est);
-destroy_fp:
- tcf_bpf_cfg_cleanup(&cfg);
return ret;
}
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 295d14bd6c67..5019a47b9270 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = a->priv;
+ struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
@@ -70,7 +71,10 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
proto, &tuple))
goto out;
- thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
+ zone.id = ca->zone;
+ zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+ thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple);
if (!thash)
goto out;
@@ -108,7 +112,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*ci),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 4cd5cf1aedf8..b07c535ba8e7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -62,7 +62,8 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_CSUM_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 7fffc2272701..5c1b05170736 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,14 +28,18 @@
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval)
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (prandom_u32() % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
static int gact_determ(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval)
+ u32 pack = atomic_inc_return(&gact->packets);
+
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (pack % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
@@ -85,7 +89,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -99,16 +104,19 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
gact = to_gact(a);
- spin_lock_bh(&gact->tcf_lock);
+ ASSERT_RTNL();
gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
if (p_parm) {
gact->tcfg_paction = p_parm->paction;
- gact->tcfg_pval = p_parm->pval;
+ gact->tcfg_pval = max_t(u16, 1, p_parm->pval);
+ /* Make sure tcfg_pval is written before tcfg_ptype
+ * coupled with smp_rmb() in gact_net_rand() & gact_determ()
+ */
+ smp_wmb();
gact->tcfg_ptype = p_parm->ptype;
}
#endif
- spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
tcf_hash_insert(a);
return ret;
@@ -118,23 +126,21 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_gact *gact = a->priv;
- int action = TC_ACT_SHOT;
+ int action = READ_ONCE(gact->tcf_action);
- spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (gact->tcfg_ptype)
- action = gact_rand[gact->tcfg_ptype](gact);
- else
- action = gact->tcf_action;
-#else
- action = gact->tcf_action;
+ {
+ u32 ptype = READ_ONCE(gact->tcfg_ptype);
+
+ if (ptype)
+ action = gact_rand[ptype](gact);
+ }
#endif
- gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
- gact->tcf_bstats.packets++;
+ bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
if (action == TC_ACT_SHOT)
- gact->tcf_qstats.drops++;
- gact->tcf_tm.lastuse = jiffies;
- spin_unlock(&gact->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+
+ tcf_lastuse_update(&gact->tcf_tm);
return action;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index cbc8dd7dd48a..99c9cc1c7af9 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -114,7 +114,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = nla_get_u32(tb[TCA_IPT_INDEX]);
if (!tcf_hash_check(index, a, bind) ) {
- ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+ ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 268545050ddb..2d1be4a760fd 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -35,9 +35,11 @@ static LIST_HEAD(mirred_list);
static void tcf_mirred_release(struct tc_action *a, int bind)
{
struct tcf_mirred *m = to_mirred(a);
+ struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1);
+
list_del(&m->tcfm_list);
- if (m->tcfm_dev)
- dev_put(m->tcfm_dev);
+ if (dev)
+ dev_put(dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -93,7 +95,8 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*m),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -107,18 +110,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
m = to_mirred(a);
- spin_lock_bh(&m->tcf_lock);
+ ASSERT_RTNL();
m->tcf_action = parm->action;
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
m->tcfm_ifindex = parm->ifindex;
if (ret != ACT_P_CREATED)
- dev_put(m->tcfm_dev);
+ dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
- m->tcfm_dev = dev;
+ rcu_assign_pointer(m->tcfm_dev, dev);
m->tcfm_ok_push = ok_push;
}
- spin_unlock_bh(&m->tcf_lock);
+
if (ret == ACT_P_CREATED) {
list_add(&m->tcfm_list, &mirred_list);
tcf_hash_insert(a);
@@ -133,20 +136,22 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_mirred *m = a->priv;
struct net_device *dev;
struct sk_buff *skb2;
+ int retval, err;
u32 at;
- int retval, err = 1;
- spin_lock(&m->tcf_lock);
- m->tcf_tm.lastuse = jiffies;
- bstats_update(&m->tcf_bstats, skb);
+ tcf_lastuse_update(&m->tcf_tm);
+
+ bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
- dev = m->tcfm_dev;
- if (!dev) {
- printk_once(KERN_NOTICE "tc mirred: target device is gone\n");
+ rcu_read_lock();
+ retval = READ_ONCE(m->tcf_action);
+ dev = rcu_dereference(m->tcfm_dev);
+ if (unlikely(!dev)) {
+ pr_notice_once("tc mirred: target device is gone\n");
goto out;
}
- if (!(dev->flags & IFF_UP)) {
+ if (unlikely(!(dev->flags & IFF_UP))) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
goto out;
@@ -154,7 +159,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
at = G_TC_AT(skb->tc_verd);
skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 == NULL)
+ if (!skb2)
goto out;
if (!(at & AT_EGRESS)) {
@@ -170,16 +175,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
skb2->dev = dev;
err = dev_queue_xmit(skb2);
-out:
if (err) {
- m->tcf_qstats.overlimits++;
+out:
+ qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
retval = TC_ACT_SHOT;
- else
- retval = m->tcf_action;
- } else
- retval = m->tcf_action;
- spin_unlock(&m->tcf_lock);
+ }
+ rcu_read_unlock();
return retval;
}
@@ -218,14 +220,16 @@ static int mirred_device_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tcf_mirred *m;
+ ASSERT_RTNL();
if (event == NETDEV_UNREGISTER)
list_for_each_entry(m, &mirred_list, tcfm_list) {
- spin_lock_bh(&m->tcf_lock);
- if (m->tcfm_dev == dev) {
+ if (rcu_access_pointer(m->tcfm_dev) == dev) {
dev_put(dev);
- m->tcfm_dev = NULL;
+ /* Note : no rcu grace period necessary, as
+ * net_device are already rcu protected.
+ */
+ RCU_INIT_POINTER(m->tcfm_dev, NULL);
}
- spin_unlock_bh(&m->tcf_lock);
}
return NOTIFY_DONE;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 270a030d5fd0..b7c4ead8b5a8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -55,7 +55,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_NAT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -161,7 +162,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
goto drop;
tcph = (void *)(skb_network_header(skb) + ihl);
- inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
+ true);
break;
}
case IPPROTO_UDP:
@@ -177,7 +179,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
udph = (void *)(skb_network_header(skb) + ihl);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
- new_addr, 1);
+ new_addr, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
@@ -230,7 +232,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
iph->saddr = new_addr;
inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
- 0);
+ false);
break;
}
default:
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index ff8b466a73f6..e38a7701f154 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -57,7 +57,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
p = to_pedit(a);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 6a8d9488613a..d6b708d6afdf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -103,7 +103,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
defdata = nla_data(tb[TCA_DEF_DATA]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fcfeeaf838be..6751b5f8c046 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -99,7 +99,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index d735ecf0b1a7..796785e0bf96 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -116,7 +116,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
action = parm->v_action;
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*v),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index ea611b216412..4c85bd3a750c 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -30,35 +30,16 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
- u32 classid;
-
- classid = task_cls_state(current)->classid;
-
- /*
- * Due to the nature of the classifier it is required to ignore all
- * packets originating from softirq context as accessing `current'
- * would lead to false results.
- *
- * This test assumes that all callers of dev_queue_xmit() explicitely
- * disable bh. Knowing this, it is possible to detect softirq based
- * calls by looking at the number of nested bh disable calls because
- * softirqs always disables bh.
- */
- if (in_serving_softirq()) {
- /* If there is an sk_classid we'll use that. */
- if (!skb->sk)
- return -1;
- classid = skb->sk->sk_classid;
- }
+ u32 classid = task_get_classid(skb);
if (!classid)
return -1;
-
if (!tcf_em_tree_match(skb, &head->ematches, NULL))
return -1;
res->classid = classid;
res->class = 0;
+
return tcf_exts_exec(skb, &head->exts, res);
}
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2e2398cfc694..2177eac0a61e 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -54,7 +54,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
if (opt == NULL) {
- u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
+ u32 limit = qdisc_dev(sch)->tx_queue_len;
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6efca30894aa..942fea8405a4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -735,7 +735,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
{
struct Qdisc *qdisc = &noqueue_qdisc;
- if (dev->tx_queue_len) {
+ if (dev->tx_queue_len && !(dev->priv_flags & IFF_NO_QUEUE)) {
qdisc = qdisc_create_dflt(dev_queue,
default_qdisc_ops, TC_H_ROOT);
if (!qdisc) {
@@ -755,7 +755,9 @@ static void attach_default_qdiscs(struct net_device *dev)
txq = netdev_get_tx_queue(dev, 0);
- if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
+ if (!netif_is_multiqueue(dev) ||
+ dev->tx_queue_len == 0 ||
+ dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping;
atomic_inc(&dev->qdisc->refcnt);
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index abb9f2fec28f..80105109f756 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -512,11 +512,9 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_GRED_LIMIT])
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
- else {
- u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
-
- sch->limit = qlen * psched_mtu(qdisc_dev(sch));
- }
+ else
+ sch->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f1acb0f60dc3..cf4b0f865d1b 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1048,11 +1048,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
- else {
+ else
q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
- if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
- q->direct_qlen = 2;
- }
+
if ((q->rate2quantum = gopt->rate2quantum) < 1)
q->rate2quantum = 1;
q->defcls = gopt->defcls;
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index ade9445a55ab..5abfe44678d4 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -130,12 +130,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt)
q->unplug_indefinite = false;
if (opt == NULL) {
- /* We will set a default limit of 100 pkts (~150kB)
- * in case tx_queue_len is not available. The
- * default value is completely arbitrary.
- */
- u32 pkt_limit = qdisc_dev(sch)->tx_queue_len ? : 100;
- q->limit = pkt_limit * psched_mtu(qdisc_dev(sch));
+ q->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
} else {
struct tc_plug_qopt *ctl = nla_data(opt);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index b8d73bca683c..ffaeea63d473 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -186,7 +186,6 @@ struct qfq_sched {
u64 oldV, V; /* Precise virtual times. */
struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */
- u32 num_active_agg; /* Num. of active aggregates */
u32 wsum; /* weight sum */
u32 iwsum; /* inverse weight sum */
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 4b815193326c..dcdff5c769a1 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -502,7 +502,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
limit = ctl->limit;
if (limit == 0)
- limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
+ limit = qdisc_dev(sch)->tx_queue_len;
child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
if (IS_ERR(child))
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 59e80356672b..4345790ad326 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -487,23 +487,35 @@ static void sctp_v4_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
*/
rcu_read_lock();
list_for_each_entry_rcu(laddr, &bp->address_list, list) {
+ struct net_device *odev;
+
if (!laddr->valid)
continue;
- if ((laddr->state == SCTP_ADDR_SRC) &&
- (AF_INET == laddr->a.sa.sa_family)) {
- fl4->fl4_sport = laddr->a.v4.sin_port;
- flowi4_update_output(fl4,
- asoc->base.sk->sk_bound_dev_if,
- RT_CONN_FLAGS(asoc->base.sk),
- daddr->v4.sin_addr.s_addr,
- laddr->a.v4.sin_addr.s_addr);
-
- rt = ip_route_output_key(sock_net(sk), fl4);
- if (!IS_ERR(rt)) {
- dst = &rt->dst;
- goto out_unlock;
- }
- }
+ if (laddr->state != SCTP_ADDR_SRC ||
+ AF_INET != laddr->a.sa.sa_family)
+ continue;
+
+ fl4->fl4_sport = laddr->a.v4.sin_port;
+ flowi4_update_output(fl4,
+ asoc->base.sk->sk_bound_dev_if,
+ RT_CONN_FLAGS(asoc->base.sk),
+ daddr->v4.sin_addr.s_addr,
+ laddr->a.v4.sin_addr.s_addr);
+
+ rt = ip_route_output_key(sock_net(sk), fl4);
+ if (IS_ERR(rt))
+ continue;
+
+ /* Ensure the src address belongs to the output
+ * interface.
+ */
+ odev = __ip_dev_find(sock_net(sk), laddr->a.v4.sin_addr.s_addr,
+ false);
+ if (!odev || odev->ifindex != fl4->flowi4_oif)
+ continue;
+
+ dst = &rt->dst;
+ break;
}
out_unlock:
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 3ee27b7704ff..d7eaa7354cf7 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -853,7 +853,7 @@ nomem:
/*
* Respond to a normal COOKIE ACK chunk.
- * We are the side that is being asked for an association.
+ * We are the side that is asking for an association.
*
* RFC 2960 5.1 Normal Establishment of an Association
*
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 9f2add3cba26..16c1c43980a1 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -810,7 +810,7 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev,
ndm->ndm_flags = NTF_SELF;
ndm->ndm_type = 0;
ndm->ndm_ifindex = dev->ifindex;
- ndm->ndm_state = NUD_REACHABLE;
+ ndm->ndm_state = obj->u.fdb.ndm_state;
if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
goto nla_put_failure;
@@ -910,13 +910,9 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
if (switchdev_port_attr_get(dev, &attr))
return NULL;
- if (nhsel > 0) {
- if (prev_attr.u.ppid.id_len != attr.u.ppid.id_len)
+ if (nhsel > 0 &&
+ !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
return NULL;
- if (memcmp(prev_attr.u.ppid.id, attr.u.ppid.id,
- attr.u.ppid.id_len))
- return NULL;
- }
prev_attr = attr;
}
@@ -1043,3 +1039,106 @@ void switchdev_fib_ipv4_abort(struct fib_info *fi)
fi->fib_net->ipv4.fib_offload_disabled = true;
}
EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort);
+
+static bool switchdev_port_same_parent_id(struct net_device *a,
+ struct net_device *b)
+{
+ struct switchdev_attr a_attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
+ struct switchdev_attr b_attr = {
+ .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .flags = SWITCHDEV_F_NO_RECURSE,
+ };
+
+ if (switchdev_port_attr_get(a, &a_attr) ||
+ switchdev_port_attr_get(b, &b_attr))
+ return false;
+
+ return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
+}
+
+static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
+ struct net_device *group_dev)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+ if (lower_dev == dev)
+ continue;
+ if (switchdev_port_same_parent_id(dev, lower_dev))
+ return lower_dev->offload_fwd_mark;
+ return switchdev_port_fwd_mark_get(dev, lower_dev);
+ }
+
+ return dev->ifindex;
+}
+
+static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
+ u32 old_mark, u32 *reset_mark)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
+ if (lower_dev->offload_fwd_mark == old_mark) {
+ if (!*reset_mark)
+ *reset_mark = lower_dev->ifindex;
+ lower_dev->offload_fwd_mark = *reset_mark;
+ }
+ switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
+ }
+}
+
+/**
+ * switchdev_port_fwd_mark_set - Set port offload forwarding mark
+ *
+ * @dev: port device
+ * @group_dev: containing device
+ * @joining: true if dev is joining group; false if leaving group
+ *
+ * An ungrouped port's offload mark is just its ifindex. A grouped
+ * port's (member of a bridge, for example) offload mark is the ifindex
+ * of one of the ports in the group with the same parent (switch) ID.
+ * Ports on the same device in the same group will have the same mark.
+ *
+ * Example:
+ *
+ * br0 ifindex=9
+ * sw1p1 ifindex=2 mark=2
+ * sw1p2 ifindex=3 mark=2
+ * sw2p1 ifindex=4 mark=5
+ * sw2p2 ifindex=5 mark=5
+ *
+ * If sw2p2 leaves the bridge, we'll have:
+ *
+ * br0 ifindex=9
+ * sw1p1 ifindex=2 mark=2
+ * sw1p2 ifindex=3 mark=2
+ * sw2p1 ifindex=4 mark=4
+ * sw2p2 ifindex=5 mark=5
+ */
+void switchdev_port_fwd_mark_set(struct net_device *dev,
+ struct net_device *group_dev,
+ bool joining)
+{
+ u32 mark = dev->ifindex;
+ u32 reset_mark = 0;
+
+ if (group_dev && joining) {
+ mark = switchdev_port_fwd_mark_get(dev, group_dev);
+ } else if (group_dev && !joining) {
+ if (dev->offload_fwd_mark == mark)
+ /* Ohoh, this port was the mark reference port,
+ * but it's leaving the group, so reset the
+ * mark for the remaining ports in the group.
+ */
+ switchdev_port_fwd_mark_reset(group_dev, mark,
+ &reset_mark);
+ }
+
+ dev->offload_fwd_mark = mark;
+}
+EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a816382fc8af..8b010c976b2f 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -316,6 +316,29 @@ void tipc_bclink_update_link_state(struct tipc_node *n_ptr,
}
}
+/**
+ * tipc_bclink_sync_state - sync broadcast link state from a link message
+ * @n: node whose broadcast link state is updated
+ * @hdr: message header carrying the last broadcast sequence number
+ *
+ * Messages from users other than LINK_PROTOCOL are ignored. A STATE_MSG is
+ * passed on to tipc_bclink_update_link_state(). A RESET_MSG or ACTIVATE_MSG
+ * re-initializes @n's bclink sequence numbers, but only while @n is not up.
+ */
+void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *hdr)
+{
+	u16 last_bcast = msg_last_bcast(hdr);
+	int type = msg_type(hdr);
+
+	if (unlikely(msg_user(hdr) != LINK_PROTOCOL))
+		return;
+
+	if (type != STATE_MSG) {
+		/* Compatibility: older nodes don't know BCAST_PROTOCOL
+		 * synchronization, and transfer synch info in LINK_PROTOCOL
+		 * messages.
+		 */
+		if (tipc_node_is_up(n))
+			return;
+		if (type == RESET_MSG || type == ACTIVATE_MSG) {
+			n->bclink.last_sent = last_bcast;
+			n->bclink.last_in = last_bcast;
+			n->bclink.oos_state = 0;
+		}
+		return;
+	}
+
+	tipc_bclink_update_link_state(n, last_bcast);
+}
+
/**
* bclink_peek_nack - monitor retransmission requests sent by other nodes
*
@@ -358,10 +381,9 @@ int tipc_bclink_xmit(struct net *net, struct sk_buff_head *list)
/* Prepare clone of message for local node */
skb = tipc_msg_reassemble(list);
- if (unlikely(!skb)) {
- __skb_queue_purge(list);
+ if (unlikely(!skb))
return -EHOSTUNREACH;
- }
+
/* Broadcast to all nodes */
if (likely(bclink)) {
tipc_bclink_lock(net);
@@ -413,7 +435,7 @@ static void bclink_accept_pkt(struct tipc_node *node, u32 seqno)
* all nodes in the cluster don't ACK at the same time
*/
if (((seqno - tn->own_addr) % TIPC_MIN_LINK_WIN) == 0) {
- tipc_link_proto_xmit(node->active_links[node->addr & 1],
+ tipc_link_proto_xmit(node_active_link(node, node->addr),
STATE_MSG, 0, 0, 0, 0);
tn->bcl->stats.sent_acks++;
}
@@ -925,7 +947,6 @@ int tipc_bclink_init(struct net *net)
tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT);
bcl->bearer_id = MAX_BEARERS;
rcu_assign_pointer(tn->bearer_list[MAX_BEARERS], &bcbearer->bearer);
- bcl->state = WORKING_WORKING;
bcl->pmsg = (struct tipc_msg *)&bcl->proto_msg;
msg_set_prevnode(bcl->pmsg, tn->own_addr);
strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 3c290a48f720..d74c69bcf60b 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -133,5 +133,6 @@ void tipc_bclink_wakeup_users(struct net *net);
int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
void tipc_bclink_input(struct net *net);
+void tipc_bclink_sync_state(struct tipc_node *n, struct tipc_msg *msg);
#endif
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 00bc0e620532..ce9f7bfc0b92 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -343,7 +343,7 @@ restart:
static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b_ptr)
{
pr_info("Resetting bearer <%s>\n", b_ptr->name);
- tipc_link_delete_list(net, b_ptr->identity);
+ tipc_node_delete_links(net, b_ptr->identity);
tipc_disc_reset(net, b_ptr);
return 0;
}
@@ -361,7 +361,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr)
pr_info("Disabling bearer <%s>\n", b_ptr->name);
b_ptr->media->disable_media(b_ptr);
- tipc_link_delete_list(net, b_ptr->identity);
+ tipc_node_delete_links(net, b_ptr->identity);
if (b_ptr->link_req)
tipc_disc_delete(b_ptr->link_req);
@@ -470,6 +470,32 @@ void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
rcu_read_unlock();
}
+/**
+ * tipc_bearer_xmit() - send a queue of buffers to a destination over a bearer
+ * @net: network namespace whose bearer list is consulted
+ * @bearer_id: index of the bearer in that namespace's bearer_list
+ * @xmitq: queue of buffers to send; empty on return
+ * @dst: media address handed to the media's send_msg() for every buffer
+ *
+ * Each buffer is dequeued from @xmitq, passed to the bearer's media
+ * send_msg() and then released here. If no bearer is registered under
+ * @bearer_id the buffers cannot be sent, so the queue is purged instead
+ * of being left behind with its buffers still allocated.
+ */
+void tipc_bearer_xmit(struct net *net, u32 bearer_id,
+		      struct sk_buff_head *xmitq,
+		      struct tipc_media_addr *dst)
+{
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+	struct tipc_bearer *b;
+	struct sk_buff *skb;
+
+	if (skb_queue_empty(xmitq))
+		return;
+
+	rcu_read_lock();
+	b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]);
+	if (unlikely(!b)) {
+		/* No bearer to send on: free the buffers rather than
+		 * returning with them still queued.
+		 */
+		__skb_queue_purge(xmitq);
+		goto unlock;
+	}
+
+	while ((skb = __skb_dequeue(xmitq))) {
+		b->media->send_msg(net, skb, b, dst);
+		/* Until we remove cloning in tipc_l2_send_msg(): */
+		kfree_skb(skb);
+	}
+unlock:
+	rcu_read_unlock();
+}
+
/**
* tipc_l2_rcv_msg - handle incoming TIPC message from an interface
* @buf: the received packet
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index dc714d977768..6426f242f626 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -217,5 +217,8 @@ void tipc_bearer_cleanup(void);
void tipc_bearer_stop(struct net *net);
void tipc_bearer_send(struct net *net, u32 bearer_id, struct sk_buff *buf,
struct tipc_media_addr *dest);
+void tipc_bearer_xmit(struct net *net, u32 bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr *dst);
#endif /* _TIPC_BEARER_H */
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 0fcf133d5cb7..b96b41eabf12 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -109,6 +109,11 @@ struct tipc_net {
atomic_t subscription_count;
};
+/* tipc_net - return the struct tipc_net stored for @net under tipc_net_id */
+static inline struct tipc_net *tipc_net(struct net *net)
+{
+	struct tipc_net *tn = net_generic(net, tipc_net_id);
+
+	return tn;
+}
+
static inline u16 mod(u16 x)
{
return x & 0xffffu;
@@ -129,6 +134,11 @@ static inline int less(u16 left, u16 right)
return less_eq(left, right) && (mod(right) != mod(left));
}
+/* in_range - test whether @val lies between @min and @max inclusive, as
+ * ordered by less() and more(). Returns 1 if it does, otherwise 0.
+ */
+static inline int in_range(u16 val, u16 min, u16 max)
+{
+	if (less(val, min))
+		return 0;
+
+	return !more(val, max);
+}
+
#ifdef CONFIG_SYSCTL
int tipc_register_sysctl(void);
void tipc_unregister_sysctl(void);
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 967e292f53c8..d14e0a4aa9af 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -35,7 +35,7 @@
*/
#include "core.h"
-#include "link.h"
+#include "node.h"
#include "discover.h"
/* min delay during bearer start up */
@@ -120,30 +120,24 @@ static void disc_dupl_alert(struct tipc_bearer *b_ptr, u32 node_addr,
* @buf: buffer containing message
* @bearer: bearer that message arrived on
*/
-void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
+void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
struct tipc_bearer *bearer)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_node *node;
- struct tipc_link *link;
struct tipc_media_addr maddr;
- struct sk_buff *rbuf;
- struct tipc_msg *msg = buf_msg(buf);
- u32 ddom = msg_dest_domain(msg);
- u32 onode = msg_prevnode(msg);
- u32 net_id = msg_bc_netid(msg);
- u32 mtyp = msg_type(msg);
- u32 signature = msg_node_sig(msg);
- u16 caps = msg_node_capabilities(msg);
- bool addr_match = false;
- bool sign_match = false;
- bool link_up = false;
- bool accept_addr = false;
- bool accept_sign = false;
+ struct sk_buff *rskb;
+ struct tipc_msg *hdr = buf_msg(skb);
+ u32 ddom = msg_dest_domain(hdr);
+ u32 onode = msg_prevnode(hdr);
+ u32 net_id = msg_bc_netid(hdr);
+ u32 mtyp = msg_type(hdr);
+ u32 signature = msg_node_sig(hdr);
+ u16 caps = msg_node_capabilities(hdr);
bool respond = false;
+ bool dupl_addr = false;
- bearer->media->msg2addr(bearer, &maddr, msg_media_addr(msg));
- kfree_skb(buf);
+ bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr));
+ kfree_skb(skb);
/* Ensure message from node is valid and communication is permitted */
if (net_id != tn->net_id)
@@ -165,102 +159,20 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *buf,
if (!tipc_in_scope(bearer->domain, onode))
return;
- node = tipc_node_create(net, onode);
- if (!node)
- return;
- tipc_node_lock(node);
- node->capabilities = caps;
- link = node->links[bearer->identity];
-
- /* Prepare to validate requesting node's signature and media address */
- sign_match = (signature == node->signature);
- addr_match = link && !memcmp(&link->media_addr, &maddr, sizeof(maddr));
- link_up = link && tipc_link_is_up(link);
-
-
- /* These three flags give us eight permutations: */
-
- if (sign_match && addr_match && link_up) {
- /* All is fine. Do nothing. */
- } else if (sign_match && addr_match && !link_up) {
- /* Respond. The link will come up in due time */
- respond = true;
- } else if (sign_match && !addr_match && link_up) {
- /* Peer has changed i/f address without rebooting.
- * If so, the link will reset soon, and the next
- * discovery will be accepted. So we can ignore it.
- * It may also be an cloned or malicious peer having
- * chosen the same node address and signature as an
- * existing one.
- * Ignore requests until the link goes down, if ever.
- */
- disc_dupl_alert(bearer, onode, &maddr);
- } else if (sign_match && !addr_match && !link_up) {
- /* Peer link has changed i/f address without rebooting.
- * It may also be a cloned or malicious peer; we can't
- * distinguish between the two.
- * The signature is correct, so we must accept.
- */
- accept_addr = true;
- respond = true;
- } else if (!sign_match && addr_match && link_up) {
- /* Peer node rebooted. Two possibilities:
- * - Delayed re-discovery; this link endpoint has already
- * reset and re-established contact with the peer, before
- * receiving a discovery message from that node.
- * (The peer happened to receive one from this node first).
- * - The peer came back so fast that our side has not
- * discovered it yet. Probing from this side will soon
- * reset the link, since there can be no working link
- * endpoint at the peer end, and the link will re-establish.
- * Accept the signature, since it comes from a known peer.
- */
- accept_sign = true;
- } else if (!sign_match && addr_match && !link_up) {
- /* The peer node has rebooted.
- * Accept signature, since it is a known peer.
- */
- accept_sign = true;
- respond = true;
- } else if (!sign_match && !addr_match && link_up) {
- /* Peer rebooted with new address, or a new/duplicate peer.
- * Ignore until the link goes down, if ever.
- */
+ tipc_node_check_dest(net, onode, bearer, caps, signature,
+ &maddr, &respond, &dupl_addr);
+ if (dupl_addr)
disc_dupl_alert(bearer, onode, &maddr);
- } else if (!sign_match && !addr_match && !link_up) {
- /* Peer rebooted with new address, or it is a new peer.
- * Accept signature and address.
- */
- accept_sign = true;
- accept_addr = true;
- respond = true;
- }
-
- if (accept_sign)
- node->signature = signature;
-
- if (accept_addr) {
- if (!link)
- link = tipc_link_create(node, bearer, &maddr);
- if (link) {
- memcpy(&link->media_addr, &maddr, sizeof(maddr));
- tipc_link_reset(link);
- } else {
- respond = false;
- }
- }
/* Send response, if necessary */
if (respond && (mtyp == DSC_REQ_MSG)) {
- rbuf = tipc_buf_acquire(MAX_H_SIZE);
- if (rbuf) {
- tipc_disc_init_msg(net, rbuf, DSC_RESP_MSG, bearer);
- tipc_bearer_send(net, bearer->identity, rbuf, &maddr);
- kfree_skb(rbuf);
+ rskb = tipc_buf_acquire(MAX_H_SIZE);
+ if (rskb) {
+ tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer);
+ tipc_bearer_send(net, bearer->identity, rskb, &maddr);
+ kfree_skb(rskb);
}
}
- tipc_node_unlock(node);
- tipc_node_put(node);
}
/**
diff --git a/net/tipc/link.c b/net/tipc/link.c
index eaa9fe54b4ae..f067e5425560 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -48,9 +48,8 @@
/*
* Error message prefixes
*/
-static const char *link_co_err = "Link changeover error, ";
+static const char *link_co_err = "Link tunneling error, ";
static const char *link_rst_msg = "Resetting link ";
-static const char *link_unk_evt = "Unknown link event ";
static const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
[TIPC_NLA_LINK_UNSPEC] = { .type = NLA_UNSPEC },
@@ -77,256 +76,413 @@ static const struct nla_policy tipc_nl_prop_policy[TIPC_NLA_PROP_MAX + 1] = {
};
/*
+ * Interval between NACKs when packets arrive out of order
+ */
+#define TIPC_NACK_INTV (TIPC_MIN_LINK_WIN * 2)
+/*
* Out-of-range value for link session numbers
*/
-#define INVALID_SESSION 0x10000
+#define WILDCARD_SESSION 0x10000
-/*
- * Link state events:
+/* Link FSM states:
*/
-#define STARTING_EVT 856384768 /* link processing trigger */
-#define TRAFFIC_MSG_EVT 560815u /* rx'd ??? */
-#define SILENCE_EVT 560817u /* timer dicovered silence from peer */
+enum { /* every state occupies its own nibble, so states can be OR-ed into masks */
+ LINK_ESTABLISHED = 0xe, /* counted as up by link_is_up() */
+ LINK_ESTABLISHING = 0xe << 4, /* tipc_link_timeout() sends ACTIVATE_MSG */
+ LINK_RESET = 0x1 << 8, /* tipc_link_timeout() sends RESET_MSG */
+ LINK_RESETTING = 0x2 << 12, /* initial state set by tipc_link_create() */
+ LINK_PEER_RESET = 0xd << 16, /* entered on LINK_PEER_RESET_EVT */
+ LINK_FAILINGOVER = 0xf << 20, /* entered on LINK_FAILOVER_BEGIN_EVT */
+ LINK_SYNCHING = 0xc << 24 /* entered on LINK_SYNCH_BEGIN_EVT; counted as up */
+};
-/*
- * State value stored in 'failover_pkts'
+/* Link FSM state checking routines
*/
-#define FIRST_FAILOVER 0xffffu
-
-static void link_handle_out_of_seq_msg(struct tipc_link *link,
- struct sk_buff *skb);
-static void tipc_link_proto_rcv(struct tipc_link *link,
- struct sk_buff *skb);
-static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol);
-static void link_state_event(struct tipc_link *l_ptr, u32 event);
+/* link_is_up - non-zero when the link state is LINK_ESTABLISHED or
+ * LINK_SYNCHING. The value returned is the overlapping state bits, not 1.
+ */
+static int link_is_up(struct tipc_link *l)
+{
+	const int up_states = LINK_ESTABLISHED | LINK_SYNCHING;
+
+	return l->state & up_states;
+}
+
+static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
+ u16 rcvgap, int tolerance, int priority,
+ struct sk_buff_head *xmitq);
static void link_reset_statistics(struct tipc_link *l_ptr);
static void link_print(struct tipc_link *l_ptr, const char *str);
-static void tipc_link_sync_xmit(struct tipc_link *l);
static void tipc_link_sync_rcv(struct tipc_node *n, struct sk_buff *buf);
-static void tipc_link_input(struct tipc_link *l, struct sk_buff *skb);
-static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb);
-static bool tipc_link_failover_rcv(struct tipc_link *l, struct sk_buff **skb);
-static void link_set_timer(struct tipc_link *link, unsigned long time);
+
/*
- * Simple link routines
+ * Simple non-static link routines (i.e. referenced outside this file)
*/
-static unsigned int align(unsigned int i)
+bool tipc_link_is_up(struct tipc_link *l)
{
- return (i + 3) & ~3u;
+ return link_is_up(l);
}
-static void tipc_link_release(struct kref *kref)
+bool tipc_link_is_reset(struct tipc_link *l)
{
- kfree(container_of(kref, struct tipc_link, ref));
+ return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING);
}
-static void tipc_link_get(struct tipc_link *l_ptr)
+bool tipc_link_is_synching(struct tipc_link *l)
{
- kref_get(&l_ptr->ref);
+ return l->state == LINK_SYNCHING;
}
-static void tipc_link_put(struct tipc_link *l_ptr)
+bool tipc_link_is_failingover(struct tipc_link *l)
{
- kref_put(&l_ptr->ref, tipc_link_release);
+ return l->state == LINK_FAILINGOVER;
}
-static struct tipc_link *tipc_parallel_link(struct tipc_link *l)
+bool tipc_link_is_blocked(struct tipc_link *l)
{
- if (l->owner->active_links[0] != l)
- return l->owner->active_links[0];
- return l->owner->active_links[1];
+ return l->state & (LINK_RESETTING | LINK_PEER_RESET | LINK_FAILINGOVER);
}
-/*
- * Simple non-static link routines (i.e. referenced outside this file)
- */
-int tipc_link_is_up(struct tipc_link *l_ptr)
+int tipc_link_is_active(struct tipc_link *l)
{
- if (!l_ptr)
- return 0;
- return link_working_working(l_ptr) || link_working_unknown(l_ptr);
+ struct tipc_node *n = l->owner;
+
+ return (node_active_link(n, 0) == l) || (node_active_link(n, 1) == l);
}
-int tipc_link_is_active(struct tipc_link *l_ptr)
+static u32 link_own_addr(struct tipc_link *l)
{
- return (l_ptr->owner->active_links[0] == l_ptr) ||
- (l_ptr->owner->active_links[1] == l_ptr);
+ return msg_prevnode(l->pmsg);
}
/**
- * link_timeout - handle expiration of link timer
- * @l_ptr: pointer to link
+ * tipc_link_create - create a new link
+ * @n: pointer to associated node
+ * @b: pointer to associated bearer
+ * @session: session number to set in the link's protocol message header
+ * @ownnode: identity of own node
+ * @peer: identity of peer node
+ * @maddr: media address to be used
+ * @inputq: queue to put messages ready for delivery
+ * @namedq: queue to put binding table update messages ready for delivery
+ * @link: return value, pointer to put the created link
+ *
+ * Returns true if link was created, otherwise false
*/
-static void link_timeout(unsigned long data)
+bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
+ u32 ownnode, u32 peer, struct tipc_media_addr *maddr,
+ struct sk_buff_head *inputq, struct sk_buff_head *namedq,
+ struct tipc_link **link)
{
- struct tipc_link *l_ptr = (struct tipc_link *)data;
- struct sk_buff *skb;
+ struct tipc_link *l;
+ struct tipc_msg *hdr;
+ char *if_name;
+
+ l = kzalloc(sizeof(*l), GFP_ATOMIC);
+ if (!l)
+ return false;
+ *link = l;
+
+ /* Note: peer i/f name is completed by reset/activate message */
+ if_name = strchr(b->name, ':') + 1;
+ sprintf(l->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
+ tipc_zone(ownnode), tipc_cluster(ownnode), tipc_node(ownnode),
+ if_name, tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
+
+ l->addr = peer;
+ l->media_addr = maddr;
+ l->owner = n;
+ l->peer_session = WILDCARD_SESSION;
+ l->bearer_id = b->identity;
+ l->tolerance = b->tolerance;
+ l->net_plane = b->net_plane;
+ l->advertised_mtu = b->mtu;
+ l->mtu = b->mtu;
+ l->priority = b->priority;
+ tipc_link_set_queue_limits(l, b->window);
+ l->inputq = inputq;
+ l->namedq = namedq;
+ l->state = LINK_RESETTING;
+ l->pmsg = (struct tipc_msg *)&l->proto_msg;
+ hdr = l->pmsg;
+ tipc_msg_init(ownnode, hdr, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, peer);
+ msg_set_size(hdr, sizeof(l->proto_msg));
+ msg_set_session(hdr, session);
+ msg_set_bearer_id(hdr, l->bearer_id);
+ strcpy((char *)msg_data(hdr), if_name);
+ __skb_queue_head_init(&l->transmq);
+ __skb_queue_head_init(&l->backlogq);
+ __skb_queue_head_init(&l->deferdq);
+ skb_queue_head_init(&l->wakeupq);
+ skb_queue_head_init(l->inputq);
+ return true;
+}
- tipc_node_lock(l_ptr->owner);
+/* tipc_link_build_bcast_sync_msg() - synchronize broadcast link endpoints.
+ *
+ * Give a newly added peer node the sequence number where it should
+ * start receiving and acking broadcast packets.
+ */
+void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb;
+ struct sk_buff_head list;
+ u16 last_sent;
- /* update counters used in statistical profiling of send traffic */
- l_ptr->stats.accu_queue_sz += skb_queue_len(&l_ptr->transmq);
- l_ptr->stats.queue_sz_counts++;
+ skb = tipc_msg_create(BCAST_PROTOCOL, STATE_MSG, INT_H_SIZE,
+ 0, l->addr, link_own_addr(l), 0, 0, 0);
+ if (!skb)
+ return;
+ last_sent = tipc_bclink_get_last_sent(l->owner->net);
+ msg_set_last_bcast(buf_msg(skb), last_sent);
+ __skb_queue_head_init(&list);
+ __skb_queue_tail(&list, skb);
+ tipc_link_xmit(l, &list, xmitq);
+}
- skb = skb_peek(&l_ptr->transmq);
- if (skb) {
- struct tipc_msg *msg = buf_msg(skb);
- u32 length = msg_size(msg);
+/**
+ * tipc_link_fsm_evt - link finite state machine
+ * @l: pointer to link
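+ * @evt: state machine event to be processed
+ *
+ * Returns 0, or a mask of TIPC_LINK_UP_EVT / TIPC_LINK_DOWN_EVT set by the
+ * transition that was taken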
+ */
+int tipc_link_fsm_evt(struct tipc_link *l, int evt)
+{
+ int rc = 0;
- if ((msg_user(msg) == MSG_FRAGMENTER) &&
- (msg_type(msg) == FIRST_FRAGMENT)) {
- length = msg_size(msg_get_wrapped(msg));
+ switch (l->state) {
+ case LINK_RESETTING:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_PEER_RESET;
+ break;
+ case LINK_RESET_EVT:
+ l->state = LINK_RESET;
+ break;
+ case LINK_FAILURE_EVT:
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILOVER_END_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
+ default:
+ goto illegal_evt;
}
- if (length) {
- l_ptr->stats.msg_lengths_total += length;
- l_ptr->stats.msg_length_counts++;
- if (length <= 64)
- l_ptr->stats.msg_length_profile[0]++;
- else if (length <= 256)
- l_ptr->stats.msg_length_profile[1]++;
- else if (length <= 1024)
- l_ptr->stats.msg_length_profile[2]++;
- else if (length <= 4096)
- l_ptr->stats.msg_length_profile[3]++;
- else if (length <= 16384)
- l_ptr->stats.msg_length_profile[4]++;
- else if (length <= 32768)
- l_ptr->stats.msg_length_profile[5]++;
- else
- l_ptr->stats.msg_length_profile[6]++;
+ break;
+ case LINK_RESET:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_ESTABLISHING;
+ break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ l->state = LINK_FAILINGOVER;
+ case LINK_FAILURE_EVT:
+ case LINK_RESET_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILOVER_END_EVT:
+ break;
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case LINK_PEER_RESET:
+ switch (evt) {
+ case LINK_RESET_EVT:
+ l->state = LINK_ESTABLISHING;
+ break;
+ case LINK_PEER_RESET_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILURE_EVT:
+ break;
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
}
+ break;
+ case LINK_FAILINGOVER:
+ switch (evt) {
+ case LINK_FAILOVER_END_EVT:
+ l->state = LINK_RESET;
+ break;
+ case LINK_PEER_RESET_EVT:
+ case LINK_RESET_EVT:
+ case LINK_ESTABLISH_EVT:
+ case LINK_FAILURE_EVT:
+ break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_SYNCH_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case LINK_ESTABLISHING:
+ switch (evt) {
+ case LINK_ESTABLISH_EVT:
+ l->state = LINK_ESTABLISHED;
+ rc |= TIPC_LINK_UP_EVT;
+ break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ l->state = LINK_FAILINGOVER;
+ break;
+ case LINK_PEER_RESET_EVT:
+ case LINK_RESET_EVT:
+ case LINK_FAILURE_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
+ break;
+ case LINK_SYNCH_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case LINK_ESTABLISHED:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_PEER_RESET;
+ rc |= TIPC_LINK_DOWN_EVT;
+ break;
+ case LINK_FAILURE_EVT:
+ l->state = LINK_RESETTING;
+ rc |= TIPC_LINK_DOWN_EVT;
+ break;
+ case LINK_RESET_EVT:
+ l->state = LINK_RESET;
+ break;
+ case LINK_ESTABLISH_EVT:
+ break;
+ case LINK_SYNCH_BEGIN_EVT:
+ l->state = LINK_SYNCHING;
+ break;
+ case LINK_SYNCH_END_EVT:
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case LINK_SYNCHING:
+ switch (evt) {
+ case LINK_PEER_RESET_EVT:
+ l->state = LINK_PEER_RESET;
+ rc |= TIPC_LINK_DOWN_EVT;
+ break;
+ case LINK_FAILURE_EVT:
+ l->state = LINK_RESETTING;
+ rc |= TIPC_LINK_DOWN_EVT;
+ break;
+ case LINK_RESET_EVT:
+ l->state = LINK_RESET;
+ break;
+ case LINK_ESTABLISH_EVT:
+ case LINK_SYNCH_BEGIN_EVT:
+ break;
+ case LINK_SYNCH_END_EVT:
+ l->state = LINK_ESTABLISHED;
+ break;
+ case LINK_FAILOVER_BEGIN_EVT:
+ case LINK_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ default:
+ pr_err("Unknown FSM state %x in %s\n", l->state, l->name);
}
-
- /* do all other link processing performed on a periodic basis */
- if (l_ptr->silent_intv_cnt || tipc_bclink_acks_missing(l_ptr->owner))
- link_state_event(l_ptr, SILENCE_EVT);
- l_ptr->silent_intv_cnt++;
- if (skb_queue_len(&l_ptr->backlogq))
- tipc_link_push_packets(l_ptr);
- link_set_timer(l_ptr, l_ptr->keepalive_intv);
- tipc_node_unlock(l_ptr->owner);
- tipc_link_put(l_ptr);
-}
-
-static void link_set_timer(struct tipc_link *link, unsigned long time)
-{
- if (!mod_timer(&link->timer, jiffies + time))
- tipc_link_get(link);
+ return rc;
+illegal_evt:
+ pr_err("Illegal FSM event %x in state %x on link %s\n",
+ evt, l->state, l->name);
+ return rc;
}
-/**
- * tipc_link_create - create a new link
- * @n_ptr: pointer to associated node
- * @b_ptr: pointer to associated bearer
- * @media_addr: media address to use when sending messages over link
- *
- * Returns pointer to link.
+/* link_profile_stats - update statistical profiling of traffic
*/
-struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
- struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr)
+static void link_profile_stats(struct tipc_link *l)
{
- struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
- struct tipc_link *l_ptr;
+ struct sk_buff *skb;
struct tipc_msg *msg;
- char *if_name;
- char addr_string[16];
- u32 peer = n_ptr->addr;
+ int length;
- if (n_ptr->link_cnt >= MAX_BEARERS) {
- tipc_addr_string_fill(addr_string, n_ptr->addr);
- pr_err("Cannot establish %uth link to %s. Max %u allowed.\n",
- n_ptr->link_cnt, addr_string, MAX_BEARERS);
- return NULL;
- }
+ /* Update counters used in statistical profiling of send traffic */
+ l->stats.accu_queue_sz += skb_queue_len(&l->transmq);
+ l->stats.queue_sz_counts++;
- if (n_ptr->links[b_ptr->identity]) {
- tipc_addr_string_fill(addr_string, n_ptr->addr);
- pr_err("Attempt to establish second link on <%s> to %s\n",
- b_ptr->name, addr_string);
- return NULL;
- }
+ skb = skb_peek(&l->transmq);
+ if (!skb)
+ return;
+ msg = buf_msg(skb);
+ length = msg_size(msg);
- l_ptr = kzalloc(sizeof(*l_ptr), GFP_ATOMIC);
- if (!l_ptr) {
- pr_warn("Link creation failed, no memory\n");
- return NULL;
+ if (msg_user(msg) == MSG_FRAGMENTER) {
+ if (msg_type(msg) != FIRST_FRAGMENT)
+ return;
+ length = msg_size(msg_get_wrapped(msg));
}
- kref_init(&l_ptr->ref);
- l_ptr->addr = peer;
- if_name = strchr(b_ptr->name, ':') + 1;
- sprintf(l_ptr->name, "%u.%u.%u:%s-%u.%u.%u:unknown",
- tipc_zone(tn->own_addr), tipc_cluster(tn->own_addr),
- tipc_node(tn->own_addr),
- if_name,
- tipc_zone(peer), tipc_cluster(peer), tipc_node(peer));
- /* note: peer i/f name is updated by reset/activate message */
- memcpy(&l_ptr->media_addr, media_addr, sizeof(*media_addr));
- l_ptr->owner = n_ptr;
- l_ptr->peer_session = INVALID_SESSION;
- l_ptr->bearer_id = b_ptr->identity;
- link_set_supervision_props(l_ptr, b_ptr->tolerance);
- l_ptr->state = RESET_UNKNOWN;
-
- l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
- msg = l_ptr->pmsg;
- tipc_msg_init(tn->own_addr, msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE,
- l_ptr->addr);
- msg_set_size(msg, sizeof(l_ptr->proto_msg));
- msg_set_session(msg, (tn->random & 0xffff));
- msg_set_bearer_id(msg, b_ptr->identity);
- strcpy((char *)msg_data(msg), if_name);
- l_ptr->net_plane = b_ptr->net_plane;
- l_ptr->advertised_mtu = b_ptr->mtu;
- l_ptr->mtu = l_ptr->advertised_mtu;
- l_ptr->priority = b_ptr->priority;
- tipc_link_set_queue_limits(l_ptr, b_ptr->window);
- l_ptr->snd_nxt = 1;
- __skb_queue_head_init(&l_ptr->transmq);
- __skb_queue_head_init(&l_ptr->backlogq);
- __skb_queue_head_init(&l_ptr->deferdq);
- skb_queue_head_init(&l_ptr->wakeupq);
- skb_queue_head_init(&l_ptr->inputq);
- skb_queue_head_init(&l_ptr->namedq);
- link_reset_statistics(l_ptr);
- tipc_node_attach_link(n_ptr, l_ptr);
- setup_timer(&l_ptr->timer, link_timeout, (unsigned long)l_ptr);
- link_state_event(l_ptr, STARTING_EVT);
-
- return l_ptr;
+ l->stats.msg_lengths_total += length;
+ l->stats.msg_length_counts++;
+ if (length <= 64)
+ l->stats.msg_length_profile[0]++;
+ else if (length <= 256)
+ l->stats.msg_length_profile[1]++;
+ else if (length <= 1024)
+ l->stats.msg_length_profile[2]++;
+ else if (length <= 4096)
+ l->stats.msg_length_profile[3]++;
+ else if (length <= 16384)
+ l->stats.msg_length_profile[4]++;
+ else if (length <= 32768)
+ l->stats.msg_length_profile[5]++;
+ else
+ l->stats.msg_length_profile[6]++;
}
-/**
- * tipc_link_delete - Delete a link
- * @l: link to be deleted
+/* tipc_link_timeout - perform periodic task as instructed from node timeout
*/
-void tipc_link_delete(struct tipc_link *l)
+int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
{
- tipc_link_reset(l);
- if (del_timer(&l->timer))
- tipc_link_put(l);
- l->flags |= LINK_STOPPED;
- /* Delete link now, or when timer is finished: */
- tipc_link_reset_fragments(l);
- tipc_node_detach_link(l->owner, l);
- tipc_link_put(l);
-}
+ int rc = 0;
+ int mtyp = STATE_MSG;
+ bool xmit = false;
+ bool prb = false;
+
+ link_profile_stats(l);
+
+ switch (l->state) {
+ case LINK_ESTABLISHED:
+ case LINK_SYNCHING:
+ if (!l->silent_intv_cnt) {
+ if (tipc_bclink_acks_missing(l->owner))
+ xmit = true;
+ } else if (l->silent_intv_cnt <= l->abort_limit) {
+ xmit = true;
+ prb = true;
+ } else {
+ rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+ l->silent_intv_cnt++;
+ break;
+ case LINK_RESET:
+ xmit = true;
+ mtyp = RESET_MSG;
+ break;
+ case LINK_ESTABLISHING:
+ xmit = true;
+ mtyp = ACTIVATE_MSG;
+ break;
+ case LINK_PEER_RESET:
+ case LINK_RESETTING:
+ case LINK_FAILINGOVER:
+ break;
+ default:
+ break;
+ }
-void tipc_link_delete_list(struct net *net, unsigned int bearer_id)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct tipc_link *link;
- struct tipc_node *node;
+ if (xmit)
+ tipc_link_build_proto_msg(l, mtyp, prb, 0, 0, 0, xmitq);
- rcu_read_lock();
- list_for_each_entry_rcu(node, &tn->node_list, list) {
- tipc_node_lock(node);
- link = node->links[bearer_id];
- if (link)
- tipc_link_delete(link);
- tipc_node_unlock(node);
- }
- rcu_read_unlock();
+ return rc;
}
/**
@@ -334,7 +490,7 @@ void tipc_link_delete_list(struct net *net, unsigned int bearer_id)
* @link: congested link
* @list: message that was attempted sent
* Create pseudo msg to send back to user when congestion abates
- * Only consumes message if there is an error
+ * Does not consume buffer list
*/
static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
{
@@ -347,8 +503,7 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
/* This really cannot happen... */
if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) {
pr_warn("%s<%s>, send queue full", link_rst_msg, link->name);
- tipc_link_reset(link);
- goto err;
+ return -ENOBUFS;
}
/* Non-blocking sender: */
if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending)
@@ -358,15 +513,12 @@ static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list)
skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0,
addr, addr, oport, 0, 0);
if (!skb)
- goto err;
+ return -ENOBUFS;
TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list);
TIPC_SKB_CB(skb)->chain_imp = imp;
skb_queue_tail(&link->wakeupq, skb);
link->stats.link_congs++;
return -ELINKCONG;
-err:
- __skb_queue_purge(list);
- return -ENOBUFS;
}
/**
@@ -388,9 +540,7 @@ void link_prepare_wakeup(struct tipc_link *l)
if ((pnd[imp] + l->backlog[imp].len) >= lim)
break;
skb_unlink(skb, &l->wakeupq);
- skb_queue_tail(&l->inputq, skb);
- l->owner->inputq = &l->inputq;
- l->owner->action_flags |= TIPC_MSG_EVT;
+ skb_queue_tail(l->inputq, skb);
}
}
@@ -426,208 +576,36 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr)
tipc_link_reset_fragments(l_ptr);
}
-void tipc_link_reset(struct tipc_link *l_ptr)
+void tipc_link_reset(struct tipc_link *l)
{
- u32 prev_state = l_ptr->state;
- int was_active_link = tipc_link_is_active(l_ptr);
- struct tipc_node *owner = l_ptr->owner;
- struct tipc_link *pl = tipc_parallel_link(l_ptr);
-
- msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff));
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
/* Link is down, accept any session */
- l_ptr->peer_session = INVALID_SESSION;
-
- /* Prepare for renewed mtu size negotiation */
- l_ptr->mtu = l_ptr->advertised_mtu;
-
- l_ptr->state = RESET_UNKNOWN;
+ l->peer_session = WILDCARD_SESSION;
- if ((prev_state == RESET_UNKNOWN) || (prev_state == RESET_RESET))
- return;
-
- tipc_node_link_down(l_ptr->owner, l_ptr);
- tipc_bearer_remove_dest(owner->net, l_ptr->bearer_id, l_ptr->addr);
+ /* If peer is up, it only accepts an incremented session number */
+ msg_set_session(l->pmsg, msg_session(l->pmsg) + 1);
- if (was_active_link && tipc_node_is_up(l_ptr->owner) && (pl != l_ptr)) {
- l_ptr->flags |= LINK_FAILINGOVER;
- l_ptr->failover_checkpt = l_ptr->rcv_nxt;
- pl->failover_pkts = FIRST_FAILOVER;
- pl->failover_checkpt = l_ptr->rcv_nxt;
- pl->failover_skb = l_ptr->reasm_buf;
- } else {
- kfree_skb(l_ptr->reasm_buf);
- }
- /* Clean up all queues, except inputq: */
- __skb_queue_purge(&l_ptr->transmq);
- __skb_queue_purge(&l_ptr->deferdq);
- if (!owner->inputq)
- owner->inputq = &l_ptr->inputq;
- skb_queue_splice_init(&l_ptr->wakeupq, owner->inputq);
- if (!skb_queue_empty(owner->inputq))
- owner->action_flags |= TIPC_MSG_EVT;
- tipc_link_purge_backlog(l_ptr);
- l_ptr->reasm_buf = NULL;
- l_ptr->rcv_unacked = 0;
- l_ptr->snd_nxt = 1;
- l_ptr->silent_intv_cnt = 0;
- l_ptr->stale_count = 0;
- link_reset_statistics(l_ptr);
-}
-
-static void link_activate(struct tipc_link *link)
-{
- struct tipc_node *node = link->owner;
-
- link->rcv_nxt = 1;
- link->stats.recv_info = 1;
- link->silent_intv_cnt = 0;
- tipc_node_link_up(node, link);
- tipc_bearer_add_dest(node->net, link->bearer_id, link->addr);
-}
-
-/**
- * link_state_event - link finite state machine
- * @l_ptr: pointer to link
- * @event: state machine event to process
- */
-static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
-{
- struct tipc_link *other;
- unsigned long timer_intv = l_ptr->keepalive_intv;
-
- if (l_ptr->flags & LINK_STOPPED)
- return;
-
- if (!(l_ptr->flags & LINK_STARTED) && (event != STARTING_EVT))
- return; /* Not yet. */
-
- if (l_ptr->flags & LINK_FAILINGOVER)
- return;
-
- switch (l_ptr->state) {
- case WORKING_WORKING:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- l_ptr->silent_intv_cnt = 0;
- break;
- case SILENCE_EVT:
- if (!l_ptr->silent_intv_cnt) {
- if (tipc_bclink_acks_missing(l_ptr->owner))
- tipc_link_proto_xmit(l_ptr, STATE_MSG,
- 0, 0, 0, 0);
- break;
- }
- l_ptr->state = WORKING_UNKNOWN;
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- break;
- case RESET_MSG:
- pr_debug("%s<%s>, requested by peer\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_RESET;
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 0, 0, 0, 0);
- break;
- default:
- pr_debug("%s%u in WW state\n", link_unk_evt, event);
- }
- break;
- case WORKING_UNKNOWN:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- l_ptr->state = WORKING_WORKING;
- l_ptr->silent_intv_cnt = 0;
- break;
- case RESET_MSG:
- pr_debug("%s<%s>, requested by peer while probing\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_RESET;
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 0, 0, 0, 0);
- break;
- case SILENCE_EVT:
- if (!l_ptr->silent_intv_cnt) {
- l_ptr->state = WORKING_WORKING;
- if (tipc_bclink_acks_missing(l_ptr->owner))
- tipc_link_proto_xmit(l_ptr, STATE_MSG,
- 0, 0, 0, 0);
- } else if (l_ptr->silent_intv_cnt <
- l_ptr->abort_limit) {
- tipc_link_proto_xmit(l_ptr, STATE_MSG,
- 1, 0, 0, 0);
- } else { /* Link has failed */
- pr_debug("%s<%s>, peer not responding\n",
- link_rst_msg, l_ptr->name);
- tipc_link_reset(l_ptr);
- l_ptr->state = RESET_UNKNOWN;
- tipc_link_proto_xmit(l_ptr, RESET_MSG,
- 0, 0, 0, 0);
- }
- break;
- default:
- pr_err("%s%u in WU state\n", link_unk_evt, event);
- }
- break;
- case RESET_UNKNOWN:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- break;
- case ACTIVATE_MSG:
- other = l_ptr->owner->active_links[0];
- if (other && link_working_unknown(other))
- break;
- l_ptr->state = WORKING_WORKING;
- link_activate(l_ptr);
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- if (l_ptr->owner->working_links == 1)
- tipc_link_sync_xmit(l_ptr);
- break;
- case RESET_MSG:
- l_ptr->state = RESET_RESET;
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 1, 0, 0, 0);
- break;
- case STARTING_EVT:
- l_ptr->flags |= LINK_STARTED;
- link_set_timer(l_ptr, timer_intv);
- break;
- case SILENCE_EVT:
- tipc_link_proto_xmit(l_ptr, RESET_MSG, 0, 0, 0, 0);
- break;
- default:
- pr_err("%s%u in RU state\n", link_unk_evt, event);
- }
- break;
- case RESET_RESET:
- switch (event) {
- case TRAFFIC_MSG_EVT:
- case ACTIVATE_MSG:
- other = l_ptr->owner->active_links[0];
- if (other && link_working_unknown(other))
- break;
- l_ptr->state = WORKING_WORKING;
- link_activate(l_ptr);
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 1, 0, 0, 0);
- if (l_ptr->owner->working_links == 1)
- tipc_link_sync_xmit(l_ptr);
- break;
- case RESET_MSG:
- break;
- case SILENCE_EVT:
- tipc_link_proto_xmit(l_ptr, ACTIVATE_MSG,
- 0, 0, 0, 0);
- break;
- default:
- pr_err("%s%u in RR state\n", link_unk_evt, event);
- }
- break;
- default:
- pr_err("Unknown link state %u/%u\n", l_ptr->state, event);
- }
+ /* Prepare for renewed mtu size negotiation */
+ l->mtu = l->advertised_mtu;
+
+ /* Clean up all queues: */
+ __skb_queue_purge(&l->transmq);
+ __skb_queue_purge(&l->deferdq);
+ skb_queue_splice_init(&l->wakeupq, l->inputq);
+
+ tipc_link_purge_backlog(l);
+ kfree_skb(l->reasm_buf);
+ kfree_skb(l->failover_reasm_skb);
+ l->reasm_buf = NULL;
+ l->failover_reasm_skb = NULL;
+ l->rcv_unacked = 0;
+ l->snd_nxt = 1;
+ l->rcv_nxt = 1;
+ l->silent_intv_cnt = 0;
+ l->stats.recv_info = 0;
+ l->stale_count = 0;
+ link_reset_statistics(l);
}
/**
@@ -635,8 +613,7 @@ static void link_state_event(struct tipc_link *l_ptr, unsigned int event)
* @link: link to use
* @list: chain of buffers containing message
*
- * Consumes the buffer chain, except when returning -ELINKCONG,
- * since the caller then may want to make more send attempts.
+ * Consumes the buffer chain, except when returning an error code,
* Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
* Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
*/
@@ -650,7 +627,7 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
u16 ack = mod(link->rcv_nxt - 1);
u16 seqno = link->snd_nxt;
u16 bc_last_in = link->owner->bclink.last_in;
- struct tipc_media_addr *addr = &link->media_addr;
+ struct tipc_media_addr *addr = link->media_addr;
struct sk_buff_head *transmq = &link->transmq;
struct sk_buff_head *backlogq = &link->backlogq;
struct sk_buff *skb, *bskb;
@@ -660,10 +637,9 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
if (unlikely(link->backlog[i].len >= link->backlog[i].limit))
return link_schedule_user(link, list);
}
- if (unlikely(msg_size(msg) > mtu)) {
- __skb_queue_purge(list);
+ if (unlikely(msg_size(msg) > mtu))
return -EMSGSIZE;
- }
+
/* Prepare each packet for sending, and add to relevant queue: */
while (skb_queue_len(list)) {
skb = skb_peek(list);
@@ -700,101 +676,76 @@ int __tipc_link_xmit(struct net *net, struct tipc_link *link,
return 0;
}
-static void skb2list(struct sk_buff *skb, struct sk_buff_head *list)
-{
- skb_queue_head_init(list);
- __skb_queue_tail(list, skb);
-}
-
-static int __tipc_link_xmit_skb(struct tipc_link *link, struct sk_buff *skb)
-{
- struct sk_buff_head head;
-
- skb2list(skb, &head);
- return __tipc_link_xmit(link->owner->net, link, &head);
-}
-
-/* tipc_link_xmit_skb(): send single buffer to destination
- * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE
- * messages, which will not be rejected
- * The only exception is datagram messages rerouted after secondary
- * lookup, which are rare and safe to dispose of anyway.
- * TODO: Return real return value, and let callers use
- * tipc_wait_for_sendpkt() where applicable
- */
-int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
- u32 selector)
-{
- struct sk_buff_head head;
- int rc;
-
- skb2list(skb, &head);
- rc = tipc_link_xmit(net, &head, dnode, selector);
- if (rc == -ELINKCONG)
- kfree_skb(skb);
- return 0;
-}
-
/**
- * tipc_link_xmit() is the general link level function for message sending
- * @net: the applicable net namespace
+ * tipc_link_xmit(): enqueue buffer list according to queue situation
+ * @link: link to use
* @list: chain of buffers containing message
- * @dsz: amount of user data to be sent
- * @dnode: address of destination node
- * @selector: a number used for deterministic link selection
- * Consumes the buffer chain, except when returning -ELINKCONG
- * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
+ * @xmitq: returned list of packets to be sent by caller
+ *
+ * Consumes the buffer chain, except when returning -ELINKCONG,
+ * since the caller then may want to make more send attempts.
+ * Returns 0 if success, or errno: -ELINKCONG, -EMSGSIZE or -ENOBUFS
+ * Messages at TIPC_SYSTEM_IMPORTANCE are always accepted
*/
-int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
- u32 selector)
+int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
+ struct sk_buff_head *xmitq)
{
- struct tipc_link *link = NULL;
- struct tipc_node *node;
- int rc = -EHOSTUNREACH;
+ struct tipc_msg *hdr = buf_msg(skb_peek(list));
+ unsigned int maxwin = l->window;
+ unsigned int i, imp = msg_importance(hdr);
+ unsigned int mtu = l->mtu;
+ u16 ack = l->rcv_nxt - 1;
+ u16 seqno = l->snd_nxt;
+ u16 bc_last_in = l->owner->bclink.last_in;
+ struct sk_buff_head *transmq = &l->transmq;
+ struct sk_buff_head *backlogq = &l->backlogq;
+ struct sk_buff *skb, *_skb, *bskb;
- node = tipc_node_find(net, dnode);
- if (node) {
- tipc_node_lock(node);
- link = node->active_links[selector & 1];
- if (link)
- rc = __tipc_link_xmit(net, link, list);
- tipc_node_unlock(node);
- tipc_node_put(node);
- }
- if (link)
- return rc;
-
- if (likely(in_own_node(net, dnode))) {
- tipc_sk_rcv(net, list);
- return 0;
+ /* Match msg importance against this and all higher backlog limits: */
+ for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) {
+ if (unlikely(l->backlog[i].len >= l->backlog[i].limit))
+ return link_schedule_user(l, list);
}
+ if (unlikely(msg_size(hdr) > mtu))
+ return -EMSGSIZE;
- __skb_queue_purge(list);
- return rc;
-}
-
-/*
- * tipc_link_sync_xmit - synchronize broadcast link endpoints.
- *
- * Give a newly added peer node the sequence number where it should
- * start receiving and acking broadcast packets.
- *
- * Called with node locked
- */
-static void tipc_link_sync_xmit(struct tipc_link *link)
-{
- struct sk_buff *skb;
- struct tipc_msg *msg;
-
- skb = tipc_buf_acquire(INT_H_SIZE);
- if (!skb)
- return;
+ /* Prepare each packet for sending, and add to relevant queue: */
+ while (skb_queue_len(list)) {
+ skb = skb_peek(list);
+ hdr = buf_msg(skb);
+ msg_set_seqno(hdr, seqno);
+ msg_set_ack(hdr, ack);
+ msg_set_bcast_ack(hdr, bc_last_in);
- msg = buf_msg(skb);
- tipc_msg_init(link_own_addr(link), msg, BCAST_PROTOCOL, STATE_MSG,
- INT_H_SIZE, link->addr);
- msg_set_last_bcast(msg, link->owner->bclink.acked);
- __tipc_link_xmit_skb(link, skb);
+ if (likely(skb_queue_len(transmq) < maxwin)) {
+ _skb = skb_clone(skb, GFP_ATOMIC);
+ if (!_skb)
+ return -ENOBUFS;
+ __skb_dequeue(list);
+ __skb_queue_tail(transmq, skb);
+ __skb_queue_tail(xmitq, _skb);
+ l->rcv_unacked = 0;
+ seqno++;
+ continue;
+ }
+ if (tipc_msg_bundle(skb_peek_tail(backlogq), hdr, mtu)) {
+ kfree_skb(__skb_dequeue(list));
+ l->stats.sent_bundled++;
+ continue;
+ }
+ if (tipc_msg_make_bundle(&bskb, hdr, mtu, l->addr)) {
+ kfree_skb(__skb_dequeue(list));
+ __skb_queue_tail(backlogq, bskb);
+ l->backlog[msg_importance(buf_msg(bskb))].len++;
+ l->stats.sent_bundled++;
+ l->stats.sent_bundles++;
+ continue;
+ }
+ l->backlog[imp].len += skb_queue_len(list);
+ skb_queue_splice_tail_init(list, backlogq);
+ }
+ l->snd_nxt = seqno;
+ return 0;
}
/*
@@ -842,29 +793,37 @@ void tipc_link_push_packets(struct tipc_link *link)
link->rcv_unacked = 0;
__skb_queue_tail(&link->transmq, skb);
tipc_bearer_send(link->owner->net, link->bearer_id,
- skb, &link->media_addr);
+ skb, link->media_addr);
}
link->snd_nxt = seqno;
}
-void tipc_link_reset_all(struct tipc_node *node)
+void tipc_link_advance_backlog(struct tipc_link *l, struct sk_buff_head *xmitq)
{
- char addr_string[16];
- u32 i;
-
- tipc_node_lock(node);
+ struct sk_buff *skb, *_skb;
+ struct tipc_msg *hdr;
+ u16 seqno = l->snd_nxt;
+ u16 ack = l->rcv_nxt - 1;
- pr_warn("Resetting all links to %s\n",
- tipc_addr_string_fill(addr_string, node->addr));
-
- for (i = 0; i < MAX_BEARERS; i++) {
- if (node->links[i]) {
- link_print(node->links[i], "Resetting link\n");
- tipc_link_reset(node->links[i]);
- }
+ while (skb_queue_len(&l->transmq) < l->window) {
+ skb = skb_peek(&l->backlogq);
+ if (!skb)
+ break;
+ _skb = skb_clone(skb, GFP_ATOMIC);
+ if (!_skb)
+ break;
+ __skb_dequeue(&l->backlogq);
+ hdr = buf_msg(skb);
+ l->backlog[msg_importance(hdr)].len--;
+ __skb_queue_tail(&l->transmq, skb);
+ __skb_queue_tail(xmitq, _skb);
+ msg_set_ack(hdr, ack);
+ msg_set_seqno(hdr, seqno);
+ msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ l->rcv_unacked = 0;
+ seqno++;
}
-
- tipc_node_unlock(node);
+ l->snd_nxt = seqno;
}
static void link_retransmit_failure(struct tipc_link *l_ptr,
@@ -877,9 +836,12 @@ static void link_retransmit_failure(struct tipc_link *l_ptr,
if (l_ptr->addr) {
/* Handle failure on standard link */
- link_print(l_ptr, "Resetting link\n");
- tipc_link_reset(l_ptr);
-
+ link_print(l_ptr, "Resetting link ");
+ pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
+ msg_user(msg), msg_type(msg), msg_size(msg),
+ msg_errcode(msg));
+ pr_info("sqno %u, prev: %x, src: %x\n",
+ msg_seqno(msg), msg_prevnode(msg), msg_orignode(msg));
} else {
/* Handle failure on broadcast link */
struct tipc_node *n_ptr;
@@ -934,191 +896,45 @@ void tipc_link_retransmit(struct tipc_link *l_ptr, struct sk_buff *skb,
msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, skb,
- &l_ptr->media_addr);
+ l_ptr->media_addr);
retransmits--;
l_ptr->stats.retransmitted++;
}
}
-/* link_synch(): check if all packets arrived before the synch
- * point have been consumed
- * Returns true if the parallel links are synched, otherwise false
- */
-static bool link_synch(struct tipc_link *l)
+static int tipc_link_retransm(struct tipc_link *l, int retransm,
+ struct sk_buff_head *xmitq)
{
- unsigned int post_synch;
- struct tipc_link *pl;
+ struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
+ struct tipc_msg *hdr;
- pl = tipc_parallel_link(l);
- if (pl == l)
- goto synched;
-
- /* Was last pre-synch packet added to input queue ? */
- if (less_eq(pl->rcv_nxt, l->synch_point))
- return false;
-
- /* Is it still in the input queue ? */
- post_synch = mod(pl->rcv_nxt - l->synch_point) - 1;
- if (skb_queue_len(&pl->inputq) > post_synch)
- return false;
-synched:
- l->flags &= ~LINK_SYNCHING;
- return true;
-}
-
-static void link_retrieve_defq(struct tipc_link *link,
- struct sk_buff_head *list)
-{
- u16 seq_no;
-
- if (skb_queue_empty(&link->deferdq))
- return;
-
- seq_no = buf_seqno(skb_peek(&link->deferdq));
- if (seq_no == link->rcv_nxt)
- skb_queue_splice_tail_init(&link->deferdq, list);
-}
-
-/**
- * tipc_rcv - process TIPC packets/messages arriving from off-node
- * @net: the applicable net namespace
- * @skb: TIPC packet
- * @b_ptr: pointer to bearer message arrived on
- *
- * Invoked with no locks held. Bearer pointer must point to a valid bearer
- * structure (i.e. cannot be NULL), but bearer can be inactive.
- */
-void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr)
-{
- struct tipc_net *tn = net_generic(net, tipc_net_id);
- struct sk_buff_head head;
- struct tipc_node *n_ptr;
- struct tipc_link *l_ptr;
- struct sk_buff *skb1, *tmp;
- struct tipc_msg *msg;
- u16 seq_no;
- u16 ackd;
- u32 released;
-
- skb2list(skb, &head);
-
- while ((skb = __skb_dequeue(&head))) {
- /* Ensure message is well-formed */
- if (unlikely(!tipc_msg_validate(skb)))
- goto discard;
-
- /* Handle arrival of a non-unicast link message */
- msg = buf_msg(skb);
- if (unlikely(msg_non_seq(msg))) {
- if (msg_user(msg) == LINK_CONFIG)
- tipc_disc_rcv(net, skb, b_ptr);
- else
- tipc_bclink_rcv(net, skb);
- continue;
- }
-
- /* Discard unicast link messages destined for another node */
- if (unlikely(!msg_short(msg) &&
- (msg_destnode(msg) != tn->own_addr)))
- goto discard;
-
- /* Locate neighboring node that sent message */
- n_ptr = tipc_node_find(net, msg_prevnode(msg));
- if (unlikely(!n_ptr))
- goto discard;
-
- tipc_node_lock(n_ptr);
- /* Locate unicast link endpoint that should handle message */
- l_ptr = n_ptr->links[b_ptr->identity];
- if (unlikely(!l_ptr))
- goto unlock;
-
- /* Verify that communication with node is currently allowed */
- if ((n_ptr->action_flags & TIPC_WAIT_PEER_LINKS_DOWN) &&
- msg_user(msg) == LINK_PROTOCOL &&
- (msg_type(msg) == RESET_MSG ||
- msg_type(msg) == ACTIVATE_MSG) &&
- !msg_redundant_link(msg))
- n_ptr->action_flags &= ~TIPC_WAIT_PEER_LINKS_DOWN;
-
- if (tipc_node_blocked(n_ptr))
- goto unlock;
-
- /* Validate message sequence number info */
- seq_no = msg_seqno(msg);
- ackd = msg_ack(msg);
-
- /* Release acked messages */
- if (unlikely(n_ptr->bclink.acked != msg_bcast_ack(msg)))
- tipc_bclink_acknowledge(n_ptr, msg_bcast_ack(msg));
-
- released = 0;
- skb_queue_walk_safe(&l_ptr->transmq, skb1, tmp) {
- if (more(buf_seqno(skb1), ackd))
- break;
- __skb_unlink(skb1, &l_ptr->transmq);
- kfree_skb(skb1);
- released = 1;
- }
-
- /* Try sending any messages link endpoint has pending */
- if (unlikely(skb_queue_len(&l_ptr->backlogq)))
- tipc_link_push_packets(l_ptr);
-
- if (released && !skb_queue_empty(&l_ptr->wakeupq))
- link_prepare_wakeup(l_ptr);
-
- /* Process the incoming packet */
- if (unlikely(!link_working_working(l_ptr))) {
- if (msg_user(msg) == LINK_PROTOCOL) {
- tipc_link_proto_rcv(l_ptr, skb);
- link_retrieve_defq(l_ptr, &head);
- skb = NULL;
- goto unlock;
- }
-
- /* Traffic message. Conditionally activate link */
- link_state_event(l_ptr, TRAFFIC_MSG_EVT);
-
- if (link_working_working(l_ptr)) {
- /* Re-insert buffer in front of queue */
- __skb_queue_head(&head, skb);
- skb = NULL;
- goto unlock;
- }
- goto unlock;
- }
-
- /* Link is now in state WORKING_WORKING */
- if (unlikely(seq_no != l_ptr->rcv_nxt)) {
- link_handle_out_of_seq_msg(l_ptr, skb);
- link_retrieve_defq(l_ptr, &head);
- skb = NULL;
- goto unlock;
- }
- l_ptr->silent_intv_cnt = 0;
+ if (!skb)
+ return 0;
- /* Synchronize with parallel link if applicable */
- if (unlikely((l_ptr->flags & LINK_SYNCHING) && !msg_dup(msg))) {
- if (!link_synch(l_ptr))
- goto unlock;
- }
- l_ptr->rcv_nxt++;
- if (unlikely(!skb_queue_empty(&l_ptr->deferdq)))
- link_retrieve_defq(l_ptr, &head);
- if (unlikely(++l_ptr->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
- l_ptr->stats.sent_acks++;
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0);
- }
- tipc_link_input(l_ptr, skb);
- skb = NULL;
-unlock:
- tipc_node_unlock(n_ptr);
- tipc_node_put(n_ptr);
-discard:
- if (unlikely(skb))
- kfree_skb(skb);
+ /* Detect repeated retransmit failures on same packet */
+ if (likely(l->last_retransm != buf_seqno(skb))) {
+ l->last_retransm = buf_seqno(skb);
+ l->stale_count = 1;
+ } else if (++l->stale_count > 100) {
+ link_retransmit_failure(l, skb);
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+ }
+ skb_queue_walk(&l->transmq, skb) {
+ if (!retransm)
+ return 0;
+ hdr = buf_msg(skb);
+ _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
+ if (!_skb)
+ return 0;
+ hdr = buf_msg(_skb);
+ msg_set_ack(hdr, l->rcv_nxt - 1);
+ msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ _skb->priority = TC_PRIO_CONTROL;
+ __skb_queue_tail(xmitq, _skb);
+ retransm--;
+ l->stats.retransmitted++;
}
+ return 0;
}
/* tipc_data_input - deliver data and name distr msgs to upper layer
@@ -1126,29 +942,22 @@ discard:
* Consumes buffer if message is of right type
* Node lock must be held
*/
-static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb)
+static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
+ struct sk_buff_head *inputq)
{
struct tipc_node *node = link->owner;
- struct tipc_msg *msg = buf_msg(skb);
- u32 dport = msg_destport(msg);
- switch (msg_user(msg)) {
+ switch (msg_user(buf_msg(skb))) {
case TIPC_LOW_IMPORTANCE:
case TIPC_MEDIUM_IMPORTANCE:
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
case CONN_MANAGER:
- if (tipc_skb_queue_tail(&link->inputq, skb, dport)) {
- node->inputq = &link->inputq;
- node->action_flags |= TIPC_MSG_EVT;
- }
+ __skb_queue_tail(inputq, skb);
return true;
case NAME_DISTRIBUTOR:
node->bclink.recv_permitted = true;
- node->namedq = &link->namedq;
- skb_queue_tail(&link->namedq, skb);
- if (skb_queue_len(&link->namedq) == 1)
- node->action_flags |= TIPC_NAMED_MSG_EVT;
+ skb_queue_tail(link->namedq, skb);
return true;
case MSG_BUNDLER:
case TUNNEL_PROTOCOL:
@@ -1165,54 +974,160 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb)
/* tipc_link_input - process packet that has passed link protocol check
*
* Consumes buffer
- * Node lock must be held
*/
-static void tipc_link_input(struct tipc_link *link, struct sk_buff *skb)
+static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *inputq)
{
- struct tipc_node *node = link->owner;
- struct tipc_msg *msg = buf_msg(skb);
+ struct tipc_node *node = l->owner;
+ struct tipc_msg *hdr = buf_msg(skb);
+ struct sk_buff **reasm_skb = &l->reasm_buf;
struct sk_buff *iskb;
+ int usr = msg_user(hdr);
+ int rc = 0;
int pos = 0;
+ int ipos = 0;
- if (likely(tipc_data_input(link, skb)))
- return;
+ if (unlikely(usr == TUNNEL_PROTOCOL)) {
+ if (msg_type(hdr) == SYNCH_MSG) {
+ __skb_queue_purge(&l->deferdq);
+ goto drop;
+ }
+ if (!tipc_msg_extract(skb, &iskb, &ipos))
+ return rc;
+ kfree_skb(skb);
+ skb = iskb;
+ hdr = buf_msg(skb);
+ if (less(msg_seqno(hdr), l->drop_point))
+ goto drop;
+ if (tipc_data_input(l, skb, inputq))
+ return rc;
+ usr = msg_user(hdr);
+ reasm_skb = &l->failover_reasm_skb;
+ }
- switch (msg_user(msg)) {
- case TUNNEL_PROTOCOL:
- if (msg_dup(msg)) {
- link->flags |= LINK_SYNCHING;
- link->synch_point = msg_seqno(msg_get_wrapped(msg));
- kfree_skb(skb);
- break;
+ if (usr == MSG_BUNDLER) {
+ l->stats.recv_bundles++;
+ l->stats.recv_bundled += msg_msgcnt(hdr);
+ while (tipc_msg_extract(skb, &iskb, &pos))
+ tipc_data_input(l, iskb, inputq);
+ return 0;
+ } else if (usr == MSG_FRAGMENTER) {
+ l->stats.recv_fragments++;
+ if (tipc_buf_append(reasm_skb, &skb)) {
+ l->stats.recv_fragmented++;
+ tipc_data_input(l, skb, inputq);
+ } else if (!*reasm_skb) {
+ return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
}
- if (!tipc_link_failover_rcv(link, &skb))
- break;
- if (msg_user(buf_msg(skb)) != MSG_BUNDLER) {
- tipc_data_input(link, skb);
+ return 0;
+ } else if (usr == BCAST_PROTOCOL) {
+ tipc_link_sync_rcv(node, skb);
+ return 0;
+ }
+drop:
+ kfree_skb(skb);
+ return 0;
+}
+
+static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
+{
+ bool released = false;
+ struct sk_buff *skb, *tmp;
+
+ skb_queue_walk_safe(&l->transmq, skb, tmp) {
+ if (more(buf_seqno(skb), acked))
break;
+ __skb_unlink(skb, &l->transmq);
+ kfree_skb(skb);
+ released = true;
+ }
+ return released;
+}
+
+/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
+ * @link: the link that should handle the message
+ * @skb: TIPC packet
+ * @xmitq: queue to place packets to be sent after this call
+ */
+int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff_head *arrvq = &l->deferdq;
+ struct sk_buff_head tmpq;
+ struct tipc_msg *hdr;
+ u16 seqno, rcv_nxt;
+ int rc = 0;
+
+ __skb_queue_head_init(&tmpq);
+
+ if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) {
+ if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0,
+ 0, 0, 0, xmitq);
+ return rc;
+ }
+
+ while ((skb = skb_peek(arrvq))) {
+ hdr = buf_msg(skb);
+
+ /* Verify and update link state */
+ if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) {
+ __skb_dequeue(arrvq);
+ rc = tipc_link_proto_rcv(l, skb, xmitq);
+ continue;
}
- case MSG_BUNDLER:
- link->stats.recv_bundles++;
- link->stats.recv_bundled += msg_msgcnt(msg);
- while (tipc_msg_extract(skb, &iskb, &pos))
- tipc_data_input(link, iskb);
- break;
- case MSG_FRAGMENTER:
- link->stats.recv_fragments++;
- if (tipc_buf_append(&link->reasm_buf, &skb)) {
- link->stats.recv_fragmented++;
- tipc_data_input(link, skb);
- } else if (!link->reasm_buf) {
- tipc_link_reset(link);
+ if (unlikely(!link_is_up(l))) {
+ rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
+ if (!link_is_up(l)) {
+ kfree_skb(__skb_dequeue(arrvq));
+ goto exit;
+ }
}
- break;
- case BCAST_PROTOCOL:
- tipc_link_sync_rcv(node, skb);
- break;
- default:
- break;
- };
+
+ l->silent_intv_cnt = 0;
+
+ /* Forward queues and wake up waiting users */
+ if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+ }
+
+ /* Defer reception if there is a gap in the sequence */
+ seqno = msg_seqno(hdr);
+ rcv_nxt = l->rcv_nxt;
+ if (unlikely(less(rcv_nxt, seqno))) {
+ l->stats.deferred_recv++;
+ goto exit;
+ }
+
+ __skb_dequeue(arrvq);
+
+ /* Drop if packet already received */
+ if (unlikely(more(rcv_nxt, seqno))) {
+ l->stats.duplicates++;
+ kfree_skb(skb);
+ goto exit;
+ }
+
+ /* Packet can be delivered */
+ l->rcv_nxt++;
+ l->stats.recv_info++;
+ if (unlikely(!tipc_data_input(l, skb, &tmpq)))
+ rc = tipc_link_input(l, skb, &tmpq);
+
+ /* Ack at regular intervals */
+ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
+ l->rcv_unacked = 0;
+ l->stats.sent_acks++;
+ tipc_link_build_proto_msg(l, STATE_MSG,
+ 0, 0, 0, 0, xmitq);
+ }
+ }
+exit:
+ tipc_skb_queue_splice_tail(&tmpq, l->inputq);
+ return rc;
}
/**
@@ -1255,458 +1170,249 @@ u32 tipc_link_defer_pkt(struct sk_buff_head *list, struct sk_buff *skb)
}
/*
- * link_handle_out_of_seq_msg - handle arrival of out-of-sequence packet
+ * Send protocol message to the other endpoint.
*/
-static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
- struct sk_buff *buf)
+void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, int probe_msg,
+ u32 gap, u32 tolerance, u32 priority)
{
- u32 seq_no = buf_seqno(buf);
+ struct sk_buff *skb = NULL;
+ struct sk_buff_head xmitq;
- if (likely(msg_user(buf_msg(buf)) == LINK_PROTOCOL)) {
- tipc_link_proto_rcv(l_ptr, buf);
+ __skb_queue_head_init(&xmitq);
+ tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap,
+ tolerance, priority, &xmitq);
+ skb = __skb_dequeue(&xmitq);
+ if (!skb)
return;
- }
-
- /* Record OOS packet arrival */
- l_ptr->silent_intv_cnt = 0;
+ tipc_bearer_send(l->owner->net, l->bearer_id, skb, l->media_addr);
+ l->rcv_unacked = 0;
+ kfree_skb(skb);
+}
- /*
- * Discard packet if a duplicate; otherwise add it to deferred queue
- * and notify peer of gap as per protocol specification
- */
- if (less(seq_no, l_ptr->rcv_nxt)) {
- l_ptr->stats.duplicates++;
- kfree_skb(buf);
+/* tipc_link_build_proto_msg: prepare link protocol message for transmission
+ */
+static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
+ u16 rcvgap, int tolerance, int priority,
+ struct sk_buff_head *xmitq)
+{
+ struct sk_buff *skb = NULL;
+ struct tipc_msg *hdr = l->pmsg;
+ u16 snd_nxt = l->snd_nxt;
+ u16 rcv_nxt = l->rcv_nxt;
+ u16 rcv_last = rcv_nxt - 1;
+ int node_up = l->owner->bclink.recv_permitted;
+
+ /* Don't send protocol message during reset or link failover */
+ if (tipc_link_is_blocked(l))
return;
- }
- if (tipc_link_defer_pkt(&l_ptr->deferdq, buf)) {
- l_ptr->stats.deferred_recv++;
- if ((skb_queue_len(&l_ptr->deferdq) % TIPC_MIN_LINK_WIN) == 1)
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 0, 0, 0, 0);
+ msg_set_type(hdr, mtyp);
+ msg_set_net_plane(hdr, l->net_plane);
+ msg_set_bcast_ack(hdr, l->owner->bclink.last_in);
+ msg_set_last_bcast(hdr, tipc_bclink_get_last_sent(l->owner->net));
+ msg_set_link_tolerance(hdr, tolerance);
+ msg_set_linkprio(hdr, priority);
+ msg_set_redundant_link(hdr, node_up);
+ msg_set_seq_gap(hdr, 0);
+
+ /* Compatibility: created msg must not be in sequence with pkt flow */
+ msg_set_seqno(hdr, snd_nxt + U16_MAX / 2);
+
+ if (mtyp == STATE_MSG) {
+ if (!tipc_link_is_up(l))
+ return;
+ msg_set_next_sent(hdr, snd_nxt);
+
+ /* Override rcvgap if there are packets in deferred queue */
+ if (!skb_queue_empty(&l->deferdq))
+ rcvgap = buf_seqno(skb_peek(&l->deferdq)) - rcv_nxt;
+ if (rcvgap) {
+ msg_set_seq_gap(hdr, rcvgap);
+ l->stats.sent_nacks++;
+ }
+ msg_set_ack(hdr, rcv_last);
+ msg_set_probe(hdr, probe);
+ if (probe)
+ l->stats.sent_probes++;
+ l->stats.sent_states++;
} else {
- l_ptr->stats.duplicates++;
+ /* RESET_MSG or ACTIVATE_MSG */
+ msg_set_max_pkt(hdr, l->advertised_mtu);
+ msg_set_ack(hdr, l->rcv_nxt - 1);
+ msg_set_next_sent(hdr, 1);
}
+ skb = tipc_buf_acquire(msg_size(hdr));
+ if (!skb)
+ return;
+ skb_copy_to_linear_data(skb, hdr, msg_size(hdr));
+ skb->priority = TC_PRIO_CONTROL;
+ __skb_queue_tail(xmitq, skb);
}
-/*
- * Send protocol message to the other endpoint.
+/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets
+ * with contents of the link's transmit and backlog queues.
*/
-void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int probe_msg,
- u32 gap, u32 tolerance, u32 priority)
+void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ int mtyp, struct sk_buff_head *xmitq)
{
- struct sk_buff *buf = NULL;
- struct tipc_msg *msg = l_ptr->pmsg;
- u32 msg_size = sizeof(l_ptr->proto_msg);
- int r_flag;
- u16 last_rcv;
-
- /* Don't send protocol message during link failover */
- if (l_ptr->flags & LINK_FAILINGOVER)
- return;
+ struct sk_buff *skb, *tnlskb;
+ struct tipc_msg *hdr, tnlhdr;
+ struct sk_buff_head *queue = &l->transmq;
+ struct sk_buff_head tmpxq, tnlq;
+ u16 pktlen, pktcnt, seqno = l->snd_nxt;
- /* Abort non-RESET send if communication with node is prohibited */
- if ((tipc_node_blocked(l_ptr->owner)) && (msg_typ != RESET_MSG))
+ if (!tnl)
return;
- /* Create protocol message with "out-of-sequence" sequence number */
- msg_set_type(msg, msg_typ);
- msg_set_net_plane(msg, l_ptr->net_plane);
- msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
- msg_set_last_bcast(msg, tipc_bclink_get_last_sent(l_ptr->owner->net));
-
- if (msg_typ == STATE_MSG) {
- u16 next_sent = l_ptr->snd_nxt;
+ skb_queue_head_init(&tnlq);
+ skb_queue_head_init(&tmpxq);
- if (!tipc_link_is_up(l_ptr))
+ /* At least one packet required for safe algorithm => add dummy */
+ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG,
+ BASIC_H_SIZE, 0, l->addr, link_own_addr(l),
+ 0, 0, TIPC_ERR_NO_PORT);
+ if (!skb) {
+ pr_warn("%sunable to create tunnel packet\n", link_co_err);
+ return;
+ }
+ skb_queue_tail(&tnlq, skb);
+ tipc_link_xmit(l, &tnlq, &tmpxq);
+ __skb_queue_purge(&tmpxq);
+
+ /* Initialize reusable tunnel packet header */
+ tipc_msg_init(link_own_addr(l), &tnlhdr, TUNNEL_PROTOCOL,
+ mtyp, INT_H_SIZE, l->addr);
+ pktcnt = skb_queue_len(&l->transmq) + skb_queue_len(&l->backlogq);
+ msg_set_msgcnt(&tnlhdr, pktcnt);
+ msg_set_bearer_id(&tnlhdr, l->peer_bearer_id);
+tnl:
+ /* Wrap each packet into a tunnel packet */
+ skb_queue_walk(queue, skb) {
+ hdr = buf_msg(skb);
+ if (queue == &l->backlogq)
+ msg_set_seqno(hdr, seqno++);
+ pktlen = msg_size(hdr);
+ msg_set_size(&tnlhdr, pktlen + INT_H_SIZE);
+ tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE);
+ if (!tnlskb) {
+ pr_warn("%sunable to send packet\n", link_co_err);
return;
- msg_set_next_sent(msg, next_sent);
- if (!skb_queue_empty(&l_ptr->deferdq)) {
- last_rcv = buf_seqno(skb_peek(&l_ptr->deferdq));
- gap = mod(last_rcv - l_ptr->rcv_nxt);
}
- msg_set_seq_gap(msg, gap);
- if (gap)
- l_ptr->stats.sent_nacks++;
- msg_set_link_tolerance(msg, tolerance);
- msg_set_linkprio(msg, priority);
- msg_set_max_pkt(msg, l_ptr->mtu);
- msg_set_ack(msg, mod(l_ptr->rcv_nxt - 1));
- msg_set_probe(msg, probe_msg != 0);
- if (probe_msg)
- l_ptr->stats.sent_probes++;
- l_ptr->stats.sent_states++;
- } else { /* RESET_MSG or ACTIVATE_MSG */
- msg_set_ack(msg, mod(l_ptr->failover_checkpt - 1));
- msg_set_seq_gap(msg, 0);
- msg_set_next_sent(msg, 1);
- msg_set_probe(msg, 0);
- msg_set_link_tolerance(msg, l_ptr->tolerance);
- msg_set_linkprio(msg, l_ptr->priority);
- msg_set_max_pkt(msg, l_ptr->advertised_mtu);
+ skb_copy_to_linear_data(tnlskb, &tnlhdr, INT_H_SIZE);
+ skb_copy_to_linear_data_offset(tnlskb, INT_H_SIZE, hdr, pktlen);
+ __skb_queue_tail(&tnlq, tnlskb);
+ }
+ if (queue != &l->backlogq) {
+ queue = &l->backlogq;
+ goto tnl;
}
- r_flag = (l_ptr->owner->working_links > tipc_link_is_up(l_ptr));
- msg_set_redundant_link(msg, r_flag);
- msg_set_linkprio(msg, l_ptr->priority);
- msg_set_size(msg, msg_size);
-
- msg_set_seqno(msg, mod(l_ptr->snd_nxt + (0xffff / 2)));
-
- buf = tipc_buf_acquire(msg_size);
- if (!buf)
- return;
+ tipc_link_xmit(tnl, &tnlq, xmitq);
- skb_copy_to_linear_data(buf, msg, sizeof(l_ptr->proto_msg));
- buf->priority = TC_PRIO_CONTROL;
- tipc_bearer_send(l_ptr->owner->net, l_ptr->bearer_id, buf,
- &l_ptr->media_addr);
- l_ptr->rcv_unacked = 0;
- kfree_skb(buf);
+ if (mtyp == FAILOVER_MSG) {
+ tnl->drop_point = l->rcv_nxt;
+ tnl->failover_reasm_skb = l->reasm_buf;
+ l->reasm_buf = NULL;
+ }
}
-/*
- * Receive protocol message :
+/* tipc_link_proto_rcv(): receive link level protocol message :
* Note that network plane id propagates through the network, and may
- * change at any time. The node with lowest address rules
+ * change at any time. The node with lowest numerical id determines
+ * network plane
*/
-static void tipc_link_proto_rcv(struct tipc_link *l_ptr,
- struct sk_buff *buf)
+static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq)
{
- u32 rec_gap = 0;
- u32 msg_tol;
- struct tipc_msg *msg = buf_msg(buf);
+ struct tipc_msg *hdr = buf_msg(skb);
+ u16 rcvgap = 0;
+ u16 nacked_gap = msg_seq_gap(hdr);
+ u16 peers_snd_nxt = msg_next_sent(hdr);
+ u16 peers_tol = msg_link_tolerance(hdr);
+ u16 peers_prio = msg_linkprio(hdr);
+ char *if_name;
+ int rc = 0;
- if (l_ptr->flags & LINK_FAILINGOVER)
+ if (tipc_link_is_blocked(l))
goto exit;
- if (l_ptr->net_plane != msg_net_plane(msg))
- if (link_own_addr(l_ptr) > msg_prevnode(msg))
- l_ptr->net_plane = msg_net_plane(msg);
-
- switch (msg_type(msg)) {
+ if (link_own_addr(l) > msg_prevnode(hdr))
+ l->net_plane = msg_net_plane(hdr);
+ switch (msg_type(hdr)) {
case RESET_MSG:
- if (!link_working_unknown(l_ptr) &&
- (l_ptr->peer_session != INVALID_SESSION)) {
- if (less_eq(msg_session(msg), l_ptr->peer_session))
- break; /* duplicate or old reset: ignore */
- }
-
- if (!msg_redundant_link(msg) && (link_working_working(l_ptr) ||
- link_working_unknown(l_ptr))) {
- /*
- * peer has lost contact -- don't allow peer's links
- * to reactivate before we recognize loss & clean up
- */
- l_ptr->owner->action_flags |= TIPC_WAIT_OWN_LINKS_DOWN;
- }
-
- link_state_event(l_ptr, RESET_MSG);
+ /* Ignore duplicate RESET with old session number */
+ if ((less_eq(msg_session(hdr), l->peer_session)) &&
+ (l->peer_session != WILDCARD_SESSION))
+ break;
/* fall thru' */
- case ACTIVATE_MSG:
- /* Update link settings according other endpoint's values */
- strcpy((strrchr(l_ptr->name, ':') + 1), (char *)msg_data(msg));
- msg_tol = msg_link_tolerance(msg);
- if (msg_tol > l_ptr->tolerance)
- link_set_supervision_props(l_ptr, msg_tol);
-
- if (msg_linkprio(msg) > l_ptr->priority)
- l_ptr->priority = msg_linkprio(msg);
-
- if (l_ptr->mtu > msg_max_pkt(msg))
- l_ptr->mtu = msg_max_pkt(msg);
-
- /* Synchronize broadcast link info, if not done previously */
- if (!tipc_node_is_up(l_ptr->owner)) {
- l_ptr->owner->bclink.last_sent =
- l_ptr->owner->bclink.last_in =
- msg_last_bcast(msg);
- l_ptr->owner->bclink.oos_state = 0;
- }
-
- l_ptr->peer_session = msg_session(msg);
- l_ptr->peer_bearer_id = msg_bearer_id(msg);
-
- if (msg_type(msg) == ACTIVATE_MSG)
- link_state_event(l_ptr, ACTIVATE_MSG);
- break;
- case STATE_MSG:
+ case ACTIVATE_MSG:
- msg_tol = msg_link_tolerance(msg);
- if (msg_tol)
- link_set_supervision_props(l_ptr, msg_tol);
-
- if (msg_linkprio(msg) &&
- (msg_linkprio(msg) != l_ptr->priority)) {
- pr_debug("%s<%s>, priority change %u->%u\n",
- link_rst_msg, l_ptr->name,
- l_ptr->priority, msg_linkprio(msg));
- l_ptr->priority = msg_linkprio(msg);
- tipc_link_reset(l_ptr); /* Enforce change to take effect */
+ /* Complete own link name with peer's interface name */
+ if_name = strrchr(l->name, ':') + 1;
+ if (sizeof(l->name) - (if_name - l->name) <= TIPC_MAX_IF_NAME)
break;
- }
-
- /* Record reception; force mismatch at next timeout: */
- l_ptr->silent_intv_cnt = 0;
-
- link_state_event(l_ptr, TRAFFIC_MSG_EVT);
- l_ptr->stats.recv_states++;
- if (link_reset_unknown(l_ptr))
+ if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME)
break;
+ strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME);
- if (less_eq(l_ptr->rcv_nxt, msg_next_sent(msg)))
- rec_gap = mod(msg_next_sent(msg) - l_ptr->rcv_nxt);
+ /* Update own tolerance if peer indicates a non-zero value */
+ if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
+ l->tolerance = peers_tol;
- if (msg_probe(msg))
- l_ptr->stats.recv_probes++;
+ /* Update own priority if peer's priority is higher */
+ if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
+ l->priority = peers_prio;
- /* Protocol message before retransmits, reduce loss risk */
- if (l_ptr->owner->bclink.recv_permitted)
- tipc_bclink_update_link_state(l_ptr->owner,
- msg_last_bcast(msg));
-
- if (rec_gap || (msg_probe(msg))) {
- tipc_link_proto_xmit(l_ptr, STATE_MSG, 0,
- rec_gap, 0, 0);
- }
- if (msg_seq_gap(msg)) {
- l_ptr->stats.recv_nacks++;
- tipc_link_retransmit(l_ptr, skb_peek(&l_ptr->transmq),
- msg_seq_gap(msg));
+ if (msg_type(hdr) == RESET_MSG) {
+ rc |= tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
+ } else if (!link_is_up(l)) {
+ tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
+ rc |= tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
}
+ l->peer_session = msg_session(hdr);
+ l->peer_bearer_id = msg_bearer_id(hdr);
+ if (l->mtu > msg_max_pkt(hdr))
+ l->mtu = msg_max_pkt(hdr);
break;
- }
-exit:
- kfree_skb(buf);
-}
-
-
-/* tipc_link_tunnel_xmit(): Tunnel one packet via a link belonging to
- * a different bearer. Owner node is locked.
- */
-static void tipc_link_tunnel_xmit(struct tipc_link *l_ptr,
- struct tipc_msg *tunnel_hdr,
- struct tipc_msg *msg,
- u32 selector)
-{
- struct tipc_link *tunnel;
- struct sk_buff *skb;
- u32 length = msg_size(msg);
-
- tunnel = l_ptr->owner->active_links[selector & 1];
- if (!tipc_link_is_up(tunnel)) {
- pr_warn("%stunnel link no longer available\n", link_co_err);
- return;
- }
- msg_set_size(tunnel_hdr, length + INT_H_SIZE);
- skb = tipc_buf_acquire(length + INT_H_SIZE);
- if (!skb) {
- pr_warn("%sunable to send tunnel msg\n", link_co_err);
- return;
- }
- skb_copy_to_linear_data(skb, tunnel_hdr, INT_H_SIZE);
- skb_copy_to_linear_data_offset(skb, INT_H_SIZE, msg, length);
- __tipc_link_xmit_skb(tunnel, skb);
-}
-
-
-/* tipc_link_failover_send_queue(): A link has gone down, but a second
- * link is still active. We can do failover. Tunnel the failing link's
- * whole send queue via the remaining link. This way, we don't lose
- * any packets, and sequence order is preserved for subsequent traffic
- * sent over the remaining link. Owner node is locked.
- */
-void tipc_link_failover_send_queue(struct tipc_link *l_ptr)
-{
- int msgcount;
- struct tipc_link *tunnel = l_ptr->owner->active_links[0];
- struct tipc_msg tunnel_hdr;
- struct sk_buff *skb;
- int split_bundles;
-
- if (!tunnel)
- return;
- tipc_msg_init(link_own_addr(l_ptr), &tunnel_hdr, TUNNEL_PROTOCOL,
- FAILOVER_MSG, INT_H_SIZE, l_ptr->addr);
-
- skb_queue_walk(&l_ptr->backlogq, skb) {
- msg_set_seqno(buf_msg(skb), l_ptr->snd_nxt);
- l_ptr->snd_nxt = mod(l_ptr->snd_nxt + 1);
- }
- skb_queue_splice_tail_init(&l_ptr->backlogq, &l_ptr->transmq);
- tipc_link_purge_backlog(l_ptr);
- msgcount = skb_queue_len(&l_ptr->transmq);
- msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
- msg_set_msgcnt(&tunnel_hdr, msgcount);
-
- if (skb_queue_empty(&l_ptr->transmq)) {
- skb = tipc_buf_acquire(INT_H_SIZE);
- if (skb) {
- skb_copy_to_linear_data(skb, &tunnel_hdr, INT_H_SIZE);
- msg_set_size(&tunnel_hdr, INT_H_SIZE);
- __tipc_link_xmit_skb(tunnel, skb);
- } else {
- pr_warn("%sunable to send changeover msg\n",
- link_co_err);
- }
- return;
- }
-
- split_bundles = (l_ptr->owner->active_links[0] !=
- l_ptr->owner->active_links[1]);
+ case STATE_MSG:
- skb_queue_walk(&l_ptr->transmq, skb) {
- struct tipc_msg *msg = buf_msg(skb);
+ /* Update own tolerance if peer indicates a non-zero value */
+ if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL))
+ l->tolerance = peers_tol;
- if ((msg_user(msg) == MSG_BUNDLER) && split_bundles) {
- struct tipc_msg *m = msg_get_wrapped(msg);
- unchar *pos = (unchar *)m;
+ l->silent_intv_cnt = 0;
+ l->stats.recv_states++;
+ if (msg_probe(hdr))
+ l->stats.recv_probes++;
+ rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
+ if (!link_is_up(l))
+ break;
- msgcount = msg_msgcnt(msg);
- while (msgcount--) {
- msg_set_seqno(m, msg_seqno(msg));
- tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, m,
- msg_link_selector(m));
- pos += align(msg_size(m));
- m = (struct tipc_msg *)pos;
- }
- } else {
- tipc_link_tunnel_xmit(l_ptr, &tunnel_hdr, msg,
- msg_link_selector(msg));
+ /* Send NACK if peer has sent pkts we haven't received yet */
+ if (more(peers_snd_nxt, l->rcv_nxt))
+ rcvgap = peers_snd_nxt - l->rcv_nxt;
+ if (rcvgap || (msg_probe(hdr)))
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap,
+ 0, 0, xmitq);
+ tipc_link_release_pkts(l, msg_ack(hdr));
+
+ /* If NACK, retransmit will now start at right position */
+ if (nacked_gap) {
+ rc = tipc_link_retransm(l, nacked_gap, xmitq);
+ l->stats.recv_nacks++;
}
- }
-}
-
-/* tipc_link_dup_queue_xmit(): A second link has become active. Tunnel a
- * duplicate of the first link's send queue via the new link. This way, we
- * are guaranteed that currently queued packets from a socket are delivered
- * before future traffic from the same socket, even if this is using the
- * new link. The last arriving copy of each duplicate packet is dropped at
- * the receiving end by the regular protocol check, so packet cardinality
- * and sequence order is preserved per sender/receiver socket pair.
- * Owner node is locked.
- */
-void tipc_link_dup_queue_xmit(struct tipc_link *link,
- struct tipc_link *tnl)
-{
- struct sk_buff *skb;
- struct tipc_msg tnl_hdr;
- struct sk_buff_head *queue = &link->transmq;
- int mcnt;
- u16 seqno;
-
- tipc_msg_init(link_own_addr(link), &tnl_hdr, TUNNEL_PROTOCOL,
- SYNCH_MSG, INT_H_SIZE, link->addr);
- mcnt = skb_queue_len(&link->transmq) + skb_queue_len(&link->backlogq);
- msg_set_msgcnt(&tnl_hdr, mcnt);
- msg_set_bearer_id(&tnl_hdr, link->peer_bearer_id);
-
-tunnel_queue:
- skb_queue_walk(queue, skb) {
- struct sk_buff *outskb;
- struct tipc_msg *msg = buf_msg(skb);
- u32 len = msg_size(msg);
- msg_set_ack(msg, mod(link->rcv_nxt - 1));
- msg_set_bcast_ack(msg, link->owner->bclink.last_in);
- msg_set_size(&tnl_hdr, len + INT_H_SIZE);
- outskb = tipc_buf_acquire(len + INT_H_SIZE);
- if (outskb == NULL) {
- pr_warn("%sunable to send duplicate msg\n",
- link_co_err);
- return;
- }
- skb_copy_to_linear_data(outskb, &tnl_hdr, INT_H_SIZE);
- skb_copy_to_linear_data_offset(outskb, INT_H_SIZE,
- skb->data, len);
- __tipc_link_xmit_skb(tnl, outskb);
- if (!tipc_link_is_up(link))
- return;
- }
- if (queue == &link->backlogq)
- return;
- seqno = link->snd_nxt;
- skb_queue_walk(&link->backlogq, skb) {
- msg_set_seqno(buf_msg(skb), seqno);
- seqno = mod(seqno + 1);
- }
- queue = &link->backlogq;
- goto tunnel_queue;
-}
-
-/* tipc_link_failover_rcv(): Receive a tunnelled FAILOVER_MSG packet
- * Owner node is locked.
- */
-static bool tipc_link_failover_rcv(struct tipc_link *link,
- struct sk_buff **skb)
-{
- struct tipc_msg *msg = buf_msg(*skb);
- struct sk_buff *iskb = NULL;
- struct tipc_link *pl = NULL;
- int bearer_id = msg_bearer_id(msg);
- int pos = 0;
-
- if (msg_type(msg) != FAILOVER_MSG) {
- pr_warn("%sunknown tunnel pkt received\n", link_co_err);
- goto exit;
- }
- if (bearer_id >= MAX_BEARERS)
- goto exit;
-
- if (bearer_id == link->bearer_id)
- goto exit;
-
- pl = link->owner->links[bearer_id];
- if (pl && tipc_link_is_up(pl))
- tipc_link_reset(pl);
-
- if (link->failover_pkts == FIRST_FAILOVER)
- link->failover_pkts = msg_msgcnt(msg);
-
- /* Should we expect an inner packet? */
- if (!link->failover_pkts)
- goto exit;
-
- if (!tipc_msg_extract(*skb, &iskb, &pos)) {
- pr_warn("%sno inner failover pkt\n", link_co_err);
- *skb = NULL;
- goto exit;
- }
- link->failover_pkts--;
- *skb = NULL;
-
- /* Was this packet already delivered? */
- if (less(buf_seqno(iskb), link->failover_checkpt)) {
- kfree_skb(iskb);
- iskb = NULL;
- goto exit;
- }
- if (msg_user(buf_msg(iskb)) == MSG_FRAGMENTER) {
- link->stats.recv_fragments++;
- tipc_buf_append(&link->failover_skb, &iskb);
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
}
exit:
- if (!link->failover_pkts && pl)
- pl->flags &= ~LINK_FAILINGOVER;
- kfree_skb(*skb);
- *skb = iskb;
- return *skb;
-}
-
-static void link_set_supervision_props(struct tipc_link *l_ptr, u32 tol)
-{
- unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4;
-
- if ((tol < TIPC_MIN_LINK_TOL) || (tol > TIPC_MAX_LINK_TOL))
- return;
-
- l_ptr->tolerance = tol;
- l_ptr->keepalive_intv = msecs_to_jiffies(intv);
- l_ptr->abort_limit = tol / (jiffies_to_msecs(l_ptr->keepalive_intv));
+ kfree_skb(skb);
+ return rc;
}
void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
@@ -1743,7 +1449,7 @@ static struct tipc_node *tipc_link_find_owner(struct net *net,
list_for_each_entry_rcu(n_ptr, &tn->node_list, list) {
tipc_node_lock(n_ptr);
for (i = 0; i < MAX_BEARERS; i++) {
- l_ptr = n_ptr->links[i];
+ l_ptr = n_ptr->links[i].link;
if (l_ptr && !strcmp(l_ptr->name, link_name)) {
*bearer_id = i;
found_node = n_ptr;
@@ -1770,27 +1476,16 @@ static void link_reset_statistics(struct tipc_link *l_ptr)
l_ptr->stats.recv_info = l_ptr->rcv_nxt;
}
-static void link_print(struct tipc_link *l_ptr, const char *str)
+static void link_print(struct tipc_link *l, const char *str)
{
- struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id);
- struct tipc_bearer *b_ptr;
-
- rcu_read_lock();
- b_ptr = rcu_dereference_rtnl(tn->bearer_list[l_ptr->bearer_id]);
- if (b_ptr)
- pr_info("%s Link %x<%s>:", str, l_ptr->addr, b_ptr->name);
- rcu_read_unlock();
-
- if (link_working_unknown(l_ptr))
- pr_cont(":WU\n");
- else if (link_reset_reset(l_ptr))
- pr_cont(":RR\n");
- else if (link_reset_unknown(l_ptr))
- pr_cont(":RU\n");
- else if (link_working_working(l_ptr))
- pr_cont(":WW\n");
- else
- pr_cont("\n");
+ struct sk_buff *hskb = skb_peek(&l->transmq);
+ u16 head = hskb ? msg_seqno(buf_msg(hskb)) : l->snd_nxt;
+ u16 tail = l->snd_nxt - 1;
+
+ pr_info("%s Link <%s> state %x\n", str, l->name, l->state);
+ pr_info("XMTQ: %u [%u-%u], BKLGQ: %u, SNDNX: %u, RCVNX: %u\n",
+ skb_queue_len(&l->transmq), head, tail,
+ skb_queue_len(&l->backlogq), l->snd_nxt, l->rcv_nxt);
}
/* Parse and validate nested (link) properties valid for media, bearer and link
@@ -1865,7 +1560,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (!link) {
res = -EINVAL;
goto out;
@@ -1885,7 +1580,7 @@ int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info)
u32 tol;
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]);
- link_set_supervision_props(link, tol);
+ link->tolerance = tol;
tipc_link_proto_xmit(link, STATE_MSG, 0, 0, tol, 0);
}
if (props[TIPC_NLA_PROP_PRIO]) {
@@ -2055,10 +1750,11 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
for (i = *prev_link; i < MAX_BEARERS; i++) {
*prev_link = i;
- if (!node->links[i])
+ if (!node->links[i].link)
continue;
- err = __tipc_nl_add_link(net, msg, node->links[i], NLM_F_MULTI);
+ err = __tipc_nl_add_link(net, msg,
+ node->links[i].link, NLM_F_MULTI);
if (err)
return err;
}
@@ -2172,7 +1868,7 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (!link) {
tipc_node_unlock(node);
nlmsg_free(msg.skb);
@@ -2227,7 +1923,7 @@ int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info)
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (!link) {
tipc_node_unlock(node);
return -EINVAL;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index ae0a0ea572f2..39ff8b6919a4 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -49,19 +49,25 @@
*/
#define INVALID_LINK_SEQ 0x10000
-/* Link working states
+/* Link FSM events:
*/
-#define WORKING_WORKING 560810u
-#define WORKING_UNKNOWN 560811u
-#define RESET_UNKNOWN 560812u
-#define RESET_RESET 560813u
+enum {
+ LINK_ESTABLISH_EVT = 0xec1ab1e,
+ LINK_PEER_RESET_EVT = 0x9eed0e,
+ LINK_FAILURE_EVT = 0xfa110e,
+ LINK_RESET_EVT = 0x10ca1d0e,
+ LINK_FAILOVER_BEGIN_EVT = 0xfa110bee,
+ LINK_FAILOVER_END_EVT = 0xfa110ede,
+ LINK_SYNCH_BEGIN_EVT = 0xc1ccbee,
+ LINK_SYNCH_END_EVT = 0xc1ccede
+};
-/* Link endpoint execution states
+/* Events returned from link at packet reception or at timeout
*/
-#define LINK_STARTED 0x0001
-#define LINK_STOPPED 0x0002
-#define LINK_SYNCHING 0x0004
-#define LINK_FAILINGOVER 0x0008
+enum {
+ TIPC_LINK_UP_EVT = 1,
+ TIPC_LINK_DOWN_EVT = (1 << 1)
+};
/* Starting value for maximum packet size negotiation on unicast links
* (unless bearer MTU is less)
@@ -106,7 +112,6 @@ struct tipc_stats {
* @timer: link timer
* @owner: pointer to peer node
* @refcnt: reference counter for permanent references (owner node & timer)
- * @flags: execution state flags for link endpoint instance
* @peer_session: link session # being used by peer end of link
* @peer_bearer_id: bearer id used by link's peer endpoint
* @bearer_id: local bearer id used by link
@@ -143,20 +148,17 @@ struct tipc_stats {
struct tipc_link {
u32 addr;
char name[TIPC_MAX_LINK_NAME];
- struct tipc_media_addr media_addr;
- struct timer_list timer;
+ struct tipc_media_addr *media_addr;
struct tipc_node *owner;
- struct kref ref;
/* Management and link supervision data */
- unsigned int flags;
u32 peer_session;
u32 peer_bearer_id;
u32 bearer_id;
u32 tolerance;
unsigned long keepalive_intv;
u32 abort_limit;
- int state;
+ u32 state;
u32 silent_intv_cnt;
struct {
unchar hdr[INT_H_SIZE];
@@ -165,12 +167,10 @@ struct tipc_link {
struct tipc_msg *pmsg;
u32 priority;
char net_plane;
- u16 synch_point;
- /* Failover */
- u16 failover_pkts;
- u16 failover_checkpt;
- struct sk_buff *failover_skb;
+ /* Failover/synch */
+ u16 drop_point;
+ struct sk_buff *failover_reasm_skb;
/* Max packet negotiation */
u16 mtu;
@@ -192,8 +192,8 @@ struct tipc_link {
u16 rcv_nxt;
u32 rcv_unacked;
struct sk_buff_head deferdq;
- struct sk_buff_head inputq;
- struct sk_buff_head namedq;
+ struct sk_buff_head *inputq;
+ struct sk_buff_head *namedq;
/* Congestion handling */
struct sk_buff_head wakeupq;
@@ -205,28 +205,29 @@ struct tipc_link {
struct tipc_stats stats;
};
-struct tipc_port;
-
-struct tipc_link *tipc_link_create(struct tipc_node *n_ptr,
- struct tipc_bearer *b_ptr,
- const struct tipc_media_addr *media_addr);
-void tipc_link_delete(struct tipc_link *link);
-void tipc_link_delete_list(struct net *net, unsigned int bearer_id);
-void tipc_link_failover_send_queue(struct tipc_link *l_ptr);
-void tipc_link_dup_queue_xmit(struct tipc_link *l_ptr, struct tipc_link *dest);
+bool tipc_link_create(struct tipc_node *n, struct tipc_bearer *b, u32 session,
+ u32 ownnode, u32 peer, struct tipc_media_addr *maddr,
+ struct sk_buff_head *inputq, struct sk_buff_head *namedq,
+ struct tipc_link **link);
+void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
+ int mtyp, struct sk_buff_head *xmitq);
+void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
+ struct sk_buff_head *xmitq);
+int tipc_link_fsm_evt(struct tipc_link *l, int evt);
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
-int tipc_link_is_up(struct tipc_link *l_ptr);
+bool tipc_link_is_up(struct tipc_link *l);
+bool tipc_link_is_reset(struct tipc_link *l);
+bool tipc_link_is_synching(struct tipc_link *l);
+bool tipc_link_is_failingover(struct tipc_link *l);
+bool tipc_link_is_blocked(struct tipc_link *l);
int tipc_link_is_active(struct tipc_link *l_ptr);
void tipc_link_purge_queues(struct tipc_link *l_ptr);
void tipc_link_purge_backlog(struct tipc_link *l);
-void tipc_link_reset_all(struct tipc_node *node);
void tipc_link_reset(struct tipc_link *l_ptr);
-int tipc_link_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
- u32 selector);
-int tipc_link_xmit(struct net *net, struct sk_buff_head *list, u32 dest,
- u32 selector);
int __tipc_link_xmit(struct net *net, struct tipc_link *link,
struct sk_buff_head *list);
+int tipc_link_xmit(struct tipc_link *link, struct sk_buff_head *list,
+ struct sk_buff_head *xmitq);
void tipc_link_proto_xmit(struct tipc_link *l_ptr, u32 msg_typ, int prob,
u32 gap, u32 tolerance, u32 priority);
void tipc_link_push_packets(struct tipc_link *l_ptr);
@@ -242,34 +243,8 @@ int tipc_nl_link_get(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_link_set(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_link_reset_stats(struct sk_buff *skb, struct genl_info *info);
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
-void link_prepare_wakeup(struct tipc_link *l);
-
-static inline u32 link_own_addr(struct tipc_link *l)
-{
- return msg_prevnode(l->pmsg);
-}
-
-/*
- * Link status checking routines
- */
-static inline int link_working_working(struct tipc_link *l_ptr)
-{
- return l_ptr->state == WORKING_WORKING;
-}
-
-static inline int link_working_unknown(struct tipc_link *l_ptr)
-{
- return l_ptr->state == WORKING_UNKNOWN;
-}
-
-static inline int link_reset_unknown(struct tipc_link *l_ptr)
-{
- return l_ptr->state == RESET_UNKNOWN;
-}
-
-static inline int link_reset_reset(struct tipc_link *l_ptr)
-{
- return l_ptr->state == RESET_RESET;
-}
+int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq);
+int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
+ struct sk_buff_head *xmitq);
#endif
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 08b4cc7d496d..562c926a51cc 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -463,60 +463,72 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
/**
* tipc_msg_reverse(): swap source and destination addresses and add error code
- * @buf: buffer containing message to be reversed
- * @dnode: return value: node where to send message after reversal
- * @err: error code to be set in message
- * Consumes buffer if failure
+ * @own_node: originating node id for reversed message
+ * @skb: buffer containing message to be reversed; may be replaced.
+ * @err: error code to be set in message, if any
+ * Consumes buffer at failure
* Returns true if success, otherwise false
*/
-bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,
- int err)
+bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
{
- struct tipc_msg *msg = buf_msg(buf);
+ struct sk_buff *_skb = *skb;
+ struct tipc_msg *hdr = buf_msg(_skb);
struct tipc_msg ohdr;
- uint rdsz = min_t(uint, msg_data_sz(msg), MAX_FORWARD_SIZE);
+ int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
- if (skb_linearize(buf))
+ if (skb_linearize(_skb))
goto exit;
- msg = buf_msg(buf);
- if (msg_dest_droppable(msg))
+ hdr = buf_msg(_skb);
+ if (msg_dest_droppable(hdr))
goto exit;
- if (msg_errcode(msg))
+ if (msg_errcode(hdr))
goto exit;
- memcpy(&ohdr, msg, msg_hdr_sz(msg));
- msg_set_errcode(msg, err);
- msg_set_origport(msg, msg_destport(&ohdr));
- msg_set_destport(msg, msg_origport(&ohdr));
- msg_set_prevnode(msg, own_addr);
- if (!msg_short(msg)) {
- msg_set_orignode(msg, msg_destnode(&ohdr));
- msg_set_destnode(msg, msg_orignode(&ohdr));
+
+ /* Take a copy of original header before altering message */
+ memcpy(&ohdr, hdr, msg_hdr_sz(hdr));
+
+ /* Never return SHORT header; expand by replacing buffer if necessary */
+ if (msg_short(hdr)) {
+ *skb = tipc_buf_acquire(BASIC_H_SIZE + dlen);
+ if (!*skb)
+ goto exit;
+ memcpy((*skb)->data + BASIC_H_SIZE, msg_data(hdr), dlen);
+ kfree_skb(_skb);
+ _skb = *skb;
+ hdr = buf_msg(_skb);
+ memcpy(hdr, &ohdr, BASIC_H_SIZE);
+ msg_set_hdr_sz(hdr, BASIC_H_SIZE);
}
- msg_set_size(msg, msg_hdr_sz(msg) + rdsz);
- skb_trim(buf, msg_size(msg));
- skb_orphan(buf);
- *dnode = msg_orignode(&ohdr);
+
+ /* Now reverse the concerned fields */
+ msg_set_errcode(hdr, err);
+ msg_set_origport(hdr, msg_destport(&ohdr));
+ msg_set_destport(hdr, msg_origport(&ohdr));
+ msg_set_destnode(hdr, msg_prevnode(&ohdr));
+ msg_set_prevnode(hdr, own_node);
+ msg_set_orignode(hdr, own_node);
+ msg_set_size(hdr, msg_hdr_sz(hdr) + dlen);
+ skb_trim(_skb, msg_size(hdr));
+ skb_orphan(_skb);
return true;
exit:
- kfree_skb(buf);
- *dnode = 0;
+ kfree_skb(_skb);
+ *skb = NULL;
return false;
}
/**
* tipc_msg_lookup_dest(): try to find new destination for named message
* @skb: the buffer containing the message.
- * @dnode: return value: next-hop node, if destination found
- * @err: return value: error code to use, if message to be rejected
+ * @err: error code to be used by caller if lookup fails
* Does not consume buffer
* Returns true if a destination is found, false otherwise
*/
-bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb,
- u32 *dnode, int *err)
+bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err)
{
struct tipc_msg *msg = buf_msg(skb);
- u32 dport;
- u32 own_addr = tipc_own_addr(net);
+ u32 dport, dnode;
+ u32 onode = tipc_own_addr(net);
if (!msg_isdata(msg))
return false;
@@ -529,15 +541,15 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb,
return false;
if (msg_reroute_cnt(msg))
return false;
- *dnode = addr_domain(net, msg_lookup_scope(msg));
+ dnode = addr_domain(net, msg_lookup_scope(msg));
dport = tipc_nametbl_translate(net, msg_nametype(msg),
- msg_nameinst(msg), dnode);
+ msg_nameinst(msg), &dnode);
if (!dport)
return false;
msg_incr_reroute_cnt(msg);
- if (*dnode != own_addr)
- msg_set_prevnode(msg, own_addr);
- msg_set_destnode(msg, *dnode);
+ if (dnode != onode)
+ msg_set_prevnode(msg, onode);
+ msg_set_destnode(msg, dnode);
msg_set_destport(msg, dport);
*err = TIPC_OK;
return true;
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 19c45fb66238..a82c5848d4bc 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -38,6 +38,7 @@
#define _TIPC_MSG_H
#include <linux/tipc.h>
+#include "core.h"
/*
* Constants and routines used to read and write TIPC payload message headers
@@ -109,7 +110,6 @@ struct tipc_skb_cb {
struct sk_buff *tail;
bool validated;
bool wakeup_pending;
- bool bundling;
u16 chain_sz;
u16 chain_imp;
};
@@ -558,15 +558,6 @@ static inline void msg_set_node_capabilities(struct tipc_msg *m, u32 n)
msg_set_bits(m, 1, 15, 0x1fff, n);
}
-static inline bool msg_dup(struct tipc_msg *m)
-{
- if (likely(msg_user(m) != TUNNEL_PROTOCOL))
- return false;
- if (msg_type(m) != SYNCH_MSG)
- return false;
- return true;
-}
-
/*
* Word 2
*/
@@ -620,12 +611,12 @@ static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n)
}
-static inline u32 msg_next_sent(struct tipc_msg *m)
+static inline u16 msg_next_sent(struct tipc_msg *m)
{
return msg_bits(m, 4, 0, 0xffff);
}
-static inline void msg_set_next_sent(struct tipc_msg *m, u32 n)
+static inline void msg_set_next_sent(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 4, 0, 0xffff, n);
}
@@ -658,12 +649,12 @@ static inline void msg_set_link_selector(struct tipc_msg *m, u32 n)
/*
* Word 5
*/
-static inline u32 msg_session(struct tipc_msg *m)
+static inline u16 msg_session(struct tipc_msg *m)
{
return msg_bits(m, 5, 16, 0xffff);
}
-static inline void msg_set_session(struct tipc_msg *m, u32 n)
+static inline void msg_set_session(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 5, 16, 0xffff, n);
}
@@ -726,12 +717,12 @@ static inline char *msg_media_addr(struct tipc_msg *m)
/*
* Word 9
*/
-static inline u32 msg_msgcnt(struct tipc_msg *m)
+static inline u16 msg_msgcnt(struct tipc_msg *m)
{
return msg_bits(m, 9, 16, 0xffff);
}
-static inline void msg_set_msgcnt(struct tipc_msg *m, u32 n)
+static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n)
{
msg_set_bits(m, 9, 16, 0xffff, n);
}
@@ -766,10 +757,25 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n)
msg_set_bits(m, 9, 0, 0xffff, n);
}
+static inline bool msg_peer_link_is_up(struct tipc_msg *m)
+{
+ if (likely(msg_user(m) != LINK_PROTOCOL))
+ return true;
+ if (msg_type(m) == STATE_MSG)
+ return true;
+ return false;
+}
+
+static inline bool msg_peer_node_is_up(struct tipc_msg *m)
+{
+ if (msg_peer_link_is_up(m))
+ return true;
+ return msg_redundant_link(m);
+}
+
struct sk_buff *tipc_buf_acquire(u32 size);
bool tipc_msg_validate(struct sk_buff *skb);
-bool tipc_msg_reverse(u32 own_addr, struct sk_buff *buf, u32 *dnode,
- int err);
+bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err);
void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type,
u32 hsize, u32 destnode);
struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz,
@@ -782,8 +788,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos);
int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
-bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, u32 *dnode,
- int *err);
+bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list);
static inline u16 buf_seqno(struct sk_buff *skb)
@@ -857,26 +862,65 @@ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list,
return skb;
}
-/* tipc_skb_queue_tail(): add buffer to tail of list;
+/* __tipc_skb_queue_sorted(): sort pkt into list according to sequence number
* @list: list to be appended to
- * @skb: buffer to append. Always appended
- * @dport: the destination port of the buffer
- * returns true if dport differs from previous destination
+ * @skb: buffer to add
+ * Returns true if queue should be treated further, otherwise false
*/
-static inline bool tipc_skb_queue_tail(struct sk_buff_head *list,
- struct sk_buff *skb, u32 dport)
+static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list,
+ struct sk_buff *skb)
{
- struct sk_buff *_skb = NULL;
- bool rv = false;
+ struct sk_buff *_skb, *tmp;
+ struct tipc_msg *hdr = buf_msg(skb);
+ u16 seqno = msg_seqno(hdr);
- spin_lock_bh(&list->lock);
- _skb = skb_peek_tail(list);
- if (!_skb || (msg_destport(buf_msg(_skb)) != dport) ||
- (skb_queue_len(list) > 32))
- rv = true;
+ if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) {
+ __skb_queue_head(list, skb);
+ return true;
+ }
+ if (likely(less(seqno, buf_seqno(skb_peek(list))))) {
+ __skb_queue_head(list, skb);
+ return true;
+ }
+ if (!more(seqno, buf_seqno(skb_peek_tail(list)))) {
+ skb_queue_walk_safe(list, _skb, tmp) {
+ if (likely(less(seqno, buf_seqno(_skb)))) {
+ __skb_queue_before(list, _skb, skb);
+ return true;
+ }
+ }
+ }
__skb_queue_tail(list, skb);
+ return false;
+}
+
+/* tipc_skb_queue_splice_tail - append an skb list to lock protected list
+ * @list: the new list to append. Not lock protected
+ * @head: target list. Lock protected.
+ */
+static inline void tipc_skb_queue_splice_tail(struct sk_buff_head *list,
+ struct sk_buff_head *head)
+{
+ spin_lock_bh(&head->lock);
+ skb_queue_splice_tail(list, head);
+ spin_unlock_bh(&head->lock);
+}
+
+/* tipc_skb_queue_splice_tail_init - merge two lock protected skb lists
+ * @list: the new list to add. Lock protected. Will be reinitialized
+ * @head: target list. Lock protected.
+ */
+static inline void tipc_skb_queue_splice_tail_init(struct sk_buff_head *list,
+ struct sk_buff_head *head)
+{
+ struct sk_buff_head tmp;
+
+ __skb_queue_head_init(&tmp);
+
+ spin_lock_bh(&list->lock);
+ skb_queue_splice_tail_init(list, &tmp);
spin_unlock_bh(&list->lock);
- return rv;
+ tipc_skb_queue_splice_tail(&tmp, head);
}
#endif
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 41e7b7e4dda0..e6018b7eb197 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -96,13 +96,13 @@ void named_cluster_distribute(struct net *net, struct sk_buff *skb)
dnode = node->addr;
if (in_own_node(net, dnode))
continue;
- if (!tipc_node_active_links(node))
+ if (!tipc_node_is_up(node))
continue;
oskb = pskb_copy(skb, GFP_ATOMIC);
if (!oskb)
break;
msg_set_destnode(buf_msg(oskb), dnode);
- tipc_link_xmit_skb(net, oskb, dnode, dnode);
+ tipc_node_xmit_skb(net, oskb, dnode, dnode);
}
rcu_read_unlock();
@@ -223,7 +223,7 @@ void tipc_named_node_up(struct net *net, u32 dnode)
&tn->nametbl->publ_list[TIPC_ZONE_SCOPE]);
rcu_read_unlock();
- tipc_link_xmit(net, &head, dnode, dnode);
+ tipc_node_xmit(net, &head, dnode, dnode);
}
static void tipc_publ_subscribe(struct net *net, struct publication *publ,
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 53e0fee80086..1eadc95e1132 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -1114,7 +1114,7 @@ static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info)
}
len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN);
- if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) {
+ if (len && !TLV_OK(msg.req, len)) {
msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED);
err = -EOPNOTSUPP;
goto send;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0b1d61a5f853..7c191641b44f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -40,10 +40,42 @@
#include "name_distr.h"
#include "socket.h"
#include "bcast.h"
+#include "discover.h"
-static void node_lost_contact(struct tipc_node *n_ptr);
+/* Node FSM states and events:
+ */
+enum {
+ SELF_DOWN_PEER_DOWN = 0xdd,
+ SELF_UP_PEER_UP = 0xaa,
+ SELF_DOWN_PEER_LEAVING = 0xd1,
+ SELF_UP_PEER_COMING = 0xac,
+ SELF_COMING_PEER_UP = 0xca,
+ SELF_LEAVING_PEER_DOWN = 0x1d,
+ NODE_FAILINGOVER = 0xf0,
+ NODE_SYNCHING = 0xcc
+};
+
+enum {
+ SELF_ESTABL_CONTACT_EVT = 0xece,
+ SELF_LOST_CONTACT_EVT = 0x1ce,
+ PEER_ESTABL_CONTACT_EVT = 0x9ece,
+ PEER_LOST_CONTACT_EVT = 0x91ce,
+ NODE_FAILOVER_BEGIN_EVT = 0xfbe,
+ NODE_FAILOVER_END_EVT = 0xfee,
+ NODE_SYNCH_BEGIN_EVT = 0xcbe,
+ NODE_SYNCH_END_EVT = 0xcee
+};
+
+static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr **maddr);
+static void tipc_node_link_down(struct tipc_node *n, int bearer_id,
+ bool delete);
+static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq);
static void node_established_contact(struct tipc_node *n_ptr);
static void tipc_node_delete(struct tipc_node *node);
+static void tipc_node_timeout(unsigned long data);
+static void tipc_node_fsm_evt(struct tipc_node *n, int evt);
struct tipc_sock_conn {
u32 port;
@@ -110,7 +142,7 @@ struct tipc_node *tipc_node_find(struct net *net, u32 addr)
return NULL;
}
-struct tipc_node *tipc_node_create(struct net *net, u32 addr)
+struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
struct tipc_node *n_ptr, *temp_node;
@@ -126,12 +158,14 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
}
n_ptr->addr = addr;
n_ptr->net = net;
+ n_ptr->capabilities = capabilities;
kref_init(&n_ptr->kref);
spin_lock_init(&n_ptr->lock);
INIT_HLIST_NODE(&n_ptr->hash);
INIT_LIST_HEAD(&n_ptr->list);
INIT_LIST_HEAD(&n_ptr->publ_list);
INIT_LIST_HEAD(&n_ptr->conn_sks);
+ skb_queue_head_init(&n_ptr->bclink.namedq);
__skb_queue_head_init(&n_ptr->bclink.deferdq);
hlist_add_head_rcu(&n_ptr->hash, &tn->node_htable[tipc_hashfn(addr)]);
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
@@ -139,14 +173,32 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr)
break;
}
list_add_tail_rcu(&n_ptr->list, &temp_node->list);
- n_ptr->action_flags = TIPC_WAIT_PEER_LINKS_DOWN;
+ n_ptr->state = SELF_DOWN_PEER_LEAVING;
n_ptr->signature = INVALID_NODE_SIG;
+ n_ptr->active_links[0] = INVALID_BEARER_ID;
+ n_ptr->active_links[1] = INVALID_BEARER_ID;
tipc_node_get(n_ptr);
+ setup_timer(&n_ptr->timer, tipc_node_timeout, (unsigned long)n_ptr);
+ n_ptr->keepalive_intv = U32_MAX;
exit:
spin_unlock_bh(&tn->node_list_lock);
return n_ptr;
}
+static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l)
+{
+ unsigned long tol = l->tolerance;
+ unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4;
+ unsigned long keepalive_intv = msecs_to_jiffies(intv);
+
+ /* Link with lowest tolerance determines timer interval */
+ if (keepalive_intv < n->keepalive_intv)
+ n->keepalive_intv = keepalive_intv;
+
+ /* Ensure link's abort limit corresponds to current interval */
+ l->abort_limit = l->tolerance / jiffies_to_msecs(n->keepalive_intv);
+}
+
static void tipc_node_delete(struct tipc_node *node)
{
list_del_rcu(&node->list);
@@ -160,8 +212,11 @@ void tipc_node_stop(struct net *net)
struct tipc_node *node, *t_node;
spin_lock_bh(&tn->node_list_lock);
- list_for_each_entry_safe(node, t_node, &tn->node_list, list)
+ list_for_each_entry_safe(node, t_node, &tn->node_list, list) {
+ if (del_timer(&node->timer))
+ tipc_node_put(node);
tipc_node_put(node);
+ }
spin_unlock_bh(&tn->node_list_lock);
}
@@ -219,158 +274,547 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port)
tipc_node_put(node);
}
+/* tipc_node_timeout - handle expiration of node timer
+ */
+static void tipc_node_timeout(unsigned long data)
+{
+ struct tipc_node *n = (struct tipc_node *)data;
+ struct tipc_link_entry *le;
+ struct sk_buff_head xmitq;
+ int bearer_id;
+ int rc = 0;
+
+ __skb_queue_head_init(&xmitq);
+
+ for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
+ tipc_node_lock(n);
+ le = &n->links[bearer_id];
+ if (le->link) {
+ /* Link tolerance may change asynchronously: */
+ tipc_node_calculate_timer(n, le->link);
+ rc = tipc_link_timeout(le->link, &xmitq);
+ }
+ tipc_node_unlock(n);
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, &le->maddr);
+ if (rc & TIPC_LINK_DOWN_EVT)
+ tipc_node_link_down(n, bearer_id, false);
+ }
+ if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
+ tipc_node_get(n);
+ tipc_node_put(n);
+}
+
/**
- * tipc_node_link_up - handle addition of link
- *
+ * __tipc_node_link_up - handle addition of link
+ * Node lock must be held by caller
* Link becomes active (alone or shared) or standby, depending on its priority.
*/
-void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
+ struct sk_buff_head *xmitq)
{
- struct tipc_link **active = &n_ptr->active_links[0];
+ int *slot0 = &n->active_links[0];
+ int *slot1 = &n->active_links[1];
+ struct tipc_link *ol = node_active_link(n, 0);
+ struct tipc_link *nl = n->links[bearer_id].link;
- n_ptr->working_links++;
- n_ptr->action_flags |= TIPC_NOTIFY_LINK_UP;
- n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
+ if (!nl || !tipc_link_is_up(nl))
+ return;
- pr_debug("Established link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->net_plane);
+ n->working_links++;
+ n->action_flags |= TIPC_NOTIFY_LINK_UP;
+ n->link_id = nl->peer_bearer_id << 16 | bearer_id;
- if (!active[0]) {
- active[0] = active[1] = l_ptr;
- node_established_contact(n_ptr);
- goto exit;
- }
- if (l_ptr->priority < active[0]->priority) {
- pr_debug("New link <%s> becomes standby\n", l_ptr->name);
- goto exit;
+ /* Leave room for tunnel header when returning 'mtu' to users: */
+ n->links[bearer_id].mtu = nl->mtu - INT_H_SIZE;
+
+ tipc_bearer_add_dest(n->net, bearer_id, n->addr);
+
+ pr_debug("Established link <%s> on network plane %c\n",
+ nl->name, nl->net_plane);
+
+ /* First link? => give it both slots */
+ if (!ol) {
+ *slot0 = bearer_id;
+ *slot1 = bearer_id;
+ tipc_link_build_bcast_sync_msg(nl, xmitq);
+ node_established_contact(n);
+ return;
}
- tipc_link_dup_queue_xmit(active[0], l_ptr);
- if (l_ptr->priority == active[0]->priority) {
- active[0] = l_ptr;
- goto exit;
+
+ /* Second link => redistribute slots */
+ if (nl->priority > ol->priority) {
+ pr_debug("Old link <%s> becomes standby\n", ol->name);
+ *slot0 = bearer_id;
+ *slot1 = bearer_id;
+ } else if (nl->priority == ol->priority) {
+ *slot0 = bearer_id;
+ } else {
+ pr_debug("New link <%s> is standby\n", nl->name);
}
- pr_debug("Old link <%s> becomes standby\n", active[0]->name);
- if (active[1] != active[0])
- pr_debug("Old link <%s> becomes standby\n", active[1]->name);
- active[0] = active[1] = l_ptr;
-exit:
- /* Leave room for changeover header when returning 'mtu' to users: */
- n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE;
- n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE;
+
+ /* Prepare synchronization with first link */
+ tipc_link_tnl_prepare(ol, nl, SYNCH_MSG, xmitq);
}
/**
- * node_select_active_links - select active link
+ * tipc_node_link_up - handle addition of link
+ *
+ * Link becomes active (alone or shared) or standby, depending on its priority.
*/
-static void node_select_active_links(struct tipc_node *n_ptr)
+static void tipc_node_link_up(struct tipc_node *n, int bearer_id,
+ struct sk_buff_head *xmitq)
{
- struct tipc_link **active = &n_ptr->active_links[0];
- u32 i;
- u32 highest_prio = 0;
+ tipc_node_lock(n);
+ __tipc_node_link_up(n, bearer_id, xmitq);
+ tipc_node_unlock(n);
+}
- active[0] = active[1] = NULL;
+/**
+ * __tipc_node_link_down - handle loss of link
+ */
+static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
+ struct sk_buff_head *xmitq,
+ struct tipc_media_addr **maddr)
+{
+ struct tipc_link_entry *le = &n->links[*bearer_id];
+ int *slot0 = &n->active_links[0];
+ int *slot1 = &n->active_links[1];
+ int i, highest = 0;
+ struct tipc_link *l, *_l, *tnl;
+
+ l = n->links[*bearer_id].link;
+ if (!l || tipc_link_is_reset(l))
+ return;
- for (i = 0; i < MAX_BEARERS; i++) {
- struct tipc_link *l_ptr = n_ptr->links[i];
+ n->working_links--;
+ n->action_flags |= TIPC_NOTIFY_LINK_DOWN;
+ n->link_id = l->peer_bearer_id << 16 | *bearer_id;
- if (!l_ptr || !tipc_link_is_up(l_ptr) ||
- (l_ptr->priority < highest_prio))
- continue;
+ tipc_bearer_remove_dest(n->net, *bearer_id, n->addr);
+
+ pr_debug("Lost link <%s> on network plane %c\n",
+ l->name, l->net_plane);
- if (l_ptr->priority > highest_prio) {
- highest_prio = l_ptr->priority;
- active[0] = active[1] = l_ptr;
- } else {
- active[1] = l_ptr;
+ /* Select new active link if any available */
+ *slot0 = INVALID_BEARER_ID;
+ *slot1 = INVALID_BEARER_ID;
+ for (i = 0; i < MAX_BEARERS; i++) {
+ _l = n->links[i].link;
+ if (!_l || !tipc_link_is_up(_l))
+ continue;
+ if (_l == l)
+ continue;
+ if (_l->priority < highest)
+ continue;
+ if (_l->priority > highest) {
+ highest = _l->priority;
+ *slot0 = i;
+ *slot1 = i;
+ continue;
}
+ *slot1 = i;
+ }
+
+ if (!tipc_node_is_up(n)) {
+ tipc_link_reset(l);
+ node_lost_contact(n, &le->inputq);
+ return;
}
+
+ /* There is still a working link => initiate failover */
+ tnl = node_active_link(n, 0);
+ n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1);
+ tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
+ tipc_link_reset(l);
+ tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
+ *maddr = &n->links[tnl->bearer_id].maddr;
+ *bearer_id = tnl->bearer_id;
}
-/**
- * tipc_node_link_down - handle loss of link
- */
-void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
{
- struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
- struct tipc_link **active;
+ struct tipc_link_entry *le = &n->links[bearer_id];
+ struct tipc_media_addr *maddr;
+ struct sk_buff_head xmitq;
+
+ __skb_queue_head_init(&xmitq);
+
+ tipc_node_lock(n);
+ __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
+ if (delete && le->link) {
+ kfree(le->link);
+ le->link = NULL;
+ n->link_cnt--;
+ }
+ tipc_node_unlock(n);
- n_ptr->working_links--;
- n_ptr->action_flags |= TIPC_NOTIFY_LINK_DOWN;
- n_ptr->link_id = l_ptr->peer_bearer_id << 16 | l_ptr->bearer_id;
+ tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
+ tipc_sk_rcv(n->net, &le->inputq);
+}
- if (!tipc_link_is_active(l_ptr)) {
- pr_debug("Lost standby link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->net_plane);
- return;
- }
- pr_debug("Lost link <%s> on network plane %c\n",
- l_ptr->name, l_ptr->net_plane);
-
- active = &n_ptr->active_links[0];
- if (active[0] == l_ptr)
- active[0] = active[1];
- if (active[1] == l_ptr)
- active[1] = active[0];
- if (active[0] == l_ptr)
- node_select_active_links(n_ptr);
- if (tipc_node_is_up(n_ptr))
- tipc_link_failover_send_queue(l_ptr);
- else
- node_lost_contact(n_ptr);
-
- /* Leave room for changeover header when returning 'mtu' to users: */
- if (active[0]) {
- n_ptr->act_mtus[0] = active[0]->mtu - INT_H_SIZE;
- n_ptr->act_mtus[1] = active[1]->mtu - INT_H_SIZE;
+bool tipc_node_is_up(struct tipc_node *n)
+{
+ return n->active_links[0] != INVALID_BEARER_ID;
+}
+
+void tipc_node_check_dest(struct net *net, u32 onode,
+ struct tipc_bearer *b,
+ u16 capabilities, u32 signature,
+ struct tipc_media_addr *maddr,
+ bool *respond, bool *dupl_addr)
+{
+ struct tipc_node *n;
+ struct tipc_link *l;
+ struct tipc_link_entry *le;
+ bool addr_match = false;
+ bool sign_match = false;
+ bool link_up = false;
+ bool accept_addr = false;
+ bool reset = true;
+
+ *dupl_addr = false;
+ *respond = false;
+
+ n = tipc_node_create(net, onode, capabilities);
+ if (!n)
return;
+
+ tipc_node_lock(n);
+
+ le = &n->links[b->identity];
+
+ /* Prepare to validate requesting node's signature and media address */
+ l = le->link;
+ link_up = l && tipc_link_is_up(l);
+ addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
+ sign_match = (signature == n->signature);
+
+ /* These three flags give us eight permutations: */
+
+ if (sign_match && addr_match && link_up) {
+ /* All is fine. Do nothing. */
+ reset = false;
+ } else if (sign_match && addr_match && !link_up) {
+ /* Respond. The link will come up in due time */
+ *respond = true;
+ } else if (sign_match && !addr_match && link_up) {
+ /* Peer has changed i/f address without rebooting.
+ * If so, the link will reset soon, and the next
+ * discovery will be accepted. So we can ignore it.
+ * It may also be a cloned or malicious peer having
+ * chosen the same node address and signature as an
+ * existing one.
+ * Ignore requests until the link goes down, if ever.
+ */
+ *dupl_addr = true;
+ } else if (sign_match && !addr_match && !link_up) {
+ /* Peer link has changed i/f address without rebooting.
+ * It may also be a cloned or malicious peer; we can't
+ * distinguish between the two.
+ * The signature is correct, so we must accept.
+ */
+ accept_addr = true;
+ *respond = true;
+ } else if (!sign_match && addr_match && link_up) {
+ /* Peer node rebooted. Two possibilities:
+ * - Delayed re-discovery; this link endpoint has already
+ * reset and re-established contact with the peer, before
+ * receiving a discovery message from that node.
+ * (The peer happened to receive one from this node first).
+ * - The peer came back so fast that our side has not
+ * discovered it yet. Probing from this side will soon
+ * reset the link, since there can be no working link
+ * endpoint at the peer end, and the link will re-establish.
+ * Accept the signature, since it comes from a known peer.
+ */
+ n->signature = signature;
+ } else if (!sign_match && addr_match && !link_up) {
+ /* The peer node has rebooted.
+ * Accept signature, since it is a known peer.
+ */
+ n->signature = signature;
+ *respond = true;
+ } else if (!sign_match && !addr_match && link_up) {
+ /* Peer rebooted with new address, or a new/duplicate peer.
+ * Ignore until the link goes down, if ever.
+ */
+ *dupl_addr = true;
+ } else if (!sign_match && !addr_match && !link_up) {
+ /* Peer rebooted with new address, or it is a new peer.
+ * Accept signature and address.
+ */
+ n->signature = signature;
+ accept_addr = true;
+ *respond = true;
}
- /* Loopback link went down? No fragmentation needed from now on. */
- if (n_ptr->addr == tn->own_addr) {
- n_ptr->act_mtus[0] = MAX_MSG_SIZE;
- n_ptr->act_mtus[1] = MAX_MSG_SIZE;
+
+ if (!accept_addr)
+ goto exit;
+
+ /* Now create new link if not already existing */
+ if (!l) {
+ if (n->link_cnt == 2) {
+ pr_warn("Cannot establish 3rd link to %x\n", n->addr);
+ goto exit;
+ }
+ if (!tipc_link_create(n, b, mod(tipc_net(net)->random),
+ tipc_own_addr(net), onode, &le->maddr,
+ &le->inputq, &n->bclink.namedq, &l)) {
+ *respond = false;
+ goto exit;
+ }
+ tipc_link_reset(l);
+ le->link = l;
+ n->link_cnt++;
+ tipc_node_calculate_timer(n, l);
+ if (n->link_cnt == 1)
+ if (!mod_timer(&n->timer, jiffies + n->keepalive_intv))
+ tipc_node_get(n);
}
+ memcpy(&le->maddr, maddr, sizeof(*maddr));
+exit:
+ tipc_node_unlock(n);
+ if (reset)
+ tipc_node_link_down(n, b->identity, false);
+ tipc_node_put(n);
}
-int tipc_node_active_links(struct tipc_node *n_ptr)
+void tipc_node_delete_links(struct net *net, int bearer_id)
{
- return n_ptr->active_links[0] != NULL;
+ struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_node *n;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(n, &tn->node_list, list) {
+ tipc_node_link_down(n, bearer_id, true);
+ }
+ rcu_read_unlock();
}
-int tipc_node_is_up(struct tipc_node *n_ptr)
+static void tipc_node_reset_links(struct tipc_node *n)
{
- return tipc_node_active_links(n_ptr);
+ char addr_string[16];
+ int i;
+
+ pr_warn("Resetting all links to %s\n",
+ tipc_addr_string_fill(addr_string, n->addr));
+
+ for (i = 0; i < MAX_BEARERS; i++) {
+ tipc_node_link_down(n, i, false);
+ }
}
-void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+/* tipc_node_fsm_evt - node finite state machine
+ * Determines when contact is allowed with peer node
+ */
+static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
{
- n_ptr->links[l_ptr->bearer_id] = l_ptr;
- n_ptr->link_cnt++;
+ int state = n->state;
+
+ switch (state) {
+ case SELF_DOWN_PEER_DOWN:
+ switch (evt) {
+ case SELF_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_COMING;
+ break;
+ case PEER_ESTABL_CONTACT_EVT:
+ state = SELF_COMING_PEER_UP;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_UP_PEER_UP:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
+ state = NODE_SYNCHING;
+ break;
+ case NODE_FAILOVER_BEGIN_EVT:
+ state = NODE_FAILINGOVER;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case NODE_SYNCH_END_EVT:
+ case NODE_FAILOVER_END_EVT:
+ break;
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_DOWN_PEER_LEAVING:
+ switch (evt) {
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case SELF_LOST_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_UP_PEER_COMING:
+ switch (evt) {
+ case PEER_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_COMING_PEER_UP:
+ switch (evt) {
+ case SELF_ESTABL_CONTACT_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case SELF_LOST_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case SELF_LEAVING_PEER_DOWN:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_DOWN;
+ break;
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ case PEER_LOST_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_END_EVT:
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_FAILOVER_BEGIN_EVT:
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case NODE_FAILINGOVER:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case NODE_FAILOVER_END_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case NODE_FAILOVER_BEGIN_EVT:
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
+ case NODE_SYNCH_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ case NODE_SYNCHING:
+ switch (evt) {
+ case SELF_LOST_CONTACT_EVT:
+ state = SELF_DOWN_PEER_LEAVING;
+ break;
+ case PEER_LOST_CONTACT_EVT:
+ state = SELF_LEAVING_PEER_DOWN;
+ break;
+ case NODE_SYNCH_END_EVT:
+ state = SELF_UP_PEER_UP;
+ break;
+ case NODE_FAILOVER_BEGIN_EVT:
+ state = NODE_FAILINGOVER;
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
+ case SELF_ESTABL_CONTACT_EVT:
+ case PEER_ESTABL_CONTACT_EVT:
+ break;
+ case NODE_FAILOVER_END_EVT:
+ default:
+ goto illegal_evt;
+ }
+ break;
+ default:
+ pr_err("Unknown node fsm state %x\n", state);
+ break;
+ }
+ n->state = state;
+ return;
+
+illegal_evt:
+ pr_err("Illegal node fsm evt %x in state %x\n", evt, state);
}
-void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr)
+bool tipc_node_filter_pkt(struct tipc_node *n, struct tipc_msg *hdr)
{
- int i;
+ int state = n->state;
- for (i = 0; i < MAX_BEARERS; i++) {
- if (l_ptr != n_ptr->links[i])
- continue;
- n_ptr->links[i] = NULL;
- n_ptr->link_cnt--;
+ if (likely(state == SELF_UP_PEER_UP))
+ return true;
+
+ if (state == SELF_LEAVING_PEER_DOWN)
+ return false;
+
+ if (state == SELF_DOWN_PEER_LEAVING) {
+ if (msg_peer_node_is_up(hdr))
+ return false;
}
+
+ return true;
}
static void node_established_contact(struct tipc_node *n_ptr)
{
+ tipc_node_fsm_evt(n_ptr, SELF_ESTABL_CONTACT_EVT);
n_ptr->action_flags |= TIPC_NOTIFY_NODE_UP;
n_ptr->bclink.oos_state = 0;
n_ptr->bclink.acked = tipc_bclink_get_last_sent(n_ptr->net);
tipc_bclink_add_node(n_ptr->net, n_ptr->addr);
}
-static void node_lost_contact(struct tipc_node *n_ptr)
+static void node_lost_contact(struct tipc_node *n_ptr,
+ struct sk_buff_head *inputq)
{
char addr_string[16];
struct tipc_sock_conn *conn, *safe;
+ struct tipc_link *l;
struct list_head *conns = &n_ptr->conn_sks;
struct sk_buff *skb;
struct tipc_net *tn = net_generic(n_ptr->net, tipc_net_id);
@@ -396,21 +840,13 @@ static void node_lost_contact(struct tipc_node *n_ptr)
/* Abort any ongoing link failover */
for (i = 0; i < MAX_BEARERS; i++) {
- struct tipc_link *l_ptr = n_ptr->links[i];
- if (!l_ptr)
- continue;
- l_ptr->flags &= ~LINK_FAILINGOVER;
- l_ptr->failover_checkpt = 0;
- l_ptr->failover_pkts = 0;
- kfree_skb(l_ptr->failover_skb);
- l_ptr->failover_skb = NULL;
- tipc_link_reset_fragments(l_ptr);
+ l = n_ptr->links[i].link;
+ if (l)
+ tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT);
}
- n_ptr->action_flags &= ~TIPC_WAIT_OWN_LINKS_DOWN;
-
/* Prevent re-contact with node until cleanup is done */
- n_ptr->action_flags |= TIPC_WAIT_PEER_LINKS_DOWN;
+ tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT);
/* Notify publications from this node */
n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
@@ -421,10 +857,8 @@ static void node_lost_contact(struct tipc_node *n_ptr)
SHORT_H_SIZE, 0, tn->own_addr,
conn->peer_node, conn->port,
conn->peer_port, TIPC_ERR_NO_NODE);
- if (likely(skb)) {
- skb_queue_tail(n_ptr->inputq, skb);
- n_ptr->action_flags |= TIPC_MSG_EVT;
- }
+ if (likely(skb))
+ skb_queue_tail(inputq, skb);
list_del(&conn->list);
kfree(conn);
}
@@ -453,7 +887,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
goto exit;
tipc_node_lock(node);
- link = node->links[bearer_id];
+ link = node->links[bearer_id].link;
if (link) {
strncpy(linkname, link->name, len);
err = 0;
@@ -471,27 +905,20 @@ void tipc_node_unlock(struct tipc_node *node)
u32 flags = node->action_flags;
u32 link_id = 0;
struct list_head *publ_list;
- struct sk_buff_head *inputq = node->inputq;
- struct sk_buff_head *namedq;
- if (likely(!flags || (flags == TIPC_MSG_EVT))) {
- node->action_flags = 0;
+ if (likely(!flags)) {
spin_unlock_bh(&node->lock);
- if (flags == TIPC_MSG_EVT)
- tipc_sk_rcv(net, inputq);
return;
}
addr = node->addr;
link_id = node->link_id;
- namedq = node->namedq;
publ_list = &node->publ_list;
- node->action_flags &= ~(TIPC_MSG_EVT |
- TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
+ node->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
TIPC_NOTIFY_LINK_DOWN | TIPC_NOTIFY_LINK_UP |
TIPC_WAKEUP_BCAST_USERS | TIPC_BCAST_MSG_EVT |
- TIPC_NAMED_MSG_EVT | TIPC_BCAST_RESET);
+ TIPC_BCAST_RESET);
spin_unlock_bh(&node->lock);
@@ -512,17 +939,11 @@ void tipc_node_unlock(struct tipc_node *node)
tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
link_id, addr);
- if (flags & TIPC_MSG_EVT)
- tipc_sk_rcv(net, inputq);
-
- if (flags & TIPC_NAMED_MSG_EVT)
- tipc_named_rcv(net, namedq);
-
if (flags & TIPC_BCAST_MSG_EVT)
tipc_bclink_input(net);
if (flags & TIPC_BCAST_RESET)
- tipc_link_reset_all(node);
+ tipc_node_reset_links(node);
}
/* Caller should hold node lock for the passed node */
@@ -559,6 +980,279 @@ msg_full:
return -EMSGSIZE;
}
+static struct tipc_link *tipc_node_select_link(struct tipc_node *n, int sel,
+ int *bearer_id,
+ struct tipc_media_addr **maddr)
+{
+ int id = n->active_links[sel & 1];
+
+ if (unlikely(id < 0))
+ return NULL;
+
+ *bearer_id = id;
+ *maddr = &n->links[id].maddr;
+ return n->links[id].link;
+}
+
+/**
+ * tipc_node_xmit() is the general link level function for message sending
+ * @net: the applicable net namespace
+ * @list: chain of buffers containing message
+ * @dnode: address of destination node
+ * @selector: a number used for deterministic link selection
+ * Consumes the buffer chain, except when returning -ELINKCONG
+ * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE
+ */
+int tipc_node_xmit(struct net *net, struct sk_buff_head *list,
+ u32 dnode, int selector)
+{
+ struct tipc_link *l = NULL;
+ struct tipc_node *n;
+ struct sk_buff_head xmitq;
+ struct tipc_media_addr *maddr;
+ int bearer_id;
+ int rc = -EHOSTUNREACH;
+
+ __skb_queue_head_init(&xmitq);
+ n = tipc_node_find(net, dnode);
+ if (likely(n)) {
+ tipc_node_lock(n);
+ l = tipc_node_select_link(n, selector, &bearer_id, &maddr);
+ if (likely(l))
+ rc = tipc_link_xmit(l, list, &xmitq);
+ tipc_node_unlock(n);
+ if (unlikely(rc == -ENOBUFS))
+ tipc_node_link_down(n, bearer_id, false);
+ tipc_node_put(n);
+ }
+ if (likely(!rc)) {
+ tipc_bearer_xmit(net, bearer_id, &xmitq, maddr);
+ return 0;
+ }
+ if (likely(in_own_node(net, dnode))) {
+ tipc_sk_rcv(net, list);
+ return 0;
+ }
+ return rc;
+}
+
+/* tipc_node_xmit_skb(): send single buffer to destination
+ * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE
+ * messages, which will not be rejected
+ * The only exception is datagram messages rerouted after secondary
+ * lookup, which are rare and safe to dispose of anyway.
+ * TODO: Return real return value, and let callers use
+ * tipc_wait_for_sendpkt() where applicable
+ */
+int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode,
+ u32 selector)
+{
+ struct sk_buff_head head;
+ int rc;
+
+ skb_queue_head_init(&head);
+ __skb_queue_tail(&head, skb);
+ rc = tipc_node_xmit(net, &head, dnode, selector);
+ if (rc == -ELINKCONG)
+ kfree_skb(skb);
+ return 0;
+}
+
+/**
+ * tipc_node_check_state - check and if necessary update node state
+ * @skb: TIPC packet
+ * @bearer_id: identity of bearer delivering the packet
+ * Returns true if state is ok, otherwise consumes buffer and returns false
+ */
+static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
+ int bearer_id, struct sk_buff_head *xmitq)
+{
+ struct tipc_msg *hdr = buf_msg(skb);
+ int usr = msg_user(hdr);
+ int mtyp = msg_type(hdr);
+ u16 oseqno = msg_seqno(hdr);
+ u16 iseqno = msg_seqno(msg_get_wrapped(hdr));
+ u16 exp_pkts = msg_msgcnt(hdr);
+ u16 rcv_nxt, syncpt, dlv_nxt;
+ int state = n->state;
+ struct tipc_link *l, *pl = NULL;
+ struct tipc_media_addr *maddr;
+ int i, pb_id;
+
+ l = n->links[bearer_id].link;
+ if (!l)
+ return false;
+ rcv_nxt = l->rcv_nxt;
+
+
+ if (likely((state == SELF_UP_PEER_UP) && (usr != TUNNEL_PROTOCOL)))
+ return true;
+
+ /* Find parallel link, if any */
+ for (i = 0; i < MAX_BEARERS; i++) {
+ if ((i != bearer_id) && n->links[i].link) {
+ pl = n->links[i].link;
+ break;
+ }
+ }
+
+ /* Update node accessibility if applicable */
+ if (state == SELF_UP_PEER_COMING) {
+ if (!tipc_link_is_up(l))
+ return true;
+ if (!msg_peer_link_is_up(hdr))
+ return true;
+ tipc_node_fsm_evt(n, PEER_ESTABL_CONTACT_EVT);
+ }
+
+ if (state == SELF_DOWN_PEER_LEAVING) {
+ if (msg_peer_node_is_up(hdr))
+ return false;
+ tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
+ }
+
+ /* Ignore duplicate packets */
+ if (less(oseqno, rcv_nxt))
+ return true;
+
+ /* Initiate or update failover mode if applicable */
+ if ((usr == TUNNEL_PROTOCOL) && (mtyp == FAILOVER_MSG)) {
+ syncpt = oseqno + exp_pkts - 1;
+ if (pl && tipc_link_is_up(pl)) {
+ pb_id = pl->bearer_id;
+ __tipc_node_link_down(n, &pb_id, xmitq, &maddr);
+ tipc_skb_queue_splice_tail_init(pl->inputq, l->inputq);
+ }
+ /* If pkts arrive out of order, use lowest calculated syncpt */
+ if (less(syncpt, n->sync_point))
+ n->sync_point = syncpt;
+ }
+
+ /* Open parallel link when tunnel link reaches synch point */
+ if ((n->state == NODE_FAILINGOVER) && !tipc_link_is_failingover(l)) {
+ if (!more(rcv_nxt, n->sync_point))
+ return true;
+ tipc_node_fsm_evt(n, NODE_FAILOVER_END_EVT);
+ if (pl)
+ tipc_link_fsm_evt(pl, LINK_FAILOVER_END_EVT);
+ return true;
+ }
+
+ /* Initiate or update synch mode if applicable */
+ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG)) {
+ syncpt = iseqno + exp_pkts - 1;
+ if (!tipc_link_is_up(l)) {
+ tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
+ __tipc_node_link_up(n, bearer_id, xmitq);
+ }
+ if (n->state == SELF_UP_PEER_UP) {
+ n->sync_point = syncpt;
+ tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_BEGIN_EVT);
+ }
+ if (less(syncpt, n->sync_point))
+ n->sync_point = syncpt;
+ }
+
+ /* Open tunnel link when parallel link reaches synch point */
+ if ((n->state == NODE_SYNCHING) && tipc_link_is_synching(l)) {
+ if (pl)
+ dlv_nxt = mod(pl->rcv_nxt - skb_queue_len(pl->inputq));
+ if (!pl || more(dlv_nxt, n->sync_point)) {
+ tipc_link_fsm_evt(l, LINK_SYNCH_END_EVT);
+ tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
+ return true;
+ }
+ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG))
+ return true;
+ if (usr == LINK_PROTOCOL)
+ return true;
+ return false;
+ }
+ return true;
+}
+
+/**
+ * tipc_rcv - process TIPC packets/messages arriving from off-node
+ * @net: the applicable net namespace
+ * @skb: TIPC packet
+ * @b: pointer to bearer message arrived on
+ *
+ * Invoked with no locks held. Bearer pointer must point to a valid bearer
+ * structure (i.e. cannot be NULL), but bearer can be inactive.
+ */
+void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
+{
+ struct sk_buff_head xmitq;
+ struct tipc_node *n;
+ struct tipc_msg *hdr = buf_msg(skb);
+ int usr = msg_user(hdr);
+ int bearer_id = b->identity;
+ struct tipc_link_entry *le;
+ int rc = 0;
+
+ __skb_queue_head_init(&xmitq);
+
+ /* Ensure message is well-formed */
+ if (unlikely(!tipc_msg_validate(skb)))
+ goto discard;
+
+ /* Handle arrival of a non-unicast link packet */
+ if (unlikely(msg_non_seq(hdr))) {
+ if (usr == LINK_CONFIG)
+ tipc_disc_rcv(net, skb, b);
+ else
+ tipc_bclink_rcv(net, skb);
+ return;
+ }
+
+ /* Locate neighboring node that sent packet */
+ n = tipc_node_find(net, msg_prevnode(hdr));
+ if (unlikely(!n))
+ goto discard;
+ le = &n->links[bearer_id];
+
+ tipc_node_lock(n);
+
+ /* Is reception permitted at the moment ? */
+ if (!tipc_node_filter_pkt(n, hdr))
+ goto unlock;
+
+ if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
+ tipc_bclink_sync_state(n, hdr);
+
+ /* Release acked broadcast packets */
+ if (unlikely(n->bclink.acked != msg_bcast_ack(hdr)))
+ tipc_bclink_acknowledge(n, msg_bcast_ack(hdr));
+
+ /* Check and if necessary update node state */
+ if (likely(tipc_node_check_state(n, skb, bearer_id, &xmitq))) {
+ rc = tipc_link_rcv(le->link, skb, &xmitq);
+ skb = NULL;
+ }
+unlock:
+ tipc_node_unlock(n);
+
+ if (unlikely(rc & TIPC_LINK_UP_EVT))
+ tipc_node_link_up(n, bearer_id, &xmitq);
+
+ if (unlikely(rc & TIPC_LINK_DOWN_EVT))
+ tipc_node_link_down(n, bearer_id, false);
+
+ if (unlikely(!skb_queue_empty(&n->bclink.namedq)))
+ tipc_named_rcv(net, &n->bclink.namedq);
+
+ if (!skb_queue_empty(&le->inputq))
+ tipc_sk_rcv(net, &le->inputq);
+
+ if (!skb_queue_empty(&xmitq))
+ tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr);
+
+ tipc_node_put(n);
+discard:
+ kfree_skb(skb);
+}
+
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int err;
diff --git a/net/tipc/node.h b/net/tipc/node.h
index 5a834cf142c8..344b3e7594fd 100644
--- a/net/tipc/node.h
+++ b/net/tipc/node.h
@@ -45,23 +45,19 @@
/* Out-of-range value for node signature */
#define INVALID_NODE_SIG 0x10000
+#define INVALID_BEARER_ID -1
+
/* Flags used to take different actions according to flag type
- * TIPC_WAIT_PEER_LINKS_DOWN: wait to see that peer's links are down
- * TIPC_WAIT_OWN_LINKS_DOWN: wait until peer node is declared down
* TIPC_NOTIFY_NODE_DOWN: notify node is down
* TIPC_NOTIFY_NODE_UP: notify node is up
* TIPC_DISTRIBUTE_NAME: publish or withdraw link state name type
*/
enum {
- TIPC_MSG_EVT = 1,
- TIPC_WAIT_PEER_LINKS_DOWN = (1 << 1),
- TIPC_WAIT_OWN_LINKS_DOWN = (1 << 2),
TIPC_NOTIFY_NODE_DOWN = (1 << 3),
TIPC_NOTIFY_NODE_UP = (1 << 4),
TIPC_WAKEUP_BCAST_USERS = (1 << 5),
TIPC_NOTIFY_LINK_UP = (1 << 6),
TIPC_NOTIFY_LINK_DOWN = (1 << 7),
- TIPC_NAMED_MSG_EVT = (1 << 8),
TIPC_BCAST_MSG_EVT = (1 << 9),
TIPC_BCAST_RESET = (1 << 10)
};
@@ -85,10 +81,17 @@ struct tipc_node_bclink {
u32 deferred_size;
struct sk_buff_head deferdq;
struct sk_buff *reasm_buf;
- int inputq_map;
+ struct sk_buff_head namedq;
bool recv_permitted;
};
+struct tipc_link_entry {
+ struct tipc_link *link;
+ u32 mtu;
+ struct sk_buff_head inputq;
+ struct tipc_media_addr maddr;
+};
+
/**
* struct tipc_node - TIPC node structure
* @addr: network address of node
@@ -98,11 +101,12 @@ struct tipc_node_bclink {
* @hash: links to adjacent nodes in unsorted hash chain
* @inputq: pointer to input queue containing messages for msg event
* @namedq: pointer to name table input queue with name table messages
- * @curr_link: the link holding the node lock, if any
- * @active_links: pointers to active links to node
- * @links: pointers to all links to node
+ * @active_links: bearer ids of active links, used as index into links[] array
+ * @links: array containing references to all links to node
* @action_flags: bit mask of different types of node actions
* @bclink: broadcast-related info
+ * @state: connectivity state vs peer node
+ * @sync_point: sequence number where synch/failover is finished
* @list: links to adjacent nodes in sorted list of cluster's nodes
* @working_links: number of working links to node (both active and standby)
* @link_cnt: number of links to node
@@ -118,14 +122,13 @@ struct tipc_node {
spinlock_t lock;
struct net *net;
struct hlist_node hash;
- struct sk_buff_head *inputq;
- struct sk_buff_head *namedq;
- struct tipc_link *active_links[2];
- u32 act_mtus[2];
- struct tipc_link *links[MAX_BEARERS];
+ int active_links[2];
+ struct tipc_link_entry links[MAX_BEARERS];
int action_flags;
struct tipc_node_bclink bclink;
struct list_head list;
+ int state;
+ u16 sync_point;
int link_cnt;
u16 working_links;
u16 capabilities;
@@ -133,25 +136,32 @@ struct tipc_node {
u32 link_id;
struct list_head publ_list;
struct list_head conn_sks;
+ unsigned long keepalive_intv;
+ struct timer_list timer;
struct rcu_head rcu;
};
struct tipc_node *tipc_node_find(struct net *net, u32 addr);
void tipc_node_put(struct tipc_node *node);
-struct tipc_node *tipc_node_create(struct net *net, u32 addr);
void tipc_node_stop(struct net *net);
+void tipc_node_check_dest(struct net *net, u32 onode,
+ struct tipc_bearer *bearer,
+ u16 capabilities, u32 signature,
+ struct tipc_media_addr *maddr,
+ bool *respond, bool *dupl_addr);
+void tipc_node_delete_links(struct net *net, int bearer_id);
void tipc_node_attach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
void tipc_node_detach_link(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_link_down(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-void tipc_node_link_up(struct tipc_node *n_ptr, struct tipc_link *l_ptr);
-int tipc_node_active_links(struct tipc_node *n_ptr);
-int tipc_node_is_up(struct tipc_node *n_ptr);
+bool tipc_node_is_up(struct tipc_node *n);
int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node,
char *linkname, size_t len);
void tipc_node_unlock(struct tipc_node *node);
+int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode,
+ int selector);
+int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest,
+ u32 selector);
int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port);
void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port);
-
int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb);
static inline void tipc_node_lock(struct tipc_node *node)
@@ -159,26 +169,30 @@ static inline void tipc_node_lock(struct tipc_node *node)
spin_lock_bh(&node->lock);
}
-static inline bool tipc_node_blocked(struct tipc_node *node)
+static inline struct tipc_link *node_active_link(struct tipc_node *n, int sel)
{
- return (node->action_flags & (TIPC_WAIT_PEER_LINKS_DOWN |
- TIPC_NOTIFY_NODE_DOWN | TIPC_WAIT_OWN_LINKS_DOWN));
+ int bearer_id = n->active_links[sel & 1];
+
+ if (unlikely(bearer_id == INVALID_BEARER_ID))
+ return NULL;
+
+ return n->links[bearer_id].link;
}
-static inline uint tipc_node_get_mtu(struct net *net, u32 addr, u32 selector)
+static inline unsigned int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel)
{
- struct tipc_node *node;
- u32 mtu;
-
- node = tipc_node_find(net, addr);
+ struct tipc_node *n;
+ int bearer_id;
+ unsigned int mtu = MAX_MSG_SIZE;
- if (likely(node)) {
- mtu = node->act_mtus[selector & 1];
- tipc_node_put(node);
- } else {
- mtu = MAX_MSG_SIZE;
- }
+ n = tipc_node_find(net, addr);
+ if (unlikely(!n))
+ return mtu;
+ bearer_id = n->active_links[sel & 1];
+ if (likely(bearer_id != INVALID_BEARER_ID))
+ mtu = n->links[bearer_id].mtu;
+ tipc_node_put(n);
return mtu;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3a7567f690f3..1060d52ff23e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -248,6 +248,22 @@ static void tsk_advance_rx_queue(struct sock *sk)
kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}
+/* tipc_sk_respond() : send response message back to sender
+ */
+static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
+{
+ u32 selector;
+ u32 dnode;
+ u32 onode = tipc_own_addr(sock_net(sk));
+
+ if (!tipc_msg_reverse(onode, &skb, err))
+ return;
+
+ dnode = msg_destnode(buf_msg(skb));
+ selector = msg_origport(buf_msg(skb));
+ tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector);
+}
+
/**
* tsk_rej_rx_queue - reject all buffers in socket receive queue
*
@@ -256,13 +272,9 @@ static void tsk_advance_rx_queue(struct sock *sk)
static void tsk_rej_rx_queue(struct sock *sk)
{
struct sk_buff *skb;
- u32 dnode;
- u32 own_node = tsk_own_node(tipc_sk(sk));
- while ((skb = __skb_dequeue(&sk->sk_receive_queue))) {
- if (tipc_msg_reverse(own_node, skb, &dnode, TIPC_ERR_NO_PORT))
- tipc_link_xmit_skb(sock_net(sk), skb, dnode, 0);
- }
+ while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
+ tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
}
/* tsk_peer_msg - verify if message was sent by connected port's peer
@@ -441,9 +453,7 @@ static int tipc_release(struct socket *sock)
tsk->connected = 0;
tipc_node_remove_conn(net, dnode, tsk->portid);
}
- if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode,
- TIPC_ERR_NO_PORT))
- tipc_link_xmit_skb(net, skb, dnode, 0);
+ tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
}
}
@@ -456,7 +466,7 @@ static int tipc_release(struct socket *sock)
tsk_own_node(tsk), tsk_peer_port(tsk),
tsk->portid, TIPC_ERR_NO_PORT);
if (skb)
- tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
tipc_node_remove_conn(net, dnode, tsk->portid);
}
@@ -686,21 +696,22 @@ new_mtu:
do {
rc = tipc_bclink_xmit(net, pktchain);
- if (likely(rc >= 0)) {
- rc = dsz;
- break;
+ if (likely(!rc))
+ return dsz;
+
+ if (rc == -ELINKCONG) {
+ tsk->link_cong = 1;
+ rc = tipc_wait_for_sndmsg(sock, &timeo);
+ if (!rc)
+ continue;
}
+ __skb_queue_purge(pktchain);
if (rc == -EMSGSIZE) {
msg->msg_iter = save;
goto new_mtu;
}
- if (rc != -ELINKCONG)
- break;
- tipc_sk(sk)->link_cong = 1;
- rc = tipc_wait_for_sndmsg(sock, &timeo);
- if (rc)
- __skb_queue_purge(pktchain);
- } while (!rc);
+ break;
+ } while (1);
return rc;
}
@@ -763,35 +774,35 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
/**
* tipc_sk_proto_rcv - receive a connection mng protocol message
* @tsk: receiving socket
- * @skb: pointer to message buffer. Set to NULL if buffer is consumed.
+ * @skb: pointer to message buffer.
*/
-static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff **skb)
+static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb)
{
- struct tipc_msg *msg = buf_msg(*skb);
+ struct sock *sk = &tsk->sk;
+ struct tipc_msg *hdr = buf_msg(skb);
+ int mtyp = msg_type(hdr);
int conn_cong;
- u32 dnode;
- u32 own_node = tsk_own_node(tsk);
+
/* Ignore if connection cannot be validated: */
- if (!tsk_peer_msg(tsk, msg))
+ if (!tsk_peer_msg(tsk, hdr))
goto exit;
tsk->probing_state = TIPC_CONN_OK;
- if (msg_type(msg) == CONN_ACK) {
+ if (mtyp == CONN_PROBE) {
+ msg_set_type(hdr, CONN_PROBE_REPLY);
+ tipc_sk_respond(sk, skb, TIPC_OK);
+ return;
+ } else if (mtyp == CONN_ACK) {
conn_cong = tsk_conn_cong(tsk);
- tsk->sent_unacked -= msg_msgcnt(msg);
+ tsk->sent_unacked -= msg_msgcnt(hdr);
if (conn_cong)
- tsk->sk.sk_write_space(&tsk->sk);
- } else if (msg_type(msg) == CONN_PROBE) {
- if (tipc_msg_reverse(own_node, *skb, &dnode, TIPC_OK)) {
- msg_set_type(msg, CONN_PROBE_REPLY);
- return;
- }
+ sk->sk_write_space(sk);
+ } else if (mtyp != CONN_PROBE_REPLY) {
+ pr_warn("Received unknown CONN_PROTO msg\n");
}
- /* Do nothing if msg_type() == CONN_PROBE_REPLY */
exit:
- kfree_skb(*skb);
- *skb = NULL;
+ kfree_skb(skb);
}
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p)
@@ -924,24 +935,25 @@ new_mtu:
do {
skb = skb_peek(pktchain);
TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong;
- rc = tipc_link_xmit(net, pktchain, dnode, tsk->portid);
- if (likely(rc >= 0)) {
+ rc = tipc_node_xmit(net, pktchain, dnode, tsk->portid);
+ if (likely(!rc)) {
if (sock->state != SS_READY)
sock->state = SS_CONNECTING;
- rc = dsz;
- break;
+ return dsz;
}
+ if (rc == -ELINKCONG) {
+ tsk->link_cong = 1;
+ rc = tipc_wait_for_sndmsg(sock, &timeo);
+ if (!rc)
+ continue;
+ }
+ __skb_queue_purge(pktchain);
if (rc == -EMSGSIZE) {
m->msg_iter = save;
goto new_mtu;
}
- if (rc != -ELINKCONG)
- break;
- tsk->link_cong = 1;
- rc = tipc_wait_for_sndmsg(sock, &timeo);
- if (rc)
- __skb_queue_purge(pktchain);
- } while (!rc);
+ break;
+ } while (1);
return rc;
}
@@ -1043,15 +1055,16 @@ next:
return rc;
do {
if (likely(!tsk_conn_cong(tsk))) {
- rc = tipc_link_xmit(net, pktchain, dnode, portid);
+ rc = tipc_node_xmit(net, pktchain, dnode, portid);
if (likely(!rc)) {
tsk->sent_unacked++;
sent += send;
if (sent == dsz)
- break;
+ return dsz;
goto next;
}
if (rc == -EMSGSIZE) {
+ __skb_queue_purge(pktchain);
tsk->max_pkt = tipc_node_get_mtu(net, dnode,
portid);
m->msg_iter = save;
@@ -1059,13 +1072,13 @@ next:
}
if (rc != -ELINKCONG)
break;
+
tsk->link_cong = 1;
}
rc = tipc_wait_for_sndpkt(sock, &timeo);
- if (rc)
- __skb_queue_purge(pktchain);
} while (!rc);
+ __skb_queue_purge(pktchain);
return sent ? sent : rc;
}
@@ -1221,7 +1234,7 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
return;
msg = buf_msg(skb);
msg_set_msgcnt(msg, ack);
- tipc_link_xmit_skb(net, skb, dnode, msg_link_selector(msg));
+ tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg));
}
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
@@ -1507,82 +1520,81 @@ static void tipc_data_ready(struct sock *sk)
* @tsk: TIPC socket
* @skb: pointer to message buffer. Set to NULL if buffer is consumed
*
- * Returns 0 (TIPC_OK) if everything ok, -TIPC_ERR_NO_PORT otherwise
+ * Returns true if everything ok, false otherwise
*/
-static int filter_connect(struct tipc_sock *tsk, struct sk_buff **skb)
+static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb)
{
struct sock *sk = &tsk->sk;
struct net *net = sock_net(sk);
struct socket *sock = sk->sk_socket;
- struct tipc_msg *msg = buf_msg(*skb);
- int retval = -TIPC_ERR_NO_PORT;
+ struct tipc_msg *hdr = buf_msg(skb);
- if (msg_mcast(msg))
- return retval;
+ if (unlikely(msg_mcast(hdr)))
+ return false;
switch ((int)sock->state) {
case SS_CONNECTED:
+
/* Accept only connection-based messages sent by peer */
- if (tsk_peer_msg(tsk, msg)) {
- if (unlikely(msg_errcode(msg))) {
- sock->state = SS_DISCONNECTING;
- tsk->connected = 0;
- /* let timer expire on it's own */
- tipc_node_remove_conn(net, tsk_peer_node(tsk),
- tsk->portid);
- }
- retval = TIPC_OK;
+ if (unlikely(!tsk_peer_msg(tsk, hdr)))
+ return false;
+
+ if (unlikely(msg_errcode(hdr))) {
+ sock->state = SS_DISCONNECTING;
+ tsk->connected = 0;
+ /* Let timer expire on its own */
+ tipc_node_remove_conn(net, tsk_peer_node(tsk),
+ tsk->portid);
}
- break;
+ return true;
+
case SS_CONNECTING:
- /* Accept only ACK or NACK message */
- if (unlikely(!msg_connected(msg)))
- break;
+ /* Accept only ACK or NACK message */
+ if (unlikely(!msg_connected(hdr)))
+ return false;
- if (unlikely(msg_errcode(msg))) {
+ if (unlikely(msg_errcode(hdr))) {
sock->state = SS_DISCONNECTING;
sk->sk_err = ECONNREFUSED;
- retval = TIPC_OK;
- break;
+ return true;
}
- if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) {
+ if (unlikely(!msg_isdata(hdr))) {
sock->state = SS_DISCONNECTING;
sk->sk_err = EINVAL;
- retval = TIPC_OK;
- break;
+ return true;
}
- tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg));
- msg_set_importance(&tsk->phdr, msg_importance(msg));
+ tipc_sk_finish_conn(tsk, msg_origport(hdr), msg_orignode(hdr));
+ msg_set_importance(&tsk->phdr, msg_importance(hdr));
sock->state = SS_CONNECTED;
- /* If an incoming message is an 'ACK-', it should be
- * discarded here because it doesn't contain useful
- * data. In addition, we should try to wake up
- * connect() routine if sleeping.
- */
- if (msg_data_sz(msg) == 0) {
- kfree_skb(*skb);
- *skb = NULL;
- if (waitqueue_active(sk_sleep(sk)))
- wake_up_interruptible(sk_sleep(sk));
- }
- retval = TIPC_OK;
- break;
+ /* If 'ACK+' message, add to socket receive queue */
+ if (msg_data_sz(hdr))
+ return true;
+
+ /* If empty 'ACK-' message, wake up sleeping connect() */
+ if (waitqueue_active(sk_sleep(sk)))
+ wake_up_interruptible(sk_sleep(sk));
+
+ /* 'ACK-' message is neither accepted nor rejected: */
+ msg_set_dest_droppable(hdr, 1);
+ return false;
+
case SS_LISTENING:
case SS_UNCONNECTED:
+
/* Accept only SYN message */
- if (!msg_connected(msg) && !(msg_errcode(msg)))
- retval = TIPC_OK;
+ if (!msg_connected(hdr) && !(msg_errcode(hdr)))
+ return true;
break;
case SS_DISCONNECTING:
break;
default:
pr_err("Unknown socket state %u\n", sock->state);
}
- return retval;
+ return false;
}
/**
@@ -1617,61 +1629,70 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *buf)
/**
* filter_rcv - validate incoming message
* @sk: socket
- * @skb: pointer to message. Set to NULL if buffer is consumed.
+ * @skb: pointer to message.
*
* Enqueues message on receive queue if acceptable; optionally handles
* disconnect indication for a connected socket.
*
* Called with socket lock already taken
*
- * Returns 0 (TIPC_OK) if message was ok, -TIPC error code if rejected
+ * Returns true if message was added to socket receive queue, otherwise false
*/
-static int filter_rcv(struct sock *sk, struct sk_buff **skb)
+static bool filter_rcv(struct sock *sk, struct sk_buff *skb)
{
struct socket *sock = sk->sk_socket;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_msg *msg = buf_msg(*skb);
- unsigned int limit = rcvbuf_limit(sk, *skb);
- int rc = TIPC_OK;
+ struct tipc_msg *hdr = buf_msg(skb);
+ unsigned int limit = rcvbuf_limit(sk, skb);
+ int err = TIPC_OK;
+ int usr = msg_user(hdr);
- if (unlikely(msg_user(msg) == CONN_MANAGER)) {
+ if (unlikely(msg_user(hdr) == CONN_MANAGER)) {
tipc_sk_proto_rcv(tsk, skb);
- return TIPC_OK;
+ return false;
}
- if (unlikely(msg_user(msg) == SOCK_WAKEUP)) {
- kfree_skb(*skb);
+ if (unlikely(usr == SOCK_WAKEUP)) {
+ kfree_skb(skb);
tsk->link_cong = 0;
sk->sk_write_space(sk);
- *skb = NULL;
- return TIPC_OK;
+ return false;
}
- /* Reject message if it is wrong sort of message for socket */
- if (msg_type(msg) > TIPC_DIRECT_MSG)
- return -TIPC_ERR_NO_PORT;
+ /* Drop if illegal message type */
+ if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) {
+ kfree_skb(skb);
+ return false;
+ }
- if (sock->state == SS_READY) {
- if (msg_connected(msg))
- return -TIPC_ERR_NO_PORT;
- } else {
- rc = filter_connect(tsk, skb);
- if (rc != TIPC_OK || !*skb)
- return rc;
+ /* Reject if wrong message type for current socket state */
+ if (unlikely(sock->state == SS_READY)) {
+ if (msg_connected(hdr)) {
+ err = TIPC_ERR_NO_PORT;
+ goto reject;
+ }
+ } else if (unlikely(!filter_connect(tsk, skb))) {
+ err = TIPC_ERR_NO_PORT;
+ goto reject;
}
/* Reject message if there isn't room to queue it */
- if (sk_rmem_alloc_get(sk) + (*skb)->truesize >= limit)
- return -TIPC_ERR_OVERLOAD;
+ if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) {
+ err = TIPC_ERR_OVERLOAD;
+ goto reject;
+ }
/* Enqueue message */
- TIPC_SKB_CB(*skb)->handle = NULL;
- __skb_queue_tail(&sk->sk_receive_queue, *skb);
- skb_set_owner_r(*skb, sk);
+ TIPC_SKB_CB(skb)->handle = NULL;
+ __skb_queue_tail(&sk->sk_receive_queue, skb);
+ skb_set_owner_r(skb, sk);
sk->sk_data_ready(sk);
- *skb = NULL;
- return TIPC_OK;
+ return true;
+
+reject:
+ tipc_sk_respond(sk, skb, err);
+ return false;
}
/**
@@ -1685,22 +1706,10 @@ static int filter_rcv(struct sock *sk, struct sk_buff **skb)
*/
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
{
- int err;
- atomic_t *dcnt;
- u32 dnode;
- struct tipc_sock *tsk = tipc_sk(sk);
- struct net *net = sock_net(sk);
- uint truesize = skb->truesize;
+ unsigned int truesize = skb->truesize;
- err = filter_rcv(sk, &skb);
- if (likely(!skb)) {
- dcnt = &tsk->dupl_rcvcnt;
- if (atomic_read(dcnt) < TIPC_CONN_OVERLOAD_LIMIT)
- atomic_add(truesize, dcnt);
- return 0;
- }
- if (!err || tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode, -err))
- tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
+ if (likely(filter_rcv(sk, skb)))
+ atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt);
return 0;
}
@@ -1710,45 +1719,43 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb)
* @inputq: list of incoming buffers with potentially different destinations
* @sk: socket where the buffers should be enqueued
* @dport: port number for the socket
- * @_skb: returned buffer to be forwarded or rejected, if applicable
*
* Caller must hold socket lock
- *
- * Returns TIPC_OK if all buffers enqueued, otherwise -TIPC_ERR_OVERLOAD
- * or -TIPC_ERR_NO_PORT
*/
-static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
- u32 dport, struct sk_buff **_skb)
+static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
+ u32 dport)
{
unsigned int lim;
atomic_t *dcnt;
- int err;
struct sk_buff *skb;
unsigned long time_limit = jiffies + 2;
while (skb_queue_len(inputq)) {
if (unlikely(time_after_eq(jiffies, time_limit)))
- return TIPC_OK;
+ return;
+
skb = tipc_skb_dequeue(inputq, dport);
if (unlikely(!skb))
- return TIPC_OK;
+ return;
+
+ /* Add message directly to receive queue if possible */
if (!sock_owned_by_user(sk)) {
- err = filter_rcv(sk, &skb);
- if (likely(!skb))
- continue;
- *_skb = skb;
- return err;
+ filter_rcv(sk, skb);
+ continue;
}
+
+ /* Try backlog, compensating for double-counted bytes */
dcnt = &tipc_sk(sk)->dupl_rcvcnt;
if (sk->sk_backlog.len)
atomic_set(dcnt, 0);
lim = rcvbuf_limit(sk, skb) + atomic_read(dcnt);
if (likely(!sk_add_backlog(sk, skb, lim)))
continue;
- *_skb = skb;
- return -TIPC_ERR_OVERLOAD;
+
+ /* Overload => reject message back to sender */
+ tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD);
+ break;
}
- return TIPC_OK;
}
/**
@@ -1756,49 +1763,46 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
* @inputq: buffer list containing the buffers
* Consumes all buffers in list until inputq is empty
* Note: may be called in multiple threads referring to the same queue
- * Returns 0 if last buffer was accepted, otherwise -EHOSTUNREACH
- * Only node local calls check the return value, sending single-buffer queues
*/
-int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
+void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq)
{
u32 dnode, dport = 0;
int err;
- struct sk_buff *skb;
struct tipc_sock *tsk;
- struct tipc_net *tn;
struct sock *sk;
+ struct sk_buff *skb;
while (skb_queue_len(inputq)) {
- err = -TIPC_ERR_NO_PORT;
- skb = NULL;
dport = tipc_skb_peek_port(inputq, dport);
tsk = tipc_sk_lookup(net, dport);
+
if (likely(tsk)) {
sk = &tsk->sk;
if (likely(spin_trylock_bh(&sk->sk_lock.slock))) {
- err = tipc_sk_enqueue(inputq, sk, dport, &skb);
+ tipc_sk_enqueue(inputq, sk, dport);
spin_unlock_bh(&sk->sk_lock.slock);
- dport = 0;
}
sock_put(sk);
- } else {
- skb = tipc_skb_dequeue(inputq, dport);
- }
- if (likely(!skb))
continue;
- if (tipc_msg_lookup_dest(net, skb, &dnode, &err))
- goto xmit;
- if (!err) {
- dnode = msg_destnode(buf_msg(skb));
- goto xmit;
}
- tn = net_generic(net, tipc_net_id);
- if (!tipc_msg_reverse(tn->own_addr, skb, &dnode, -err))
+
+ /* No destination socket => dequeue skb if still there */
+ skb = tipc_skb_dequeue(inputq, dport);
+ if (!skb)
+ return;
+
+ /* Try secondary lookup if unresolved named message */
+ err = TIPC_ERR_NO_PORT;
+ if (tipc_msg_lookup_dest(net, skb, &err))
+ goto xmit;
+
+ /* Prepare for message rejection */
+ if (!tipc_msg_reverse(tipc_own_addr(net), &skb, err))
continue;
xmit:
- tipc_link_xmit_skb(net, skb, dnode, dport);
+ dnode = msg_destnode(buf_msg(skb));
+ tipc_node_xmit_skb(net, skb, dnode, dport);
}
- return err ? -EHOSTUNREACH : 0;
}
static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
@@ -2067,7 +2071,10 @@ static int tipc_shutdown(struct socket *sock, int how)
struct net *net = sock_net(sk);
struct tipc_sock *tsk = tipc_sk(sk);
struct sk_buff *skb;
- u32 dnode;
+ u32 dnode = tsk_peer_node(tsk);
+ u32 dport = tsk_peer_port(tsk);
+ u32 onode = tipc_own_addr(net);
+ u32 oport = tsk->portid;
int res;
if (how != SHUT_RDWR)
@@ -2080,6 +2087,8 @@ static int tipc_shutdown(struct socket *sock, int how)
case SS_CONNECTED:
restart:
+ dnode = tsk_peer_node(tsk);
+
/* Disconnect and send a 'FIN+' or 'FIN-' message to peer */
skb = __skb_dequeue(&sk->sk_receive_queue);
if (skb) {
@@ -2087,19 +2096,13 @@ restart:
kfree_skb(skb);
goto restart;
}
- if (tipc_msg_reverse(tsk_own_node(tsk), skb, &dnode,
- TIPC_CONN_SHUTDOWN))
- tipc_link_xmit_skb(net, skb, dnode,
- tsk->portid);
+ tipc_sk_respond(sk, skb, TIPC_CONN_SHUTDOWN);
} else {
- dnode = tsk_peer_node(tsk);
-
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
TIPC_CONN_MSG, SHORT_H_SIZE,
- 0, dnode, tsk_own_node(tsk),
- tsk_peer_port(tsk),
- tsk->portid, TIPC_CONN_SHUTDOWN);
- tipc_link_xmit_skb(net, skb, dnode, tsk->portid);
+ 0, dnode, onode, dport, oport,
+ TIPC_CONN_SHUTDOWN);
+ tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
}
tsk->connected = 0;
sock->state = SS_DISCONNECTING;
@@ -2161,7 +2164,7 @@ static void tipc_sk_timeout(unsigned long data)
}
bh_unlock_sock(sk);
if (skb)
- tipc_link_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
+ tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid);
exit:
sock_put(sk);
}
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index bf6551389522..4241f22069dc 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -44,7 +44,7 @@
SKB_TRUESIZE(TIPC_MAX_USER_MSG_SIZE))
int tipc_socket_init(void);
void tipc_socket_stop(void);
-int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
+void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq);
void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
struct sk_buff_head *inputq);
void tipc_sk_reinit(struct net *net);
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 66deebc66aa1..c170d3138953 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -194,7 +194,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
.saddr = src->ipv6,
.flowi6_proto = IPPROTO_UDP
};
- err = ipv6_stub->ipv6_dst_lookup(ub->ubsock->sk, &ndst, &fl6);
+ err = ipv6_stub->ipv6_dst_lookup(net, ub->ubsock->sk, &ndst,
+ &fl6);
if (err)
goto tx_error;
ttl = ip6_dst_hoplimit(ndst);
diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c
index 7d730543f243..477364ad750e 100644
--- a/net/wimax/op-rfkill.c
+++ b/net/wimax/op-rfkill.c
@@ -135,8 +135,7 @@ EXPORT_SYMBOL_GPL(wimax_report_rfkill_hw);
* @state: New state of the RF kill switch. %WIMAX_RF_ON radio on,
* %WIMAX_RF_OFF radio off.
*
- * Reports changes in the software RF switch state to the the WiMAX
- * stack.
+ * Reports changes in the software RF switch state to the WiMAX stack.
*
* The main use is during initialization, so the driver can query the
* device for its current software radio kill switch state and feed it
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 2a0bbd22854b..3893409dee95 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -407,6 +407,9 @@ use_default_name:
INIT_LIST_HEAD(&rdev->bss_list);
INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done);
INIT_WORK(&rdev->sched_scan_results_wk, __cfg80211_sched_scan_results);
+ INIT_LIST_HEAD(&rdev->mlme_unreg);
+ spin_lock_init(&rdev->mlme_unreg_lock);
+ INIT_WORK(&rdev->mlme_unreg_wk, cfg80211_mlme_unreg_wk);
INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk,
cfg80211_dfs_channels_update_work);
#ifdef CONFIG_CFG80211_WEXT
@@ -802,6 +805,7 @@ void wiphy_unregister(struct wiphy *wiphy)
cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
flush_work(&rdev->destroy_work);
flush_work(&rdev->sched_scan_stop_wk);
+ flush_work(&rdev->mlme_unreg_wk);
#ifdef CONFIG_PM
if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup)
@@ -855,6 +859,7 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev)
switch (wdev->iftype) {
case NL80211_IFTYPE_P2P_DEVICE:
+ cfg80211_mlme_purge_registrations(wdev);
cfg80211_stop_p2p_device(rdev, wdev);
break;
default:
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 311eef26bf88..b9d5bc8c148d 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -59,6 +59,10 @@ struct cfg80211_registered_device {
struct list_head beacon_registrations;
spinlock_t beacon_registrations_lock;
+ struct list_head mlme_unreg;
+ spinlock_t mlme_unreg_lock;
+ struct work_struct mlme_unreg_wk;
+
/* protected by RTNL only */
int num_running_ifaces;
int num_running_monitor_ifaces;
@@ -348,6 +352,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
u16 frame_type, const u8 *match_data,
int match_len);
+void cfg80211_mlme_unreg_wk(struct work_struct *wk);
void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 7aae329e2b4e..fb44fa3bf4ef 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -2,6 +2,7 @@
* cfg80211 MLME SAP interface
*
* Copyright (c) 2009, Jouni Malinen <j@w1.fi>
+ * Copyright (c) 2015 Intel Deutschland GmbH
*/
#include <linux/kernel.h>
@@ -389,6 +390,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
struct cfg80211_mgmt_registration {
struct list_head list;
+ struct wireless_dev *wdev;
u32 nlportid;
@@ -399,6 +401,46 @@ struct cfg80211_mgmt_registration {
u8 match[];
};
+static void
+cfg80211_process_mlme_unregistrations(struct cfg80211_registered_device *rdev)
+{
+ struct cfg80211_mgmt_registration *reg;
+
+ ASSERT_RTNL();
+
+ spin_lock_bh(&rdev->mlme_unreg_lock);
+ while ((reg = list_first_entry_or_null(&rdev->mlme_unreg,
+ struct cfg80211_mgmt_registration,
+ list))) {
+ list_del(&reg->list);
+ spin_unlock_bh(&rdev->mlme_unreg_lock);
+
+ if (rdev->ops->mgmt_frame_register) {
+ u16 frame_type = le16_to_cpu(reg->frame_type);
+
+ rdev_mgmt_frame_register(rdev, reg->wdev,
+ frame_type, false);
+ }
+
+ kfree(reg);
+
+ spin_lock_bh(&rdev->mlme_unreg_lock);
+ }
+ spin_unlock_bh(&rdev->mlme_unreg_lock);
+}
+
+void cfg80211_mlme_unreg_wk(struct work_struct *wk)
+{
+ struct cfg80211_registered_device *rdev;
+
+ rdev = container_of(wk, struct cfg80211_registered_device,
+ mlme_unreg_wk);
+
+ rtnl_lock();
+ cfg80211_process_mlme_unregistrations(rdev);
+ rtnl_unlock();
+}
+
int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
u16 frame_type, const u8 *match_data,
int match_len)
@@ -449,11 +491,18 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
nreg->match_len = match_len;
nreg->nlportid = snd_portid;
nreg->frame_type = cpu_to_le16(frame_type);
+ nreg->wdev = wdev;
list_add(&nreg->list, &wdev->mgmt_registrations);
+ spin_unlock_bh(&wdev->mgmt_registrations_lock);
+
+ /* process all unregistrations to avoid driver confusion */
+ cfg80211_process_mlme_unregistrations(rdev);
if (rdev->ops->mgmt_frame_register)
rdev_mgmt_frame_register(rdev, wdev, frame_type, true);
+ return 0;
+
out:
spin_unlock_bh(&wdev->mgmt_registrations_lock);
@@ -472,15 +521,12 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
if (reg->nlportid != nlportid)
continue;
- if (rdev->ops->mgmt_frame_register) {
- u16 frame_type = le16_to_cpu(reg->frame_type);
-
- rdev_mgmt_frame_register(rdev, wdev,
- frame_type, false);
- }
-
list_del(&reg->list);
- kfree(reg);
+ spin_lock(&rdev->mlme_unreg_lock);
+ list_add_tail(&reg->list, &rdev->mlme_unreg);
+ spin_unlock(&rdev->mlme_unreg_lock);
+
+ schedule_work(&rdev->mlme_unreg_wk);
}
spin_unlock_bh(&wdev->mgmt_registrations_lock);
@@ -496,16 +542,15 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
{
- struct cfg80211_mgmt_registration *reg, *tmp;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
spin_lock_bh(&wdev->mgmt_registrations_lock);
-
- list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
- list_del(&reg->list);
- kfree(reg);
- }
-
+ spin_lock(&rdev->mlme_unreg_lock);
+ list_splice_tail_init(&wdev->mgmt_registrations, &rdev->mlme_unreg);
+ spin_unlock(&rdev->mlme_unreg_lock);
spin_unlock_bh(&wdev->mgmt_registrations_lock);
+
+ cfg80211_process_mlme_unregistrations(rdev);
}
int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 76b41578a838..5d8748b4c8a2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2321,6 +2321,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
rdev->wiphy.frag_threshold = old_frag_threshold;
rdev->wiphy.rts_threshold = old_rts_threshold;
rdev->wiphy.coverage_class = old_coverage_class;
+ return result;
}
}
return 0;
@@ -7390,7 +7391,8 @@ static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info)
int err;
if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC &&
- dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT)
+ dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
+ dev->ieee80211_ptr->iftype != NL80211_IFTYPE_OCB)
return -EOPNOTSUPP;
if (!rdev->ops->set_mcast_rate)
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index c6e83a7468c0..c23516d0f807 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -733,6 +733,8 @@ static inline void
rdev_mgmt_frame_register(struct cfg80211_registered_device *rdev,
struct wireless_dev *wdev, u16 frame_type, bool reg)
{
+ might_sleep();
+
trace_rdev_mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg);
rdev->ops->mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg);
trace_rdev_return_void(&rdev->wiphy);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index aa2d75482017..b144485946f2 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1004,7 +1004,7 @@ static u32 map_regdom_flags(u32 rd_flags)
static const struct ieee80211_reg_rule *
freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
- const struct ieee80211_regdomain *regd)
+ const struct ieee80211_regdomain *regd, u32 bw)
{
int i;
bool band_rule_found = false;
@@ -1028,7 +1028,7 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
if (!band_rule_found)
band_rule_found = freq_in_rule_band(fr, center_freq);
- bw_fits = reg_does_bw_fit(fr, center_freq, MHZ_TO_KHZ(20));
+ bw_fits = reg_does_bw_fit(fr, center_freq, bw);
if (band_rule_found && bw_fits)
return rr;
@@ -1040,14 +1040,26 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
return ERR_PTR(-EINVAL);
}
-const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy,
- u32 center_freq)
+const struct ieee80211_reg_rule *__freq_reg_info(struct wiphy *wiphy,
+ u32 center_freq, u32 min_bw)
{
- const struct ieee80211_regdomain *regd;
+ const struct ieee80211_regdomain *regd = reg_get_regdomain(wiphy);
+ const struct ieee80211_reg_rule *reg_rule = NULL;
+ u32 bw;
- regd = reg_get_regdomain(wiphy);
+ for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) {
+ reg_rule = freq_reg_info_regd(wiphy, center_freq, regd, bw);
+ if (!IS_ERR(reg_rule))
+ return reg_rule;
+ }
- return freq_reg_info_regd(wiphy, center_freq, regd);
+ return reg_rule;
+}
+
+const struct ieee80211_reg_rule *freq_reg_info(struct wiphy *wiphy,
+ u32 center_freq)
+{
+ return __freq_reg_info(wiphy, center_freq, MHZ_TO_KHZ(20));
}
EXPORT_SYMBOL(freq_reg_info);
@@ -1176,8 +1188,20 @@ static void handle_channel(struct wiphy *wiphy,
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
+ /* If we get a reg_rule we can assume that at least 5Mhz fit */
+ if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
+ MHZ_TO_KHZ(10)))
+ bw_flags |= IEEE80211_CHAN_NO_10MHZ;
+ if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
+ MHZ_TO_KHZ(20)))
+ bw_flags |= IEEE80211_CHAN_NO_20MHZ;
+
+ if (max_bandwidth_khz < MHZ_TO_KHZ(10))
+ bw_flags |= IEEE80211_CHAN_NO_10MHZ;
+ if (max_bandwidth_khz < MHZ_TO_KHZ(20))
+ bw_flags |= IEEE80211_CHAN_NO_20MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags = IEEE80211_CHAN_NO_HT40;
+ bw_flags |= IEEE80211_CHAN_NO_HT40;
if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(160))
@@ -1695,9 +1719,15 @@ static void handle_channel_custom(struct wiphy *wiphy,
const struct ieee80211_power_rule *power_rule = NULL;
const struct ieee80211_freq_range *freq_range = NULL;
u32 max_bandwidth_khz;
+ u32 bw;
- reg_rule = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq),
- regd);
+ for (bw = MHZ_TO_KHZ(20); bw >= MHZ_TO_KHZ(5); bw = bw / 2) {
+ reg_rule = freq_reg_info_regd(wiphy,
+ MHZ_TO_KHZ(chan->center_freq),
+ regd, bw);
+ if (!IS_ERR(reg_rule))
+ break;
+ }
if (IS_ERR(reg_rule)) {
REG_DBG_PRINT("Disabling freq %d MHz as custom regd has no rule that fits it\n",
@@ -1721,8 +1751,20 @@ static void handle_channel_custom(struct wiphy *wiphy,
if (reg_rule->flags & NL80211_RRF_AUTO_BW)
max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
+ /* If we get a reg_rule we can assume that at least 5Mhz fit */
+ if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
+ MHZ_TO_KHZ(10)))
+ bw_flags |= IEEE80211_CHAN_NO_10MHZ;
+ if (!reg_does_bw_fit(freq_range, MHZ_TO_KHZ(chan->center_freq),
+ MHZ_TO_KHZ(20)))
+ bw_flags |= IEEE80211_CHAN_NO_20MHZ;
+
+ if (max_bandwidth_khz < MHZ_TO_KHZ(10))
+ bw_flags |= IEEE80211_CHAN_NO_10MHZ;
+ if (max_bandwidth_khz < MHZ_TO_KHZ(20))
+ bw_flags |= IEEE80211_CHAN_NO_20MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(40))
- bw_flags = IEEE80211_CHAN_NO_HT40;
+ bw_flags |= IEEE80211_CHAN_NO_HT40;
if (max_bandwidth_khz < MHZ_TO_KHZ(80))
bw_flags |= IEEE80211_CHAN_NO_80MHZ;
if (max_bandwidth_khz < MHZ_TO_KHZ(160))
@@ -2079,10 +2121,7 @@ static void reg_process_hint(struct regulatory_request *reg_request)
reg_process_hint_core(reg_request);
return;
case NL80211_REGDOM_SET_BY_USER:
- treatment = reg_process_hint_user(reg_request);
- if (treatment == REG_REQ_IGNORE ||
- treatment == REG_REQ_ALREADY_SET)
- return;
+ reg_process_hint_user(reg_request);
return;
case NL80211_REGDOM_SET_BY_DRIVER:
if (!wiphy)
@@ -2099,7 +2138,9 @@ static void reg_process_hint(struct regulatory_request *reg_request)
goto out_free;
}
- /* This is required so that the orig_* parameters are saved */
+ /* This is required so that the orig_* parameters are saved.
+ * NOTE: treatment must be set for any case that reaches here!
+ */
if (treatment == REG_REQ_ALREADY_SET && wiphy &&
wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
wiphy_update_regulatory(wiphy, reg_request->initiator);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 18cead7645be..94af3d065785 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -115,7 +115,8 @@ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
rcu_read_unlock();
}
-static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
+static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
+ int tos, int oif,
const xfrm_address_t *saddr,
const xfrm_address_t *daddr,
int family)
@@ -127,14 +128,15 @@ static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
if (unlikely(afinfo == NULL))
return ERR_PTR(-EAFNOSUPPORT);
- dst = afinfo->dst_lookup(net, tos, saddr, daddr);
+ dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
xfrm_policy_put_afinfo(afinfo);
return dst;
}
-static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
+static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
+ int tos, int oif,
xfrm_address_t *prev_saddr,
xfrm_address_t *prev_daddr,
int family)
@@ -153,7 +155,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
daddr = x->coaddr;
}
- dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);
+ dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
if (!IS_ERR(dst)) {
if (prev_saddr != saddr)
@@ -1373,15 +1375,15 @@ int __xfrm_sk_clone_policy(struct sock *sk)
}
static int
-xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
- unsigned short family)
+xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
+ xfrm_address_t *remote, unsigned short family)
{
int err;
struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
if (unlikely(afinfo == NULL))
return -EINVAL;
- err = afinfo->get_saddr(net, local, remote);
+ err = afinfo->get_saddr(net, oif, local, remote);
xfrm_policy_put_afinfo(afinfo);
return err;
}
@@ -1410,7 +1412,9 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
remote = &tmpl->id.daddr;
local = &tmpl->saddr;
if (xfrm_addr_any(local, tmpl->encap_family)) {
- error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
+ error = xfrm_get_saddr(net, fl->flowi_oif,
+ &tmp, remote,
+ tmpl->encap_family);
if (error)
goto fail;
local = &tmp;
@@ -1690,8 +1694,8 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
family = xfrm[i]->props.family;
- dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
- family);
+ dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
+ &saddr, &daddr, family);
err = PTR_ERR(dst);
if (IS_ERR(dst))
goto put_states;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index bd16c6c7e1e7..a8de9e300200 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -925,12 +925,10 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
return err;
if (attrs[XFRMA_ADDRESS_FILTER]) {
- filter = kmalloc(sizeof(*filter), GFP_KERNEL);
+ filter = kmemdup(nla_data(attrs[XFRMA_ADDRESS_FILTER]),
+ sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return -ENOMEM;
-
- memcpy(filter, nla_data(attrs[XFRMA_ADDRESS_FILTER]),
- sizeof(*filter));
}
if (attrs[XFRMA_PROTO])
@@ -2048,7 +2046,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
xfrm_audit_policy_delete(xp, 1, true);
} else {
// reset the timers here?
- WARN(1, "Dont know what to do with soft policy expire\n");
+ WARN(1, "Don't know what to do with soft policy expire\n");
}
km_policy_expired(xp, p->dir, up->hard, nlh->nlmsg_pid);