summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/6lowpan/core.c5
-rw-r--r--net/6lowpan/iphc.c13
-rw-r--r--net/6lowpan/nhc.h2
-rw-r--r--net/6lowpan/nhc_udp.c13
-rw-r--r--net/Kconfig1
-rw-r--r--net/Makefile3
-rw-r--r--net/bluetooth/6lowpan.c78
-rw-r--r--net/bluetooth/hci_core.c147
-rw-r--r--net/bluetooth/hci_event.c21
-rw-r--r--net/bluetooth/hci_sock.c81
-rw-r--r--net/bluetooth/lib.c32
-rw-r--r--net/bluetooth/smp.c2
-rw-r--r--net/bridge/br_device.c4
-rw-r--r--net/bridge/br_fdb.c199
-rw-r--r--net/bridge/br_forward.c31
-rw-r--r--net/bridge/br_if.c3
-rw-r--r--net/bridge/br_input.c35
-rw-r--r--net/bridge/br_ioctl.c3
-rw-r--r--net/bridge/br_mdb.c24
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/bridge/br_netfilter_hooks.c107
-rw-r--r--net/bridge/br_netfilter_ipv6.c21
-rw-r--r--net/bridge/br_netlink.c527
-rw-r--r--net/bridge/br_private.h208
-rw-r--r--net/bridge/br_stp.c41
-rw-r--r--net/bridge/br_stp_bpdu.c12
-rw-r--r--net/bridge/br_sysfs_br.c11
-rw-r--r--net/bridge/br_vlan.c772
-rw-r--r--net/bridge/netfilter/ebt_log.c2
-rw-r--r--net/bridge/netfilter/ebt_nflog.c2
-rw-r--r--net/bridge/netfilter/ebtable_broute.c8
-rw-r--r--net/bridge/netfilter/ebtable_filter.c13
-rw-r--r--net/bridge/netfilter/ebtable_nat.c13
-rw-r--r--net/bridge/netfilter/ebtables.c14
-rw-r--r--net/bridge/netfilter/nf_tables_bridge.c20
-rw-r--r--net/bridge/netfilter/nft_reject_bridge.c19
-rw-r--r--net/can/bcm.c15
-rw-r--r--net/core/dev.c45
-rw-r--r--net/core/dst.c14
-rw-r--r--net/core/filter.c135
-rw-r--r--net/core/lwtunnel.c4
-rw-r--r--net/core/neighbour.c45
-rw-r--r--net/core/net-sysfs.c11
-rw-r--r--net/core/netpoll.c23
-rw-r--r--net/core/request_sock.c88
-rw-r--r--net/core/rtnetlink.c6
-rw-r--r--net/core/sock.c33
-rw-r--r--net/core/sock_diag.c14
-rw-r--r--net/core/utils.c49
-rw-r--r--net/dcb/dcbnl.c30
-rw-r--r--net/dccp/dccp.h12
-rw-r--r--net/dccp/ipv4.c88
-rw-r--r--net/dccp/ipv6.c113
-rw-r--r--net/dccp/minisocks.c4
-rw-r--r--net/dccp/output.c17
-rw-r--r--net/decnet/dn_neigh.c23
-rw-r--r--net/decnet/dn_nsp_in.c7
-rw-r--r--net/decnet/dn_nsp_out.c4
-rw-r--r--net/decnet/dn_route.c38
-rw-r--r--net/decnet/netfilter/dn_rtmsg.c2
-rw-r--r--net/dsa/dsa.c4
-rw-r--r--net/dsa/slave.c137
-rw-r--r--net/ethernet/eth.c2
-rw-r--r--net/ieee802154/6lowpan/6lowpan_i.h14
-rw-r--r--net/ieee802154/6lowpan/core.c126
-rw-r--r--net/ieee802154/6lowpan/reassembly.c157
-rw-r--r--net/ieee802154/6lowpan/rx.c354
-rw-r--r--net/ieee802154/6lowpan/tx.c90
-rw-r--r--net/ieee802154/Kconfig5
-rw-r--r--net/ieee802154/core.c12
-rw-r--r--net/ieee802154/core.h1
-rw-r--r--net/ieee802154/header_ops.c20
-rw-r--r--net/ieee802154/nl802154.c1133
-rw-r--r--net/ieee802154/rdev-ops.h109
-rw-r--r--net/ieee802154/socket.c8
-rw-r--r--net/ipv4/af_inet.c30
-rw-r--r--net/ipv4/arp.c20
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/fib_frontend.c12
-rw-r--r--net/ipv4/fib_semantics.c161
-rw-r--r--net/ipv4/icmp.c27
-rw-r--r--net/ipv4/igmp.c6
-rw-r--r--net/ipv4/inet_connection_sock.c258
-rw-r--r--net/ipv4/inet_diag.c96
-rw-r--r--net/ipv4/inet_hashtables.c22
-rw-r--r--net/ipv4/ip_forward.c19
-rw-r--r--net/ipv4/ip_fragment.c13
-rw-r--r--net/ipv4/ip_input.c47
-rw-r--r--net/ipv4/ip_output.c139
-rw-r--r--net/ipv4/ip_tunnel_core.c6
-rw-r--r--net/ipv4/ip_vti.c2
-rw-r--r--net/ipv4/ipconfig.c32
-rw-r--r--net/ipv4/ipmr.c15
-rw-r--r--net/ipv4/netfilter.c7
-rw-r--r--net/ipv4/netfilter/arp_tables.c15
-rw-r--r--net/ipv4/netfilter/arptable_filter.c7
-rw-r--r--net/ipv4/netfilter/ip_tables.c31
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c12
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c2
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c32
-rw-r--r--net/ipv4/netfilter/ipt_ah.c2
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c5
-rw-r--r--net/ipv4/netfilter/iptable_filter.c9
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c19
-rw-r--r--net/ipv4/netfilter/iptable_nat.c26
-rw-r--r--net/ipv4/netfilter/iptable_raw.c9
-rw-r--r--net/ipv4/netfilter/iptable_security.c12
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c18
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c4
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c13
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c25
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c44
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c2
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c6
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c10
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c22
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c8
-rw-r--r--net/ipv4/netfilter/nft_dup_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_redir_ipv4.c2
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c5
-rw-r--r--net/ipv4/raw.c13
-rw-r--r--net/ipv4/route.c216
-rw-r--r--net/ipv4/syncookies.c19
-rw-r--r--net/ipv4/tcp.c38
-rw-r--r--net/ipv4/tcp_cong.c12
-rw-r--r--net/ipv4/tcp_fastopen.c71
-rw-r--r--net/ipv4/tcp_input.c102
-rw-r--r--net/ipv4/tcp_ipv4.c197
-rw-r--r--net/ipv4/tcp_minisocks.c54
-rw-r--r--net/ipv4/tcp_output.c80
-rw-r--r--net/ipv4/tcp_timer.c6
-rw-r--r--net/ipv4/udp.c28
-rw-r--r--net/ipv4/xfrm4_input.c7
-rw-r--r--net/ipv4/xfrm4_output.c11
-rw-r--r--net/ipv4/xfrm4_policy.c9
-rw-r--r--net/ipv6/addrconf.c27
-rw-r--r--net/ipv6/datagram.c2
-rw-r--r--net/ipv6/icmp.c6
-rw-r--r--net/ipv6/ila.c4
-rw-r--r--net/ipv6/inet6_connection_sock.c77
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_fib.c1
-rw-r--r--net/ipv6/ip6_input.c15
-rw-r--r--net/ipv6/ip6_output.c89
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ip6mr.c12
-rw-r--r--net/ipv6/mcast.c9
-rw-r--r--net/ipv6/mip6.c16
-rw-r--r--net/ipv6/ndisc.c50
-rw-r--r--net/ipv6/netfilter.c7
-rw-r--r--net/ipv6/netfilter/ip6_tables.c33
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c2
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c12
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c6
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c6
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c18
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c26
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c6
-rw-r--r--net/ipv6/netfilter/ip6table_security.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c18
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c8
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c9
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c25
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c44
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c10
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c22
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c14
-rw-r--r--net/ipv6/netfilter/nft_dup_ipv6.c2
-rw-r--r--net/ipv6/netfilter/nft_redir_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c7
-rw-r--r--net/ipv6/output_core.c24
-rw-r--r--net/ipv6/raw.c9
-rw-r--r--net/ipv6/route.c107
-rw-r--r--net/ipv6/syncookies.c13
-rw-r--r--net/ipv6/tcp_ipv6.c178
-rw-r--r--net/ipv6/udp.c11
-rw-r--r--net/ipv6/xfrm6_input.c4
-rw-r--r--net/ipv6/xfrm6_output.c22
-rw-r--r--net/ipv6/xfrm6_policy.c8
-rw-r--r--net/iucv/af_iucv.c9
-rw-r--r--net/iucv/iucv.c12
-rw-r--r--net/l2tp/l2tp_core.h3
-rw-r--r--net/l2tp/l2tp_eth.c1
-rw-r--r--net/l2tp/l2tp_ip.c1
-rw-r--r--net/l2tp/l2tp_ip6.c1
-rw-r--r--net/l2tp/l2tp_netlink.c7
-rw-r--r--net/l2tp/l2tp_ppp.c1
-rw-r--r--net/l3mdev/Kconfig10
-rw-r--r--net/l3mdev/Makefile5
-rw-r--r--net/l3mdev/l3mdev.c92
-rw-r--r--net/mac80211/agg-rx.c8
-rw-r--r--net/mac80211/agg-tx.c15
-rw-r--r--net/mac80211/cfg.c56
-rw-r--r--net/mac80211/debugfs.c2
-rw-r--r--net/mac80211/debugfs_key.c51
-rw-r--r--net/mac80211/debugfs_netdev.c41
-rw-r--r--net/mac80211/driver-ops.c222
-rw-r--r--net/mac80211/driver-ops.h259
-rw-r--r--net/mac80211/ieee80211_i.h19
-rw-r--r--net/mac80211/iface.c2
-rw-r--r--net/mac80211/main.c3
-rw-r--r--net/mac80211/mesh.c82
-rw-r--r--net/mac80211/mesh.h10
-rw-r--r--net/mac80211/mesh_plink.c9
-rw-r--r--net/mac80211/mlme.c155
-rw-r--r--net/mac80211/offchannel.c6
-rw-r--r--net/mac80211/pm.c3
-rw-r--r--net/mac80211/rate.c5
-rw-r--r--net/mac80211/rc80211_minstrel_debugfs.c12
-rw-r--r--net/mac80211/rc80211_minstrel_ht_debugfs.c12
-rw-r--r--net/mac80211/sta_info.c8
-rw-r--r--net/mac80211/sta_info.h2
-rw-r--r--net/mac80211/status.c108
-rw-r--r--net/mac80211/tdls.c8
-rw-r--r--net/mac80211/trace.h40
-rw-r--r--net/mac80211/tx.c22
-rw-r--r--net/mac80211/util.c11
-rw-r--r--net/mac802154/cfg.c205
-rw-r--r--net/mac802154/iface.c118
-rw-r--r--net/mac802154/llsec.c1
-rw-r--r--net/mac802154/rx.c4
-rw-r--r--net/mac802154/tx.c7
-rw-r--r--net/mpls/mpls_iptunnel.c2
-rw-r--r--net/netfilter/Kconfig15
-rw-r--r--net/netfilter/Makefile2
-rw-r--r--net/netfilter/core.c13
-rw-r--r--net/netfilter/ipset/ip_set_core.c9
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c36
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c91
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c534
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c291
-rw-r--r--net/netfilter/ipvs/ip_vs_est.c20
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c27
-rw-r--r--net/netfilter/ipvs/ip_vs_lblc.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_lblcr.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c5
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_proto.c33
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_ah_esp.c32
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c58
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c61
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c49
-rw-r--r--net/netfilter/ipvs/ip_vs_sh.c45
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c87
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c85
-rw-r--r--net/netfilter/nf_conntrack_core.c22
-rw-r--r--net/netfilter/nf_conntrack_netlink.c98
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_generic.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c1
-rw-r--r--net/netfilter/nf_conntrack_proto_udplite.c1
-rw-r--r--net/netfilter/nf_nat_core.c4
-rw-r--r--net/netfilter/nf_queue.c42
-rw-r--r--net/netfilter/nf_tables_api.c1
-rw-r--r--net/netfilter/nf_tables_core.c10
-rw-r--r--net/netfilter/nf_tables_netdev.c20
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c34
-rw-r--r--net/netfilter/nfnetlink_log.c89
-rw-r--r--net/netfilter/nfnetlink_queue.c (renamed from net/netfilter/nfnetlink_queue_core.c)73
-rw-r--r--net/netfilter/nfnetlink_queue_ct.c113
-rw-r--r--net/netfilter/nft_log.c3
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_queue.c2
-rw-r--r--net/netfilter/nft_reject_inet.c19
-rw-r--r--net/netfilter/x_tables.c1
-rw-r--r--net/netfilter/xt_CT.c7
-rw-r--r--net/netfilter/xt_LOG.c2
-rw-r--r--net/netfilter/xt_NFLOG.c2
-rw-r--r--net/netfilter/xt_TCPMSS.c2
-rw-r--r--net/netfilter/xt_TEE.c4
-rw-r--r--net/netfilter/xt_TPROXY.c24
-rw-r--r--net/netfilter/xt_addrtype.c4
-rw-r--r--net/netfilter/xt_connlimit.c4
-rw-r--r--net/netfilter/xt_ipvs.c5
-rw-r--r--net/netfilter/xt_osf.c2
-rw-r--r--net/netfilter/xt_recent.c2
-rw-r--r--net/netfilter/xt_socket.c14
-rw-r--r--net/netlink/genetlink.c14
-rw-r--r--net/openvswitch/actions.c13
-rw-r--r--net/openvswitch/conntrack.c6
-rw-r--r--net/openvswitch/datapath.c3
-rw-r--r--net/openvswitch/flow.c4
-rw-r--r--net/openvswitch/flow.h1
-rw-r--r--net/openvswitch/flow_netlink.c129
-rw-r--r--net/openvswitch/flow_table.c2
-rw-r--r--net/openvswitch/vport-vxlan.c3
-rw-r--r--net/packet/af_packet.c34
-rw-r--r--net/rds/af_rds.c8
-rw-r--r--net/rds/bind.c85
-rw-r--r--net/rds/connection.c22
-rw-r--r--net/rds/ib.c47
-rw-r--r--net/rds/ib.h78
-rw-r--r--net/rds/ib_cm.c114
-rw-r--r--net/rds/ib_rdma.c116
-rw-r--r--net/rds/ib_recv.c136
-rw-r--r--net/rds/ib_send.c110
-rw-r--r--net/rds/ib_stats.c22
-rw-r--r--net/rds/rds.h5
-rw-r--r--net/rds/send.c19
-rw-r--r--net/rds/tcp.c16
-rw-r--r--net/rds/tcp_listen.c25
-rw-r--r--net/rds/tcp_send.c8
-rw-r--r--net/rds/threads.c2
-rw-r--r--net/rxrpc/ar-connection.c4
-rw-r--r--net/rxrpc/ar-internal.h4
-rw-r--r--net/rxrpc/ar-transport.c4
-rw-r--r--net/sched/act_bpf.c1
-rw-r--r--net/sched/act_connmark.c5
-rw-r--r--net/sched/act_ipt.c1
-rw-r--r--net/sched/cls_bpf.c82
-rw-r--r--net/sched/em_ipset.c1
-rw-r--r--net/sched/sch_blackhole.c15
-rw-r--r--net/sched/sch_dsmark.c63
-rw-r--r--net/sched/sch_fq.c13
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/sm_statefuns.c2
-rw-r--r--net/switchdev/switchdev.c617
-rw-r--r--net/tipc/bearer.c8
-rw-r--r--net/tipc/link.c170
-rw-r--r--net/tipc/link.h5
-rw-r--r--net/tipc/msg.c31
-rw-r--r--net/tipc/msg.h34
-rw-r--r--net/tipc/node.c43
-rw-r--r--net/tipc/udp_media.c1
-rw-r--r--net/wireless/core.c1
-rw-r--r--net/wireless/nl80211.c209
-rw-r--r--net/wireless/reg.c4
-rw-r--r--net/xfrm/xfrm_output.c20
-rw-r--r--net/xfrm/xfrm_policy.c19
338 files changed, 9288 insertions, 5465 deletions
diff --git a/net/6lowpan/core.c b/net/6lowpan/core.c
index ae1896fa45e2..83b19e072224 100644
--- a/net/6lowpan/core.c
+++ b/net/6lowpan/core.c
@@ -17,6 +17,11 @@
void lowpan_netdev_setup(struct net_device *dev, enum lowpan_lltypes lltype)
{
+ dev->addr_len = EUI64_ADDR_LEN;
+ dev->type = ARPHRD_6LOWPAN;
+ dev->mtu = IPV6_MIN_MTU;
+ dev->priv_flags |= IFF_NO_QUEUE;
+
lowpan_priv(dev)->lltype = lltype;
}
EXPORT_SYMBOL(lowpan_netdev_setup);
diff --git a/net/6lowpan/iphc.c b/net/6lowpan/iphc.c
index 1e0071fdcf72..78c8a495b571 100644
--- a/net/6lowpan/iphc.c
+++ b/net/6lowpan/iphc.c
@@ -366,7 +366,18 @@ lowpan_header_decompress(struct sk_buff *skb, struct net_device *dev,
return err;
}
- hdr.payload_len = htons(skb->len);
+ switch (lowpan_priv(dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_802154_cb(skb)->d_size)
+ hdr.payload_len = htons(lowpan_802154_cb(skb)->d_size -
+ sizeof(struct ipv6hdr));
+ else
+ hdr.payload_len = htons(skb->len);
+ break;
+ default:
+ hdr.payload_len = htons(skb->len);
+ break;
+ }
pr_debug("skb headroom size = %d, data length = %d\n",
skb_headroom(skb), skb->len);
diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h
index ed44938eb5de..c249f17fa37b 100644
--- a/net/6lowpan/nhc.h
+++ b/net/6lowpan/nhc.h
@@ -8,8 +8,6 @@
#include <net/6lowpan.h>
#include <net/ipv6.h>
-#define LOWPAN_NHC_MAX_ID_LEN 1
-
/**
* LOWPAN_NHC - helper macro to generate nh id fields and lowpan_nhc struct
*
diff --git a/net/6lowpan/nhc_udp.c b/net/6lowpan/nhc_udp.c
index c6bcaeb428ae..72d0b57eb6e5 100644
--- a/net/6lowpan/nhc_udp.c
+++ b/net/6lowpan/nhc_udp.c
@@ -71,7 +71,18 @@ static int udp_uncompress(struct sk_buff *skb, size_t needed)
* here, we obtain the hint from the remaining size of the
* frame
*/
- uh.len = htons(skb->len + sizeof(struct udphdr));
+ switch (lowpan_priv(skb->dev)->lltype) {
+ case LOWPAN_LLTYPE_IEEE802154:
+ if (lowpan_802154_cb(skb)->d_size)
+ uh.len = htons(lowpan_802154_cb(skb)->d_size -
+ sizeof(struct ipv6hdr));
+ else
+ uh.len = htons(skb->len + sizeof(struct udphdr));
+ break;
+ default:
+ uh.len = htons(skb->len + sizeof(struct udphdr));
+ break;
+ }
pr_debug("uncompressed UDP length: src = %d", ntohs(uh.len));
/* replace the compressed UDP head by the uncompressed UDP
diff --git a/net/Kconfig b/net/Kconfig
index 7021c1bf44d6..127da94ae25e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,7 @@ source "net/netlink/Kconfig"
source "net/mpls/Kconfig"
source "net/hsr/Kconfig"
source "net/switchdev/Kconfig"
+source "net/l3mdev/Kconfig"
config RPS
bool
diff --git a/net/Makefile b/net/Makefile
index 3995613e5510..a5d04098dfce 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -74,3 +74,6 @@ obj-$(CONFIG_HSR) += hsr/
ifneq ($(CONFIG_NET_SWITCHDEV),)
obj-y += switchdev/
endif
+ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
+obj-y += l3mdev/
+endif
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 131e79cde350..db73b8a1433f 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -35,7 +35,6 @@ static struct dentry *lowpan_enable_debugfs;
static struct dentry *lowpan_control_debugfs;
#define IFACE_NAME_TEMPLATE "bt%d"
-#define EUI64_ADDR_LEN 8
struct skb_cb {
struct in6_addr addr;
@@ -674,13 +673,8 @@ static struct header_ops header_ops = {
static void netdev_setup(struct net_device *dev)
{
- dev->addr_len = EUI64_ADDR_LEN;
- dev->type = ARPHRD_6LOWPAN;
-
dev->hard_header_len = 0;
dev->needed_tailroom = 0;
- dev->mtu = IPV6_MIN_MTU;
- dev->tx_queue_len = 0;
dev->flags = IFF_RUNNING | IFF_POINTOPOINT |
IFF_MULTICAST;
dev->watchdog_timeo = 0;
@@ -775,24 +769,7 @@ static struct l2cap_chan *chan_create(void)
chan->chan_type = L2CAP_CHAN_CONN_ORIENTED;
chan->mode = L2CAP_MODE_LE_FLOWCTL;
- chan->omtu = 65535;
- chan->imtu = chan->omtu;
-
- return chan;
-}
-
-static struct l2cap_chan *chan_open(struct l2cap_chan *pchan)
-{
- struct l2cap_chan *chan;
-
- chan = chan_create();
- if (!chan)
- return NULL;
-
- chan->remote_mps = chan->omtu;
- chan->mps = chan->omtu;
-
- chan->state = BT_CONNECTED;
+ chan->imtu = 1280;
return chan;
}
@@ -919,7 +896,10 @@ static inline struct l2cap_chan *chan_new_conn_cb(struct l2cap_chan *pchan)
{
struct l2cap_chan *chan;
- chan = chan_open(pchan);
+ chan = chan_create();
+ if (!chan)
+ return NULL;
+
chan->ops = pchan->ops;
BT_DBG("chan %p pchan %p", chan, pchan);
@@ -1065,34 +1045,23 @@ static inline __u8 bdaddr_type(__u8 type)
return BDADDR_LE_RANDOM;
}
-static struct l2cap_chan *chan_get(void)
-{
- struct l2cap_chan *pchan;
-
- pchan = chan_create();
- if (!pchan)
- return NULL;
-
- pchan->ops = &bt_6lowpan_chan_ops;
-
- return pchan;
-}
-
static int bt_6lowpan_connect(bdaddr_t *addr, u8 dst_type)
{
- struct l2cap_chan *pchan;
+ struct l2cap_chan *chan;
int err;
- pchan = chan_get();
- if (!pchan)
+ chan = chan_create();
+ if (!chan)
return -EINVAL;
- err = l2cap_chan_connect(pchan, cpu_to_le16(L2CAP_PSM_IPSP), 0,
+ chan->ops = &bt_6lowpan_chan_ops;
+
+ err = l2cap_chan_connect(chan, cpu_to_le16(L2CAP_PSM_IPSP), 0,
addr, dst_type);
- BT_DBG("chan %p err %d", pchan, err);
+ BT_DBG("chan %p err %d", chan, err);
if (err < 0)
- l2cap_chan_put(pchan);
+ l2cap_chan_put(chan);
return err;
}
@@ -1117,31 +1086,32 @@ static int bt_6lowpan_disconnect(struct l2cap_conn *conn, u8 dst_type)
static struct l2cap_chan *bt_6lowpan_listen(void)
{
bdaddr_t *addr = BDADDR_ANY;
- struct l2cap_chan *pchan;
+ struct l2cap_chan *chan;
int err;
if (!enable_6lowpan)
return NULL;
- pchan = chan_get();
- if (!pchan)
+ chan = chan_create();
+ if (!chan)
return NULL;
- pchan->state = BT_LISTEN;
- pchan->src_type = BDADDR_LE_PUBLIC;
+ chan->ops = &bt_6lowpan_chan_ops;
+ chan->state = BT_LISTEN;
+ chan->src_type = BDADDR_LE_PUBLIC;
- atomic_set(&pchan->nesting, L2CAP_NESTING_PARENT);
+ atomic_set(&chan->nesting, L2CAP_NESTING_PARENT);
- BT_DBG("chan %p src type %d", pchan, pchan->src_type);
+ BT_DBG("chan %p src type %d", chan, chan->src_type);
- err = l2cap_add_psm(pchan, addr, cpu_to_le16(L2CAP_PSM_IPSP));
+ err = l2cap_add_psm(chan, addr, cpu_to_le16(L2CAP_PSM_IPSP));
if (err) {
- l2cap_chan_put(pchan);
+ l2cap_chan_put(chan);
BT_ERR("psm cannot be added err %d", err);
return NULL;
}
- return pchan;
+ return chan;
}
static int get_l2cap_conn(char *buf, bdaddr_t *addr, u8 *addr_type,
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index e837539452fb..e4e53bd663df 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -134,6 +134,66 @@ static const struct file_operations dut_mode_fops = {
.llseek = default_llseek,
};
+static ssize_t vendor_diag_read(struct file *file, char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[3];
+
+ buf[0] = hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) ? 'Y': 'N';
+ buf[1] = '\n';
+ buf[2] = '\0';
+ return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct hci_dev *hdev = file->private_data;
+ char buf[32];
+ size_t buf_size = min(count, (sizeof(buf)-1));
+ bool enable;
+ int err;
+
+ if (copy_from_user(buf, user_buf, buf_size))
+ return -EFAULT;
+
+ buf[buf_size] = '\0';
+ if (strtobool(buf, &enable))
+ return -EINVAL;
+
+ hci_req_lock(hdev);
+ err = hdev->set_diag(hdev, enable);
+ hci_req_unlock(hdev);
+
+ if (err < 0)
+ return err;
+
+ if (enable)
+ hci_dev_set_flag(hdev, HCI_VENDOR_DIAG);
+ else
+ hci_dev_clear_flag(hdev, HCI_VENDOR_DIAG);
+
+ return count;
+}
+
+static const struct file_operations vendor_diag_fops = {
+ .open = simple_open,
+ .read = vendor_diag_read,
+ .write = vendor_diag_write,
+ .llseek = default_llseek,
+};
+
+static void hci_debugfs_create_basic(struct hci_dev *hdev)
+{
+ debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev,
+ &dut_mode_fops);
+
+ if (hdev->set_diag)
+ debugfs_create_file("vendor_diag", 0644, hdev->debugfs, hdev,
+ &vendor_diag_fops);
+}
+
/* ---- HCI requests ---- */
static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode,
@@ -693,7 +753,8 @@ static void hci_init3_req(struct hci_request *req, unsigned long opt)
hci_setup_event_mask(req);
- if (hdev->commands[6] & 0x20) {
+ if (hdev->commands[6] & 0x20 &&
+ !test_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks)) {
struct hci_cp_read_stored_link_key cp;
bacpy(&cp.bdaddr, BDADDR_ANY);
@@ -849,13 +910,8 @@ static int __hci_init(struct hci_dev *hdev)
if (err < 0)
return err;
- /* The Device Under Test (DUT) mode is special and available for
- * all controller types. So just create it early on.
- */
- if (hci_dev_test_flag(hdev, HCI_SETUP)) {
- debugfs_create_file("dut_mode", 0644, hdev->debugfs, hdev,
- &dut_mode_fops);
- }
+ if (hci_dev_test_flag(hdev, HCI_SETUP))
+ hci_debugfs_create_basic(hdev);
err = __hci_req_sync(hdev, hci_init2_req, 0, HCI_INIT_TIMEOUT);
if (err < 0)
@@ -932,6 +988,9 @@ static int __hci_unconf_init(struct hci_dev *hdev)
if (err < 0)
return err;
+ if (hci_dev_test_flag(hdev, HCI_SETUP))
+ hci_debugfs_create_basic(hdev);
+
return 0;
}
@@ -1384,6 +1443,9 @@ static int hci_dev_do_open(struct hci_dev *hdev)
goto done;
}
+ set_bit(HCI_RUNNING, &hdev->flags);
+ hci_notify(hdev, HCI_DEV_OPEN);
+
atomic_set(&hdev->cmd_cnt, 1);
set_bit(HCI_INIT, &hdev->flags);
@@ -1465,6 +1527,9 @@ static int hci_dev_do_open(struct hci_dev *hdev)
hdev->sent_cmd = NULL;
}
+ clear_bit(HCI_RUNNING, &hdev->flags);
+ hci_notify(hdev, HCI_DEV_CLOSE);
+
hdev->close(hdev);
hdev->flags &= BIT(HCI_RAW);
}
@@ -1548,8 +1613,10 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
BT_DBG("All LE pending actions cleared");
}
-static int hci_dev_do_close(struct hci_dev *hdev)
+int hci_dev_do_close(struct hci_dev *hdev)
{
+ bool auto_off;
+
BT_DBG("%s %p", hdev->name, hdev);
if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) &&
@@ -1605,10 +1672,10 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hci_discovery_set_state(hdev, DISCOVERY_STOPPED);
- if (!hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
- if (hdev->dev_type == HCI_BREDR)
- mgmt_powered(hdev, 0);
- }
+ auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF);
+
+ if (!auto_off && hdev->dev_type == HCI_BREDR)
+ mgmt_powered(hdev, 0);
hci_inquiry_cache_flush(hdev);
hci_pend_le_actions_clear(hdev);
@@ -1625,9 +1692,8 @@ static int hci_dev_do_close(struct hci_dev *hdev)
/* Reset device */
skb_queue_purge(&hdev->cmd_q);
atomic_set(&hdev->cmd_cnt, 1);
- if (!hci_dev_test_flag(hdev, HCI_AUTO_OFF) &&
- !hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
- test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks)) {
+ if (test_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks) &&
+ !auto_off && !hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
set_bit(HCI_INIT, &hdev->flags);
__hci_req_sync(hdev, hci_reset_req, 0, HCI_CMD_TIMEOUT);
clear_bit(HCI_INIT, &hdev->flags);
@@ -1648,6 +1714,9 @@ static int hci_dev_do_close(struct hci_dev *hdev)
hdev->sent_cmd = NULL;
}
+ clear_bit(HCI_RUNNING, &hdev->flags);
+ hci_notify(hdev, HCI_DEV_CLOSE);
+
/* After this point our queues are empty
* and no tasks are scheduled. */
hdev->close(hdev);
@@ -3463,6 +3532,13 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
return -ENXIO;
}
+ if (bt_cb(skb)->pkt_type != HCI_EVENT_PKT &&
+ bt_cb(skb)->pkt_type != HCI_ACLDATA_PKT &&
+ bt_cb(skb)->pkt_type != HCI_SCODATA_PKT) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
/* Incoming skb */
bt_cb(skb)->incoming = 1;
@@ -3476,6 +3552,21 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
}
EXPORT_SYMBOL(hci_recv_frame);
+/* Receive diagnostic message from HCI drivers */
+int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb)
+{
+ /* Time stamp */
+ __net_timestamp(skb);
+
+ /* Mark as diagnostic packet and send to monitor */
+ bt_cb(skb)->pkt_type = HCI_DIAG_PKT;
+ hci_send_to_monitor(hdev, skb);
+
+ kfree_skb(skb);
+ return 0;
+}
+EXPORT_SYMBOL(hci_recv_diag);
+
/* ---- Interface to upper protocols ---- */
int hci_register_cb(struct hci_cb *cb)
@@ -3522,6 +3613,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
/* Get rid of skb owner, prior to sending to the driver. */
skb_orphan(skb);
+ if (!test_bit(HCI_RUNNING, &hdev->flags)) {
+ kfree_skb(skb);
+ return;
+ }
+
err = hdev->send(hdev, skb);
if (err < 0) {
BT_ERR("%s sending frame failed (%d)", hdev->name, err);
@@ -3572,6 +3668,25 @@ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode)
return hdev->sent_cmd->data + HCI_COMMAND_HDR_SIZE;
}
+/* Send HCI command and wait for command complete event */
+struct sk_buff *hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen,
+ const void *param, u32 timeout)
+{
+ struct sk_buff *skb;
+
+ if (!test_bit(HCI_UP, &hdev->flags))
+ return ERR_PTR(-ENETDOWN);
+
+ bt_dev_dbg(hdev, "opcode 0x%4.4x plen %d", opcode, plen);
+
+ hci_req_lock(hdev);
+ skb = __hci_cmd_sync(hdev, opcode, plen, param, timeout);
+ hci_req_unlock(hdev);
+
+ return skb;
+}
+EXPORT_SYMBOL(hci_cmd_sync);
+
/* Send ACL data */
static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
{
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index bc31099d3b5b..b4571d84cafa 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4724,6 +4724,27 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
struct hci_conn *conn;
bool match;
u32 flags;
+ u8 *ptr, real_len;
+
+ /* Find the end of the data in case the report contains padded zero
+ * bytes at the end causing an invalid length value.
+ *
+ * When data is NULL, len is 0 so there is no need for extra ptr
+ * check as 'ptr < data + 0' is already false in such case.
+ */
+ for (ptr = data; ptr < data + len && *ptr; ptr += *ptr + 1) {
+ if (ptr + 1 + *ptr > data + len)
+ break;
+ }
+
+ real_len = ptr - data;
+
+ /* Adjust for actual length */
+ if (len != real_len) {
+ BT_ERR_RATELIMITED("%s advertising data length corrected",
+ hdev->name);
+ len = real_len;
+ }
/* If the direct address is present, then this report is from
* a LE Direct Advertising Report event. In that case it is
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index f2d30d1156c9..9a100c1fd7b5 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -279,6 +279,9 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb)
else
opcode = cpu_to_le16(HCI_MON_SCO_TX_PKT);
break;
+ case HCI_DIAG_PKT:
+ opcode = cpu_to_le16(HCI_MON_VENDOR_DIAG);
+ break;
default:
return;
}
@@ -303,6 +306,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
{
struct hci_mon_hdr *hdr;
struct hci_mon_new_index *ni;
+ struct hci_mon_index_info *ii;
struct sk_buff *skb;
__le16 opcode;
@@ -312,7 +316,7 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
if (!skb)
return NULL;
- ni = (void *) skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
+ ni = (void *)skb_put(skb, HCI_MON_NEW_INDEX_SIZE);
ni->type = hdev->dev_type;
ni->bus = hdev->bus;
bacpy(&ni->bdaddr, &hdev->bdaddr);
@@ -329,6 +333,34 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event)
opcode = cpu_to_le16(HCI_MON_DEL_INDEX);
break;
+ case HCI_DEV_UP:
+ skb = bt_skb_alloc(HCI_MON_INDEX_INFO_SIZE, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ ii = (void *)skb_put(skb, HCI_MON_INDEX_INFO_SIZE);
+ bacpy(&ii->bdaddr, &hdev->bdaddr);
+ ii->manufacturer = cpu_to_le16(hdev->manufacturer);
+
+ opcode = cpu_to_le16(HCI_MON_INDEX_INFO);
+ break;
+
+ case HCI_DEV_OPEN:
+ skb = bt_skb_alloc(0, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ opcode = cpu_to_le16(HCI_MON_OPEN_INDEX);
+ break;
+
+ case HCI_DEV_CLOSE:
+ skb = bt_skb_alloc(0, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+
+ opcode = cpu_to_le16(HCI_MON_CLOSE_INDEX);
+ break;
+
default:
return NULL;
}
@@ -358,6 +390,26 @@ static void send_monitor_replay(struct sock *sk)
if (sock_queue_rcv_skb(sk, skb))
kfree_skb(skb);
+
+ if (!test_bit(HCI_RUNNING, &hdev->flags))
+ continue;
+
+ skb = create_monitor_event(hdev, HCI_DEV_OPEN);
+ if (!skb)
+ continue;
+
+ if (sock_queue_rcv_skb(sk, skb))
+ kfree_skb(skb);
+
+ if (!test_bit(HCI_UP, &hdev->flags))
+ continue;
+
+ skb = create_monitor_event(hdev, HCI_DEV_UP);
+ if (!skb)
+ continue;
+
+ if (sock_queue_rcv_skb(sk, skb))
+ kfree_skb(skb);
}
read_unlock(&hci_dev_list_lock);
@@ -392,14 +444,12 @@ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data)
void hci_sock_dev_event(struct hci_dev *hdev, int event)
{
- struct hci_ev_si_device ev;
-
BT_DBG("hdev %s event %d", hdev->name, event);
- /* Send event to monitor */
if (atomic_read(&monitor_promisc)) {
struct sk_buff *skb;
+ /* Send event to monitor */
skb = create_monitor_event(hdev, event);
if (skb) {
hci_send_to_channel(HCI_CHANNEL_MONITOR, skb,
@@ -408,10 +458,14 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event)
}
}
- /* Send event to sockets */
- ev.event = event;
- ev.dev_id = hdev->id;
- hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev);
+ if (event <= HCI_DEV_DOWN) {
+ struct hci_ev_si_device ev;
+
+ /* Send event to sockets */
+ ev.event = event;
+ ev.dev_id = hdev->id;
+ hci_si_event(NULL, HCI_EV_SI_DEVICE, sizeof(ev), &ev);
+ }
if (event == HCI_DEV_UNREG) {
struct sock *sk;
@@ -503,7 +557,16 @@ static int hci_sock_release(struct socket *sock)
if (hdev) {
if (hci_pi(sk)->channel == HCI_CHANNEL_USER) {
- hci_dev_close(hdev->id);
+ /* When releasing a user channel exclusive access,
+ * call hci_dev_do_close directly instead of calling
+ * hci_dev_close to ensure the exclusive access will
+ * be released and the controller brought back down.
+ *
+ * The checking of HCI_AUTO_OFF is not needed in this
+ * case since it will have been cleared already when
+ * opening the user channel.
+ */
+ hci_dev_do_close(hdev);
hci_dev_clear_flag(hdev, HCI_USER_CHANNEL);
mgmt_index_added(hdev);
}
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index b36bc0415854..aa4cf64e32a6 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -151,6 +151,22 @@ void bt_info(const char *format, ...)
}
EXPORT_SYMBOL(bt_info);
+void bt_warn(const char *format, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, format);
+
+ vaf.fmt = format;
+ vaf.va = &args;
+
+ pr_warn("%pV", &vaf);
+
+ va_end(args);
+}
+EXPORT_SYMBOL(bt_warn);
+
void bt_err(const char *format, ...)
{
struct va_format vaf;
@@ -166,3 +182,19 @@ void bt_err(const char *format, ...)
va_end(args);
}
EXPORT_SYMBOL(bt_err);
+
+void bt_err_ratelimited(const char *format, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, format);
+
+ vaf.fmt = format;
+ vaf.va = &args;
+
+ pr_err_ratelimited("%pV", &vaf);
+
+ va_end(args);
+}
+EXPORT_SYMBOL(bt_err_ratelimited);
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 0510a577a7b5..25644e1bc479 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -495,7 +495,7 @@ static int smp_ah(struct crypto_blkcipher *tfm, const u8 irk[16],
}
/* The output of the random address function ah is:
- * ah(h, r) = e(k, r') mod 2^24
+ * ah(k, r) = e(k, r') mod 2^24
* The output of the security function e is then truncated to 24 bits
* by taking the least significant 24 bits of the output of e as the
* result of ah.
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 6ed2feb51e3c..5e88d3e17546 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -56,7 +56,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb_reset_mac_header(skb);
skb_pull(skb, ETH_HLEN);
- if (!br_allowed_ingress(br, br_get_vlan_info(br), skb, &vid))
+ if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
goto out;
if (is_broadcast_ether_addr(dest))
@@ -391,7 +391,7 @@ void br_dev_setup(struct net_device *dev)
br->bridge_max_age = br->max_age = 20 * HZ;
br->bridge_hello_time = br->hello_time = 2 * HZ;
br->bridge_forward_delay = br->forward_delay = 15 * HZ;
- br->ageing_time = 300 * HZ;
+ br->ageing_time = BR_DEFAULT_AGEING_TIME;
br_netfilter_rtable_init(br);
br_stp_timer_init(br);
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9e9875da0a4f..c88bd8e8937e 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -133,15 +133,16 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr)
static void fdb_del_external_learn(struct net_bridge_fdb_entry *f)
{
- struct switchdev_obj obj = {
- .id = SWITCHDEV_OBJ_PORT_FDB,
- .u.fdb = {
- .addr = f->addr.addr,
- .vid = f->vlan_id,
+ struct switchdev_obj_port_fdb fdb = {
+ .obj = {
+ .id = SWITCHDEV_OBJ_ID_PORT_FDB,
+ .flags = SWITCHDEV_F_DEFER,
},
+ .vid = f->vlan_id,
};
- switchdev_port_obj_del(f->dst->dev, &obj);
+ ether_addr_copy(fdb.addr, f->addr.addr);
+ switchdev_port_obj_del(f->dst->dev, &fdb.obj);
}
static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
@@ -163,22 +164,27 @@ static void fdb_delete_local(struct net_bridge *br,
struct net_bridge_fdb_entry *f)
{
const unsigned char *addr = f->addr.addr;
- u16 vid = f->vlan_id;
+ struct net_bridge_vlan_group *vg;
+ const struct net_bridge_vlan *v;
struct net_bridge_port *op;
+ u16 vid = f->vlan_id;
/* Maybe another port has same hw addr? */
list_for_each_entry(op, &br->port_list, list) {
+ vg = nbp_vlan_group(op);
if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
- (!vid || nbp_vlan_find(op, vid))) {
+ (!vid || br_vlan_find(vg, vid))) {
f->dst = op;
f->added_by_user = 0;
return;
}
}
+ vg = br_vlan_group(br);
+ v = br_vlan_find(vg, vid);
/* Maybe bridge device has same hw addr? */
if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
- (!vid || br_vlan_find(br, vid))) {
+ (!vid || (v && br_vlan_should_use(v)))) {
f->dst = NULL;
f->added_by_user = 0;
return;
@@ -203,14 +209,14 @@ void br_fdb_find_delete_local(struct net_bridge *br,
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
+ struct net_bridge_vlan_group *vg;
struct net_bridge *br = p->br;
- struct net_port_vlans *pv = nbp_get_vlan_info(p);
- bool no_vlan = !pv;
+ struct net_bridge_vlan *v;
int i;
- u16 vid;
spin_lock_bh(&br->hash_lock);
+ vg = nbp_vlan_group(p);
/* Search all chains since old address/hash is unknown */
for (i = 0; i < BR_HASH_SIZE; i++) {
struct hlist_node *h;
@@ -226,7 +232,7 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
* configured, we can safely be done at
* this point.
*/
- if (no_vlan)
+ if (!vg || !vg->num_vlans)
goto insert;
}
}
@@ -236,15 +242,15 @@ insert:
/* insert new address, may fail if invalid address or dup. */
fdb_insert(br, p, newaddr, 0);
- if (no_vlan)
+ if (!vg || !vg->num_vlans)
goto done;
/* Now add entries for every VLAN configured on the port.
* This function runs under RTNL so the bitmap will not change
* from under us.
*/
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
- fdb_insert(br, p, newaddr, vid);
+ list_for_each_entry(v, &vg->vlan_list, vlist)
+ fdb_insert(br, p, newaddr, v->vid);
done:
spin_unlock_bh(&br->hash_lock);
@@ -252,9 +258,9 @@ done:
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
{
+ struct net_bridge_vlan_group *vg;
struct net_bridge_fdb_entry *f;
- struct net_port_vlans *pv;
- u16 vid = 0;
+ struct net_bridge_vlan *v;
spin_lock_bh(&br->hash_lock);
@@ -264,20 +270,18 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, 0);
-
+ vg = br_vlan_group(br);
+ if (!vg || !vg->num_vlans)
+ goto out;
/* Now remove and add entries for every VLAN configured on the
* bridge. This function runs under RTNL so the bitmap will not
* change from under us.
*/
- pv = br_get_vlan_info(br);
- if (!pv)
- goto out;
-
- for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
- f = __br_fdb_get(br, br->dev->dev_addr, vid);
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ f = __br_fdb_get(br, br->dev->dev_addr, v->vid);
if (f && f->is_local && !f->dst)
fdb_delete_local(br, NULL, f);
- fdb_insert(br, NULL, newaddr, vid);
+ fdb_insert(br, NULL, newaddr, v->vid);
}
out:
spin_unlock_bh(&br->hash_lock);
@@ -299,6 +303,8 @@ void br_fdb_cleanup(unsigned long _data)
unsigned long this_timer;
if (f->is_static)
continue;
+ if (f->added_by_external_learn)
+ continue;
this_timer = f->updated + delay;
if (time_before_eq(this_timer, jiffies))
fdb_delete(br, f);
@@ -605,13 +611,14 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
}
}
-static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb)
+static int fdb_to_nud(const struct net_bridge *br,
+ const struct net_bridge_fdb_entry *fdb)
{
if (fdb->is_local)
return NUD_PERMANENT;
else if (fdb->is_static)
return NUD_NOARP;
- else if (has_expired(fdb->dst->br, fdb))
+ else if (has_expired(br, fdb))
return NUD_STALE;
else
return NUD_REACHABLE;
@@ -637,7 +644,7 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br,
ndm->ndm_flags = fdb->added_by_external_learn ? NTF_EXT_LEARNED : 0;
ndm->ndm_type = 0;
ndm->ndm_ifindex = fdb->dst ? fdb->dst->dev->ifindex : br->dev->ifindex;
- ndm->ndm_state = fdb_to_nud(fdb);
+ ndm->ndm_state = fdb_to_nud(br, fdb);
if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->addr))
goto nla_put_failure;
@@ -782,7 +789,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
}
}
- if (fdb_to_nud(fdb) != state) {
+ if (fdb_to_nud(br, fdb) != state) {
if (state & NUD_PERMANENT) {
fdb->is_local = 1;
if (!fdb->is_static) {
@@ -842,9 +849,11 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid, u16 nlh_flags)
{
- struct net_bridge_port *p;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p = NULL;
+ struct net_bridge_vlan *v;
+ struct net_bridge *br = NULL;
int err = 0;
- struct net_port_vlans *pv;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) {
pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state);
@@ -856,34 +865,51 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
return -EINVAL;
}
- p = br_port_get_rtnl(dev);
- if (p == NULL) {
- pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
- dev->name);
- return -EINVAL;
+ if (dev->priv_flags & IFF_EBRIDGE) {
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ pr_info("bridge: RTM_NEWNEIGH %s not a bridge port\n",
+ dev->name);
+ return -EINVAL;
+ }
+ vg = nbp_vlan_group(p);
}
- pv = nbp_get_vlan_info(p);
if (vid) {
- if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
- pr_info("bridge: RTM_NEWNEIGH with unconfigured "
- "vlan %d on port %s\n", vid, dev->name);
+ v = br_vlan_find(vg, vid);
+ if (!v || !br_vlan_should_use(v)) {
+ pr_info("bridge: RTM_NEWNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
return -EINVAL;
}
/* VID was specified, so use it. */
- err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+ if (dev->priv_flags & IFF_EBRIDGE)
+ err = br_fdb_insert(br, NULL, addr, vid);
+ else
+ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
} else {
- err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
- if (err || !pv)
+ if (dev->priv_flags & IFF_EBRIDGE)
+ err = br_fdb_insert(br, NULL, addr, 0);
+ else
+ err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
+ if (err || !vg || !vg->num_vlans)
goto out;
/* We have vlans configured on this port and user didn't
* specify a VLAN. To be nice, add/update entry for every
* vlan on this port.
*/
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
- err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (!br_vlan_should_use(v))
+ continue;
+ if (dev->priv_flags & IFF_EBRIDGE)
+ err = br_fdb_insert(br, NULL, addr, v->vid);
+ else
+ err = __br_fdb_add(ndm, p, addr, nlh_flags,
+ v->vid);
if (err)
goto out;
}
@@ -893,6 +919,32 @@ out:
return err;
}
+static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
+ u16 vid)
+{
+ struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
+ struct net_bridge_fdb_entry *fdb;
+
+ fdb = fdb_find(head, addr, vid);
+ if (!fdb)
+ return -ENOENT;
+
+ fdb_delete(br, fdb);
+ return 0;
+}
+
+static int __br_fdb_delete_by_addr(struct net_bridge *br,
+ const unsigned char *addr, u16 vid)
+{
+ int err;
+
+ spin_lock_bh(&br->hash_lock);
+ err = fdb_delete_by_addr(br, addr, vid);
+ spin_unlock_bh(&br->hash_lock);
+
+ return err;
+}
+
static int fdb_delete_by_addr_and_port(struct net_bridge_port *p,
const u8 *addr, u16 vlan)
{
@@ -925,38 +977,53 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev,
const unsigned char *addr, u16 vid)
{
- struct net_bridge_port *p;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p = NULL;
+ struct net_bridge_vlan *v;
+ struct net_bridge *br = NULL;
int err;
- struct net_port_vlans *pv;
- p = br_port_get_rtnl(dev);
- if (p == NULL) {
- pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
- dev->name);
- return -EINVAL;
+ if (dev->priv_flags & IFF_EBRIDGE) {
+ br = netdev_priv(dev);
+ vg = br_vlan_group(br);
+ } else {
+ p = br_port_get_rtnl(dev);
+ if (!p) {
+ pr_info("bridge: RTM_DELNEIGH %s not a bridge port\n",
+ dev->name);
+ return -EINVAL;
+ }
+ vg = nbp_vlan_group(p);
}
- pv = nbp_get_vlan_info(p);
if (vid) {
- if (!pv || !test_bit(vid, pv->vlan_bitmap)) {
- pr_info("bridge: RTM_DELNEIGH with unconfigured "
- "vlan %d on port %s\n", vid, dev->name);
+ v = br_vlan_find(vg, vid);
+ if (!v) {
+ pr_info("bridge: RTM_DELNEIGH with unconfigured vlan %d on %s\n", vid, dev->name);
return -EINVAL;
}
- err = __br_fdb_delete(p, addr, vid);
+ if (dev->priv_flags & IFF_EBRIDGE)
+ err = __br_fdb_delete_by_addr(br, addr, vid);
+ else
+ err = __br_fdb_delete(p, addr, vid);
} else {
err = -ENOENT;
- err &= __br_fdb_delete(p, addr, 0);
- if (!pv)
+ if (dev->priv_flags & IFF_EBRIDGE)
+ err = __br_fdb_delete_by_addr(br, addr, 0);
+ else
+ err &= __br_fdb_delete(p, addr, 0);
+
+ if (!vg || !vg->num_vlans)
goto out;
- /* We have vlans configured on this port and user didn't
- * specify a VLAN. To be nice, add/update entry for every
- * vlan on this port.
- */
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
- err &= __br_fdb_delete(p, addr, vid);
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ if (!br_vlan_should_use(v))
+ continue;
+ if (dev->priv_flags & IFF_EBRIDGE)
+ err = __br_fdb_delete_by_addr(br, addr, v->vid);
+ else
+ err &= __br_fdb_delete(p, addr, v->vid);
}
}
out:
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index fa7bfced888e..a9d424e20229 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -30,12 +30,14 @@ static int deliver_clone(const struct net_bridge_port *prev,
static inline int should_deliver(const struct net_bridge_port *p,
const struct sk_buff *skb)
{
+ struct net_bridge_vlan_group *vg;
+
+ vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
- br_allowed_egress(p->br, nbp_get_vlan_info(p), skb) &&
- p->state == BR_STATE_FORWARDING;
+ br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING;
}
-int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
+int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (!is_skb_forwardable(skb->dev, skb))
goto drop;
@@ -65,10 +67,10 @@ drop:
}
EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
-int br_forward_finish(struct sock *sk, struct sk_buff *skb)
+int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb,
- NULL, skb->dev,
+ return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
+ net, sk, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
}
@@ -76,7 +78,10 @@ EXPORT_SYMBOL_GPL(br_forward_finish);
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
- skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb);
+ struct net_bridge_vlan_group *vg;
+
+ vg = nbp_vlan_group_rcu(to);
+ skb = br_handle_vlan(to->br, vg, skb);
if (!skb)
return;
@@ -92,13 +97,14 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
return;
}
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb,
- NULL, skb->dev,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
+ dev_net(skb->dev), NULL, skb,NULL, skb->dev,
br_forward_finish);
}
static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
{
+ struct net_bridge_vlan_group *vg;
struct net_device *indev;
if (skb_warn_if_lro(skb)) {
@@ -106,7 +112,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
return;
}
- skb = br_handle_vlan(to->br, nbp_get_vlan_info(to), skb);
+ vg = nbp_vlan_group_rcu(to);
+ skb = br_handle_vlan(to->br, vg, skb);
if (!skb)
return;
@@ -114,8 +121,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
skb->dev = to->dev;
skb_forward_csum(skb);
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb,
- indev, skb->dev,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,
+ dev_net(indev), NULL, skb, indev, skb->dev,
br_forward_finish);
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 45e4757c6fd2..ec02f5869a78 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <linux/if_vlan.h>
+#include <net/switchdev.h>
#include "br_private.h"
@@ -250,6 +251,8 @@ static void del_nbp(struct net_bridge_port *p)
nbp_vlan_flush(p);
br_fdb_delete_by_port(br, p, 0, 1);
+ switchdev_deferred_process();
+
nbp_update_port_count(br);
netdev_upper_dev_unlink(dev, br->dev);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f921a5dce22d..f7fba74108a9 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -26,38 +26,44 @@
br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
EXPORT_SYMBOL(br_should_route_hook);
+static int
+br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ return netif_receive_skb(skb);
+}
+
static int br_pass_frame_up(struct sk_buff *skb)
{
struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
struct net_bridge *br = netdev_priv(brdev);
+ struct net_bridge_vlan_group *vg;
struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats);
- struct net_port_vlans *pv;
u64_stats_update_begin(&brstats->syncp);
brstats->rx_packets++;
brstats->rx_bytes += skb->len;
u64_stats_update_end(&brstats->syncp);
+ vg = br_vlan_group_rcu(br);
/* Bridge is just like any other port. Make sure the
* packet is allowed except in promisc modue when someone
* may be running packet capture.
*/
- pv = br_get_vlan_info(br);
if (!(brdev->flags & IFF_PROMISC) &&
- !br_allowed_egress(br, pv, skb)) {
+ !br_allowed_egress(vg, skb)) {
kfree_skb(skb);
return NET_RX_DROP;
}
indev = skb->dev;
skb->dev = brdev;
- skb = br_handle_vlan(br, pv, skb);
+ skb = br_handle_vlan(br, vg, skb);
if (!skb)
return NET_RX_DROP;
- return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb,
- indev, NULL,
- netif_receive_skb_sk);
+ return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
+ dev_net(indev), NULL, skb, indev, NULL,
+ br_netif_receive_skb);
}
static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
@@ -120,7 +126,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
}
/* note: already called with rcu_read_lock */
-int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb)
+int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const unsigned char *dest = eth_hdr(skb)->h_dest;
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
@@ -134,7 +140,7 @@ int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb)
if (!p || p->state == BR_STATE_DISABLED)
goto drop;
- if (!br_allowed_ingress(p->br, nbp_get_vlan_info(p), skb, &vid))
+ if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
goto out;
/* insert into forwarding database after filtering to avoid spoofing */
@@ -208,7 +214,7 @@ drop:
EXPORT_SYMBOL_GPL(br_handle_frame_finish);
/* note: already called with rcu_read_lock */
-static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb)
+static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_bridge_port *p = br_port_get_rcu(skb->dev);
u16 vid = 0;
@@ -278,8 +284,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
}
/* Deliver packet to local host only */
- if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb,
- skb->dev, NULL, br_handle_local_finish)) {
+ if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ br_handle_local_finish)) {
return RX_HANDLER_CONSUMED; /* consumed by filter */
} else {
*pskb = skb;
@@ -303,8 +310,8 @@ forward:
if (ether_addr_equal(p->br->dev->dev_addr, dest))
skb->pkt_type = PACKET_HOST;
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb,
- skb->dev, NULL,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
br_handle_frame_finish);
break;
default:
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 8d423bc649b9..263b4de4de57 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -200,8 +200,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
- br->ageing_time = clock_t_to_jiffies(args[1]);
- return 0;
+ return br_set_ageing_time(br, args[1]);
case BRCTL_GET_PORT_INFO:
{
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index d747275fad18..cd8deea2d074 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -464,11 +464,11 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
- unsigned short vid = VLAN_N_VID;
+ struct net_bridge_vlan_group *vg;
struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
struct net_bridge_port *p;
- struct net_port_vlans *pv;
+ struct net_bridge_vlan *v;
struct net_bridge *br;
int err;
@@ -489,10 +489,10 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!p || p->br != br || p->state == BR_STATE_DISABLED)
return -EINVAL;
- pv = nbp_get_vlan_info(p);
- if (br_vlan_enabled(br) && pv && entry->vid == 0) {
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
- entry->vid = vid;
+ vg = nbp_vlan_group(p);
+ if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ entry->vid = v->vid;
err = __br_mdb_add(net, br, entry);
if (err)
break;
@@ -566,11 +566,11 @@ unlock:
static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
{
struct net *net = sock_net(skb->sk);
- unsigned short vid = VLAN_N_VID;
+ struct net_bridge_vlan_group *vg;
struct net_device *dev, *pdev;
struct br_mdb_entry *entry;
struct net_bridge_port *p;
- struct net_port_vlans *pv;
+ struct net_bridge_vlan *v;
struct net_bridge *br;
int err;
@@ -591,10 +591,10 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh)
if (!p || p->br != br || p->state == BR_STATE_DISABLED)
return -EINVAL;
- pv = nbp_get_vlan_info(p);
- if (br_vlan_enabled(br) && pv && entry->vid == 0) {
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
- entry->vid = vid;
+ vg = nbp_vlan_group(p);
+ if (br_vlan_enabled(br) && vg && entry->vid == 0) {
+ list_for_each_entry(v, &vg->vlan_list, vlist) {
+ entry->vid = v->vid;
err = __br_mdb_del(br, entry);
if (!err)
__br_mdb_notify(dev, entry, RTM_DELMDB);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 480b3de1a0e3..03661d97463c 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -829,8 +829,8 @@ static void __br_multicast_send_query(struct net_bridge *br,
if (port) {
skb->dev = port->dev;
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb,
- NULL, skb->dev,
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
+ dev_net(port->dev), NULL, skb, NULL, skb->dev,
br_dev_queue_push_xmit);
} else {
br_multicast_select_own_querier(br, ip, skb);
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 0a6f095bb0c9..7ddbe7ec81d6 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -111,7 +111,6 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
/* largest possible L2 header, see br_nf_dev_queue_xmit() */
#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct brnf_frag_data {
char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
u8 encap_size;
@@ -121,7 +120,6 @@ struct brnf_frag_data {
};
static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
-#endif
static void nf_bridge_info_free(struct sk_buff *skb)
{
@@ -189,10 +187,9 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
* expected format
*/
-static int br_validate_ipv4(struct sk_buff *skb)
+static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
{
const struct iphdr *iph;
- struct net_device *dev = skb->dev;
u32 len;
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
@@ -213,13 +210,13 @@ static int br_validate_ipv4(struct sk_buff *skb)
len = ntohs(iph->tot_len);
if (skb->len < len) {
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
if (pskb_trim_rcsum(skb, len)) {
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -232,7 +229,7 @@ static int br_validate_ipv4(struct sk_buff *skb)
return 0;
inhdr_error:
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
drop:
return -1;
}
@@ -256,7 +253,7 @@ void nf_bridge_update_protocol(struct sk_buff *skb)
* don't, we use the neighbour framework to find out. In both cases, we make
* sure that br_handle_frame_finish() is called afterwards.
*/
-int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
+int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct neighbour *neigh;
struct dst_entry *dst;
@@ -273,7 +270,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
if (neigh->hh.hh_len) {
neigh_hh_bridge(&neigh->hh, skb);
skb->dev = nf_bridge->physindev;
- ret = br_handle_frame_finish(sk, skb);
+ ret = br_handle_frame_finish(net, sk, skb);
} else {
/* the neighbour function below overwrites the complete
* MAC header, so we save the Ethernet source address and
@@ -342,7 +339,7 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
* device, we proceed as if ip_route_input() succeeded. If it differs from the
* logical bridge port or if ip_route_output_key() fails we drop the packet.
*/
-static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
+static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct iphdr *iph = ip_hdr(skb);
@@ -371,7 +368,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
if (err != -EHOSTUNREACH || !in_dev || IN_DEV_FORWARD(in_dev))
goto free_skb;
- rt = ip_route_output(dev_net(dev), iph->daddr, 0,
+ rt = ip_route_output(net, iph->daddr, 0,
RT_TOS(iph->tos), 0);
if (!IS_ERR(rt)) {
/* - Bridged-and-DNAT'ed traffic doesn't
@@ -393,7 +390,7 @@ bridged_dnat:
nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(NFPROTO_BRIDGE,
NF_BR_PRE_ROUTING,
- sk, skb, skb->dev, NULL,
+ net, sk, skb, skb->dev, NULL,
br_nf_pre_routing_finish_bridge,
1);
return 0;
@@ -413,7 +410,7 @@ bridged_dnat:
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -464,7 +461,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb)
* receiving device) to make netfilter happy, the REDIRECT
* target in particular. Save the original destination IP
* address to be able to detect DNAT afterwards. */
-static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
+static unsigned int br_nf_pre_routing(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -486,7 +483,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
return NF_ACCEPT;
nf_bridge_pull_encap_header_rcsum(skb);
- return br_nf_pre_routing_ipv6(ops, skb, state);
+ return br_nf_pre_routing_ipv6(priv, skb, state);
}
if (!brnf_call_iptables && !br->nf_call_iptables)
@@ -497,7 +494,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
nf_bridge_pull_encap_header_rcsum(skb);
- if (br_validate_ipv4(skb))
+ if (br_validate_ipv4(state->net, skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
@@ -511,7 +508,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
skb->protocol = htons(ETH_P_IP);
- NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish);
@@ -526,7 +523,7 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops,
* took place when the packet entered the bridge), but we
* register an IPv4 PRE_ROUTING 'sabotage' hook that will
* prevent this from happening. */
-static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
+static unsigned int br_nf_local_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -535,7 +532,7 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
}
/* PF_BRIDGE/FORWARD *************************************************/
-static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
+static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *in;
@@ -559,7 +556,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
}
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb,
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, net, sk, skb,
in, skb->dev, br_forward_finish, 1);
return 0;
}
@@ -570,7 +567,7 @@ static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
* but we are still able to filter on the 'real' indev/outdev
* because of the physdev module. For ARP, indev and outdev are the
* bridge ports. */
-static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
+static unsigned int br_nf_forward_ip(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -609,13 +606,13 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
}
if (pf == NFPROTO_IPV4) {
- if (br_validate_ipv4(skb))
+ if (br_validate_ipv4(state->net, skb))
return NF_DROP;
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
if (pf == NFPROTO_IPV6) {
- if (br_validate_ipv6(skb))
+ if (br_validate_ipv6(state->net, skb))
return NF_DROP;
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
}
@@ -626,14 +623,14 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
else
skb->protocol = htons(ETH_P_IPV6);
- NF_HOOK(pf, NF_INET_FORWARD, NULL, skb,
+ NF_HOOK(pf, NF_INET_FORWARD, state->net, NULL, skb,
brnf_get_logical_dev(skb, state->in),
parent, br_nf_forward_finish);
return NF_STOLEN;
}
-static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
+static unsigned int br_nf_forward_arp(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -661,14 +658,13 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
return NF_ACCEPT;
}
*d = state->in;
- NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb,
+ NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->net, state->sk, skb,
state->in, state->out, br_nf_forward_finish);
return NF_STOLEN;
}
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) || IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
-static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
+static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct brnf_frag_data *data;
int err;
@@ -690,30 +686,26 @@ static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
__skb_push(skb, data->encap_size);
nf_bridge_info_free(skb);
- return br_dev_queue_push_xmit(sk, skb);
+ return br_dev_queue_push_xmit(net, sk, skb);
}
-#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
-static int br_nf_ip_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *))
+static int
+br_nf_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *))
{
unsigned int mtu = ip_skb_dst_mtu(skb);
struct iphdr *iph = ip_hdr(skb);
- struct rtable *rt = skb_rtable(skb);
- struct net_device *dev = rt->dst.dev;
if (unlikely(((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
- return ip_do_fragment(sk, skb, output);
+ return ip_do_fragment(net, sk, skb, output);
}
-#endif
static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
@@ -722,7 +714,7 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
return 0;
}
-static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
+static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge;
unsigned int mtu_reserved;
@@ -731,19 +723,19 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
nf_bridge_info_free(skb);
- return br_dev_queue_push_xmit(sk, skb);
+ return br_dev_queue_push_xmit(net, sk, skb);
}
nf_bridge = nf_bridge_info_get(skb);
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
/* This is wrong! We should preserve the original fragment
* boundaries by preserving frag_list rather than refragmenting.
*/
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) &&
+ skb->protocol == htons(ETH_P_IP)) {
struct brnf_frag_data *data;
- if (br_validate_ipv4(skb))
+ if (br_validate_ipv4(net, skb))
goto drop;
IPCB(skb)->frag_max_size = nf_bridge->frag_max_size;
@@ -760,15 +752,14 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
data->size);
- return br_nf_ip_fragment(sk, skb, br_nf_push_frag_xmit);
+ return br_nf_ip_fragment(net, sk, skb, br_nf_push_frag_xmit);
}
-#endif
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- if (skb->protocol == htons(ETH_P_IPV6)) {
+ if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) &&
+ skb->protocol == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
struct brnf_frag_data *data;
- if (br_validate_ipv6(skb))
+ if (br_validate_ipv6(net, skb))
goto drop;
IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size;
@@ -783,21 +774,20 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
data->size);
if (v6ops)
- return v6ops->fragment(sk, skb, br_nf_push_frag_xmit);
+ return v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit);
kfree_skb(skb);
return -EMSGSIZE;
}
-#endif
nf_bridge_info_free(skb);
- return br_dev_queue_push_xmit(sk, skb);
+ return br_dev_queue_push_xmit(net, sk, skb);
drop:
kfree_skb(skb);
return 0;
}
/* PF_BRIDGE/POST_ROUTING ********************************************/
-static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
+static unsigned int br_nf_post_routing(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -836,7 +826,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
else
skb->protocol = htons(ETH_P_IPV6);
- NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb,
+ NF_HOOK(pf, NF_INET_POST_ROUTING, state->net, state->sk, skb,
NULL, realoutdev,
br_nf_dev_queue_xmit);
@@ -846,7 +836,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
/* IP/SABOTAGE *****************************************************/
/* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING
* for the second time. */
-static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
+static unsigned int ip_sabotage_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -880,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
skb->dev = nf_bridge->physindev;
nf_bridge->physoutdev = NULL;
- br_handle_frame_finish(NULL, skb);
+ br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
}
static int br_nf_dev_xmit(struct sk_buff *skb)
@@ -906,49 +896,42 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable);
static struct nf_hook_ops br_nf_ops[] __read_mostly = {
{
.hook = br_nf_pre_routing,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_local_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_forward_ip,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF - 1,
},
{
.hook = br_nf_forward_arp,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_BRNF,
},
{
.hook = br_nf_post_routing,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_LAST,
},
{
.hook = ip_sabotage_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_FIRST,
},
{
.hook = ip_sabotage_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 77383bfe7ea3..d61f56efc8dc 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -100,10 +100,9 @@ bad:
return -1;
}
-int br_validate_ipv6(struct sk_buff *skb)
+int br_validate_ipv6(struct net *net, struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
- struct net_device *dev = skb->dev;
struct inet6_dev *idev = __in6_dev_get(skb->dev);
u32 pkt_len;
u8 ip6h_len = sizeof(struct ipv6hdr);
@@ -123,12 +122,12 @@ int br_validate_ipv6(struct sk_buff *skb)
if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) {
if (pkt_len + ip6h_len > skb->len) {
- IP6_INC_STATS_BH(dev_net(dev), idev,
+ IP6_INC_STATS_BH(net, idev,
IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
}
if (pskb_trim_rcsum(skb, pkt_len + ip6h_len)) {
- IP6_INC_STATS_BH(dev_net(dev), idev,
+ IP6_INC_STATS_BH(net, idev,
IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -143,7 +142,7 @@ int br_validate_ipv6(struct sk_buff *skb)
return 0;
inhdr_error:
- IP6_INC_STATS_BH(dev_net(dev), idev, IPSTATS_MIB_INHDRERRORS);
+ IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
drop:
return -1;
}
@@ -161,7 +160,7 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb,
* for br_nf_pre_routing_finish(), same logic is used here but
* equivalent IPv6 function ip6_route_input() called indirectly.
*/
-static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
+static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
@@ -189,7 +188,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
- sk, skb, skb->dev, NULL,
+ net, sk, skb, skb->dev, NULL,
br_nf_pre_routing_finish_bridge,
1);
return 0;
@@ -208,7 +207,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb,
+ NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
skb->dev, NULL,
br_handle_frame_finish, 1);
@@ -218,13 +217,13 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
/* Replicate the checks that IPv6 does on packet reception and pass the packet
* to ip6tables.
*/
-unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
+unsigned int br_nf_pre_routing_ipv6(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_bridge_info *nf_bridge;
- if (br_validate_ipv6(skb))
+ if (br_validate_ipv6(state->net, skb))
return NF_DROP;
nf_bridge_put(skb->nf_bridge);
@@ -237,7 +236,7 @@ unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops,
nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr;
skb->protocol = htons(ETH_P_IPV6);
- NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb,
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb,
skb->dev, NULL,
br_nf_pre_routing_finish_ipv6);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index ea748c93a07f..94b4de8c4646 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -21,36 +21,35 @@
#include "br_private.h"
#include "br_private_stp.h"
-static int br_get_num_vlan_infos(const struct net_port_vlans *pv,
- u32 filter_mask)
+static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg,
+ u32 filter_mask)
{
- u16 vid_range_start = 0, vid_range_end = 0;
- u16 vid_range_flags = 0;
- u16 pvid, vid, flags;
+ struct net_bridge_vlan *v;
+ u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0;
+ u16 flags, pvid;
int num_vlans = 0;
- if (filter_mask & RTEXT_FILTER_BRVLAN)
- return pv->num_vlans;
-
if (!(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED))
return 0;
- /* Count number of vlan info's
- */
- pvid = br_get_pvid(pv);
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+ pvid = br_get_pvid(vg);
+ /* Count number of vlan infos */
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
flags = 0;
- if (vid == pvid)
+ /* only a context, bridge vlan not activated */
+ if (!br_vlan_should_use(v))
+ continue;
+ if (v->vid == pvid)
flags |= BRIDGE_VLAN_INFO_PVID;
- if (test_bit(vid, pv->untagged_bitmap))
+ if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
flags |= BRIDGE_VLAN_INFO_UNTAGGED;
if (vid_range_start == 0) {
goto initvars;
- } else if ((vid - vid_range_end) == 1 &&
+ } else if ((v->vid - vid_range_end) == 1 &&
flags == vid_range_flags) {
- vid_range_end = vid;
+ vid_range_end = v->vid;
continue;
} else {
if ((vid_range_end - vid_range_start) > 0)
@@ -59,8 +58,8 @@ static int br_get_num_vlan_infos(const struct net_port_vlans *pv,
num_vlans += 1;
}
initvars:
- vid_range_start = vid;
- vid_range_end = vid;
+ vid_range_start = v->vid;
+ vid_range_end = v->vid;
vid_range_flags = flags;
}
@@ -74,28 +73,43 @@ initvars:
return num_vlans;
}
+static int br_get_num_vlan_infos(struct net_bridge_vlan_group *vg,
+ u32 filter_mask)
+{
+ int num_vlans;
+
+ if (!vg)
+ return 0;
+
+ if (filter_mask & RTEXT_FILTER_BRVLAN)
+ return vg->num_vlans;
+
+ rcu_read_lock();
+ num_vlans = __get_num_vlan_infos(vg, filter_mask);
+ rcu_read_unlock();
+
+ return num_vlans;
+}
+
static size_t br_get_link_af_size_filtered(const struct net_device *dev,
u32 filter_mask)
{
- struct net_port_vlans *pv;
+ struct net_bridge_vlan_group *vg = NULL;
+ struct net_bridge_port *p;
+ struct net_bridge *br;
int num_vlan_infos;
rcu_read_lock();
- if (br_port_exists(dev))
- pv = nbp_get_vlan_info(br_port_get_rcu(dev));
- else if (dev->priv_flags & IFF_EBRIDGE)
- pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
- else
- pv = NULL;
- if (pv)
- num_vlan_infos = br_get_num_vlan_infos(pv, filter_mask);
- else
- num_vlan_infos = 0;
+ if (br_port_exists(dev)) {
+ p = br_port_get_rcu(dev);
+ vg = nbp_vlan_group_rcu(p);
+ } else if (dev->priv_flags & IFF_EBRIDGE) {
+ br = netdev_priv(dev);
+ vg = br_vlan_group_rcu(br);
+ }
+ num_vlan_infos = br_get_num_vlan_infos(vg, filter_mask);
rcu_read_unlock();
- if (!num_vlan_infos)
- return 0;
-
/* Each VLAN is returned in bridge_vlan_info along with flags */
return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info));
}
@@ -113,6 +127,20 @@ static inline size_t br_port_info_size(void)
+ nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
+ nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
+ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
+ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_COST */
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_ID */
+ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_NO */
+ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_TOPOLOGY_CHANGE_ACK */
+ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_CONFIG_PENDING */
+ + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_MESSAGE_AGE_TIMER */
+ + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_FORWARD_DELAY_TIMER */
+ + nla_total_size(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
+#endif
+ 0;
}
@@ -134,6 +162,7 @@ static int br_port_fill_attrs(struct sk_buff *skb,
const struct net_bridge_port *p)
{
u8 mode = !!(p->flags & BR_HAIRPIN_MODE);
+ u64 timerval;
if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) ||
nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) ||
@@ -146,9 +175,36 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
- !!(p->flags & BR_PROXYARP_WIFI)))
+ !!(p->flags & BR_PROXYARP_WIFI)) ||
+ nla_put(skb, IFLA_BRPORT_ROOT_ID, sizeof(struct ifla_bridge_id),
+ &p->designated_root) ||
+ nla_put(skb, IFLA_BRPORT_BRIDGE_ID, sizeof(struct ifla_bridge_id),
+ &p->designated_bridge) ||
+ nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_PORT, p->designated_port) ||
+ nla_put_u16(skb, IFLA_BRPORT_DESIGNATED_COST, p->designated_cost) ||
+ nla_put_u16(skb, IFLA_BRPORT_ID, p->port_id) ||
+ nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) ||
+ nla_put_u8(skb, IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ p->topology_change_ack) ||
+ nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending))
+ return -EMSGSIZE;
+
+ timerval = br_timer_value(&p->message_age_timer);
+ if (nla_put_u64(skb, IFLA_BRPORT_MESSAGE_AGE_TIMER, timerval))
+ return -EMSGSIZE;
+ timerval = br_timer_value(&p->forward_delay_timer);
+ if (nla_put_u64(skb, IFLA_BRPORT_FORWARD_DELAY_TIMER, timerval))
+ return -EMSGSIZE;
+ timerval = br_timer_value(&p->hold_timer);
+ if (nla_put_u64(skb, IFLA_BRPORT_HOLD_TIMER, timerval))
return -EMSGSIZE;
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (nla_put_u8(skb, IFLA_BRPORT_MULTICAST_ROUTER,
+ p->multicast_router))
+ return -EMSGSIZE;
+#endif
+
return 0;
}
@@ -185,31 +241,33 @@ nla_put_failure:
}
static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb,
- const struct net_port_vlans *pv)
+ struct net_bridge_vlan_group *vg)
{
- u16 vid_range_start = 0, vid_range_end = 0;
- u16 vid_range_flags = 0;
- u16 pvid, vid, flags;
+ struct net_bridge_vlan *v;
+ u16 vid_range_start = 0, vid_range_end = 0, vid_range_flags = 0;
+ u16 flags, pvid;
int err = 0;
/* Pack IFLA_BRIDGE_VLAN_INFO's for every vlan
* and mark vlan info with begin and end flags
* if vlaninfo represents a range
*/
- pvid = br_get_pvid(pv);
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
+ pvid = br_get_pvid(vg);
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
flags = 0;
- if (vid == pvid)
+ if (!br_vlan_should_use(v))
+ continue;
+ if (v->vid == pvid)
flags |= BRIDGE_VLAN_INFO_PVID;
- if (test_bit(vid, pv->untagged_bitmap))
+ if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
flags |= BRIDGE_VLAN_INFO_UNTAGGED;
if (vid_range_start == 0) {
goto initvars;
- } else if ((vid - vid_range_end) == 1 &&
+ } else if ((v->vid - vid_range_end) == 1 &&
flags == vid_range_flags) {
- vid_range_end = vid;
+ vid_range_end = v->vid;
continue;
} else {
err = br_fill_ifvlaninfo_range(skb, vid_range_start,
@@ -220,8 +278,8 @@ static int br_fill_ifvlaninfo_compressed(struct sk_buff *skb,
}
initvars:
- vid_range_start = vid;
- vid_range_end = vid;
+ vid_range_start = v->vid;
+ vid_range_end = v->vid;
vid_range_flags = flags;
}
@@ -238,19 +296,23 @@ initvars:
}
static int br_fill_ifvlaninfo(struct sk_buff *skb,
- const struct net_port_vlans *pv)
+ struct net_bridge_vlan_group *vg)
{
struct bridge_vlan_info vinfo;
- u16 pvid, vid;
+ struct net_bridge_vlan *v;
+ u16 pvid;
- pvid = br_get_pvid(pv);
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
- vinfo.vid = vid;
+ pvid = br_get_pvid(vg);
+ list_for_each_entry_rcu(v, &vg->vlan_list, vlist) {
+ if (!br_vlan_should_use(v))
+ continue;
+
+ vinfo.vid = v->vid;
vinfo.flags = 0;
- if (vid == pvid)
+ if (v->vid == pvid)
vinfo.flags |= BRIDGE_VLAN_INFO_PVID;
- if (test_bit(vid, pv->untagged_bitmap))
+ if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
vinfo.flags |= BRIDGE_VLAN_INFO_UNTAGGED;
if (nla_put(skb, IFLA_BRIDGE_VLAN_INFO,
@@ -269,11 +331,11 @@ nla_put_failure:
* Contains port and master info as well as carrier and bridge state.
*/
static int br_fill_ifinfo(struct sk_buff *skb,
- const struct net_bridge_port *port,
+ struct net_bridge_port *port,
u32 pid, u32 seq, int event, unsigned int flags,
u32 filter_mask, const struct net_device *dev)
{
- const struct net_bridge *br;
+ struct net_bridge *br;
struct ifinfomsg *hdr;
struct nlmsghdr *nlh;
u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
@@ -320,26 +382,31 @@ static int br_fill_ifinfo(struct sk_buff *skb,
/* Check if the VID information is requested */
if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
- const struct net_port_vlans *pv;
+ struct net_bridge_vlan_group *vg;
struct nlattr *af;
int err;
+ /* RCU needed because of the VLAN locking rules (rcu || rtnl) */
+ rcu_read_lock();
if (port)
- pv = nbp_get_vlan_info(port);
+ vg = nbp_vlan_group_rcu(port);
else
- pv = br_get_vlan_info(br);
+ vg = br_vlan_group_rcu(br);
- if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID))
+ if (!vg || !vg->num_vlans) {
+ rcu_read_unlock();
goto done;
-
+ }
af = nla_nest_start(skb, IFLA_AF_SPEC);
- if (!af)
+ if (!af) {
+ rcu_read_unlock();
goto nla_put_failure;
-
+ }
if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
- err = br_fill_ifvlaninfo_compressed(skb, pv);
+ err = br_fill_ifvlaninfo_compressed(skb, vg);
else
- err = br_fill_ifvlaninfo(skb, pv);
+ err = br_fill_ifvlaninfo(skb, vg);
+ rcu_read_unlock();
if (err)
goto nla_put_failure;
nla_nest_end(skb, af);
@@ -413,14 +480,14 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p,
switch (cmd) {
case RTM_SETLINK:
if (p) {
+ /* if the MASTER flag is set this will act on the global
+ * per-VLAN entry as well
+ */
err = nbp_vlan_add(p, vinfo->vid, vinfo->flags);
if (err)
break;
-
- if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER)
- err = br_vlan_add(p->br, vinfo->vid,
- vinfo->flags);
} else {
+ vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY;
err = br_vlan_add(br, vinfo->vid, vinfo->flags);
}
break;
@@ -462,6 +529,9 @@ static int br_afspec(struct net_bridge *br,
if (vinfo_start)
return -EINVAL;
vinfo_start = vinfo;
+ /* don't allow range of pvids */
+ if (vinfo_start->flags & BRIDGE_VLAN_INFO_PVID)
+ return -EINVAL;
continue;
}
@@ -507,6 +577,7 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 },
[IFLA_BRPORT_PROXYARP] = { .type = NLA_U8 },
[IFLA_BRPORT_PROXYARP_WIFI] = { .type = NLA_U8 },
+ [IFLA_BRPORT_MULTICAST_ROUTER] = { .type = NLA_U8 },
};
/* Change the state of the port and notify spanning tree */
@@ -578,6 +649,18 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
return err;
}
+ if (tb[IFLA_BRPORT_FLUSH])
+ br_fdb_delete_by_port(p->br, p, 0, 0);
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (tb[IFLA_BRPORT_MULTICAST_ROUTER]) {
+ u8 mcast_router = nla_get_u8(tb[IFLA_BRPORT_MULTICAST_ROUTER]);
+
+ err = br_multicast_set_port_router(p, mcast_router);
+ if (err)
+ return err;
+ }
+#endif
br_port_flags_change(p, old_flags ^ p->flags);
return 0;
}
@@ -744,6 +827,27 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
[IFLA_BR_PRIORITY] = { .type = NLA_U16 },
[IFLA_BR_VLAN_FILTERING] = { .type = NLA_U8 },
[IFLA_BR_VLAN_PROTOCOL] = { .type = NLA_U16 },
+ [IFLA_BR_GROUP_FWD_MASK] = { .type = NLA_U16 },
+ [IFLA_BR_GROUP_ADDR] = { .type = NLA_BINARY,
+ .len = ETH_ALEN },
+ [IFLA_BR_MCAST_ROUTER] = { .type = NLA_U8 },
+ [IFLA_BR_MCAST_SNOOPING] = { .type = NLA_U8 },
+ [IFLA_BR_MCAST_QUERY_USE_IFADDR] = { .type = NLA_U8 },
+ [IFLA_BR_MCAST_QUERIER] = { .type = NLA_U8 },
+ [IFLA_BR_MCAST_HASH_ELASTICITY] = { .type = NLA_U32 },
+ [IFLA_BR_MCAST_HASH_MAX] = { .type = NLA_U32 },
+ [IFLA_BR_MCAST_LAST_MEMBER_CNT] = { .type = NLA_U32 },
+ [IFLA_BR_MCAST_STARTUP_QUERY_CNT] = { .type = NLA_U32 },
+ [IFLA_BR_MCAST_LAST_MEMBER_INTVL] = { .type = NLA_U64 },
+ [IFLA_BR_MCAST_MEMBERSHIP_INTVL] = { .type = NLA_U64 },
+ [IFLA_BR_MCAST_QUERIER_INTVL] = { .type = NLA_U64 },
+ [IFLA_BR_MCAST_QUERY_INTVL] = { .type = NLA_U64 },
+ [IFLA_BR_MCAST_QUERY_RESPONSE_INTVL] = { .type = NLA_U64 },
+ [IFLA_BR_MCAST_STARTUP_QUERY_INTVL] = { .type = NLA_U64 },
+ [IFLA_BR_NF_CALL_IPTABLES] = { .type = NLA_U8 },
+ [IFLA_BR_NF_CALL_IP6TABLES] = { .type = NLA_U8 },
+ [IFLA_BR_NF_CALL_ARPTABLES] = { .type = NLA_U8 },
+ [IFLA_BR_VLAN_DEFAULT_PVID] = { .type = NLA_U16 },
};
static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -774,9 +878,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
}
if (data[IFLA_BR_AGEING_TIME]) {
- u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]);
-
- br->ageing_time = clock_t_to_jiffies(ageing_time);
+ err = br_set_ageing_time(br, nla_get_u32(data[IFLA_BR_AGEING_TIME]));
+ if (err)
+ return err;
}
if (data[IFLA_BR_STP_STATE]) {
@@ -807,6 +911,158 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (data[IFLA_BR_VLAN_DEFAULT_PVID]) {
+ __u16 defpvid = nla_get_u16(data[IFLA_BR_VLAN_DEFAULT_PVID]);
+
+ err = __br_vlan_set_default_pvid(br, defpvid);
+ if (err)
+ return err;
+ }
+#endif
+
+ if (data[IFLA_BR_GROUP_FWD_MASK]) {
+ u16 fwd_mask = nla_get_u16(data[IFLA_BR_GROUP_FWD_MASK]);
+
+ if (fwd_mask & BR_GROUPFWD_RESTRICTED)
+ return -EINVAL;
+ br->group_fwd_mask = fwd_mask;
+ }
+
+ if (data[IFLA_BR_GROUP_ADDR]) {
+ u8 new_addr[ETH_ALEN];
+
+ if (nla_len(data[IFLA_BR_GROUP_ADDR]) != ETH_ALEN)
+ return -EINVAL;
+ memcpy(new_addr, nla_data(data[IFLA_BR_GROUP_ADDR]), ETH_ALEN);
+ if (!is_link_local_ether_addr(new_addr))
+ return -EINVAL;
+ if (new_addr[5] == 1 || /* 802.3x Pause address */
+ new_addr[5] == 2 || /* 802.3ad Slow protocols */
+ new_addr[5] == 3) /* 802.1X PAE address */
+ return -EINVAL;
+ spin_lock_bh(&br->lock);
+ memcpy(br->group_addr, new_addr, sizeof(br->group_addr));
+ spin_unlock_bh(&br->lock);
+ br->group_addr_set = true;
+ br_recalculate_fwd_mask(br);
+ }
+
+ if (data[IFLA_BR_FDB_FLUSH])
+ br_fdb_flush(br);
+
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (data[IFLA_BR_MCAST_ROUTER]) {
+ u8 multicast_router = nla_get_u8(data[IFLA_BR_MCAST_ROUTER]);
+
+ err = br_multicast_set_router(br, multicast_router);
+ if (err)
+ return err;
+ }
+
+ if (data[IFLA_BR_MCAST_SNOOPING]) {
+ u8 mcast_snooping = nla_get_u8(data[IFLA_BR_MCAST_SNOOPING]);
+
+ err = br_multicast_toggle(br, mcast_snooping);
+ if (err)
+ return err;
+ }
+
+ if (data[IFLA_BR_MCAST_QUERY_USE_IFADDR]) {
+ u8 val;
+
+ val = nla_get_u8(data[IFLA_BR_MCAST_QUERY_USE_IFADDR]);
+ br->multicast_query_use_ifaddr = !!val;
+ }
+
+ if (data[IFLA_BR_MCAST_QUERIER]) {
+ u8 mcast_querier = nla_get_u8(data[IFLA_BR_MCAST_QUERIER]);
+
+ err = br_multicast_set_querier(br, mcast_querier);
+ if (err)
+ return err;
+ }
+
+ if (data[IFLA_BR_MCAST_HASH_ELASTICITY]) {
+ u32 val = nla_get_u32(data[IFLA_BR_MCAST_HASH_ELASTICITY]);
+
+ br->hash_elasticity = val;
+ }
+
+ if (data[IFLA_BR_MCAST_HASH_MAX]) {
+ u32 hash_max = nla_get_u32(data[IFLA_BR_MCAST_HASH_MAX]);
+
+ err = br_multicast_set_hash_max(br, hash_max);
+ if (err)
+ return err;
+ }
+
+ if (data[IFLA_BR_MCAST_LAST_MEMBER_CNT]) {
+ u32 val = nla_get_u32(data[IFLA_BR_MCAST_LAST_MEMBER_CNT]);
+
+ br->multicast_last_member_count = val;
+ }
+
+ if (data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]) {
+ u32 val = nla_get_u32(data[IFLA_BR_MCAST_STARTUP_QUERY_CNT]);
+
+ br->multicast_startup_query_count = val;
+ }
+
+ if (data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_LAST_MEMBER_INTVL]);
+
+ br->multicast_last_member_interval = clock_t_to_jiffies(val);
+ }
+
+ if (data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_MEMBERSHIP_INTVL]);
+
+ br->multicast_membership_interval = clock_t_to_jiffies(val);
+ }
+
+ if (data[IFLA_BR_MCAST_QUERIER_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERIER_INTVL]);
+
+ br->multicast_querier_interval = clock_t_to_jiffies(val);
+ }
+
+ if (data[IFLA_BR_MCAST_QUERY_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_INTVL]);
+
+ br->multicast_query_interval = clock_t_to_jiffies(val);
+ }
+
+ if (data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_QUERY_RESPONSE_INTVL]);
+
+ br->multicast_query_response_interval = clock_t_to_jiffies(val);
+ }
+
+ if (data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]) {
+ u64 val = nla_get_u64(data[IFLA_BR_MCAST_STARTUP_QUERY_INTVL]);
+
+ br->multicast_startup_query_interval = clock_t_to_jiffies(val);
+ }
+#endif
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (data[IFLA_BR_NF_CALL_IPTABLES]) {
+ u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IPTABLES]);
+
+ br->nf_call_iptables = val ? true : false;
+ }
+
+ if (data[IFLA_BR_NF_CALL_IP6TABLES]) {
+ u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_IP6TABLES]);
+
+ br->nf_call_ip6tables = val ? true : false;
+ }
+
+ if (data[IFLA_BR_NF_CALL_ARPTABLES]) {
+ u8 val = nla_get_u8(data[IFLA_BR_NF_CALL_ARPTABLES]);
+
+ br->nf_call_arptables = val ? true : false;
+ }
#endif
return 0;
@@ -823,6 +1079,40 @@ static size_t br_get_size(const struct net_device *brdev)
nla_total_size(sizeof(u8)) + /* IFLA_BR_VLAN_FILTERING */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
nla_total_size(sizeof(__be16)) + /* IFLA_BR_VLAN_PROTOCOL */
+ nla_total_size(sizeof(u16)) + /* IFLA_BR_VLAN_DEFAULT_PVID */
+#endif
+ nla_total_size(sizeof(u16)) + /* IFLA_BR_GROUP_FWD_MASK */
+ nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_ROOT_ID */
+ nla_total_size(sizeof(struct ifla_bridge_id)) + /* IFLA_BR_BRIDGE_ID */
+ nla_total_size(sizeof(u16)) + /* IFLA_BR_ROOT_PORT */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_ROOT_PATH_COST */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_TOPOLOGY_CHANGE_DETECTED */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_HELLO_TIMER */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_TCN_TIMER */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_TOPOLOGY_CHANGE_TIMER */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_GC_TIMER */
+ nla_total_size(ETH_ALEN) + /* IFLA_BR_GROUP_ADDR */
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_ROUTER */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_SNOOPING */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERY_USE_IFADDR */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_MCAST_QUERIER */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_ELASTICITY */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_HASH_MAX */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_LAST_MEMBER_CNT */
+ nla_total_size(sizeof(u32)) + /* IFLA_BR_MCAST_STARTUP_QUERY_CNT */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_LAST_MEMBER_INTVL */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_MEMBERSHIP_INTVL */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERIER_INTVL */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_INTVL */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_QUERY_RESPONSE_INTVL */
+ nla_total_size(sizeof(u64)) + /* IFLA_BR_MCAST_STARTUP_QUERY_INTVL */
+#endif
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IPTABLES */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_IP6TABLES */
+ nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */
#endif
0;
}
@@ -837,6 +1127,20 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
u32 stp_enabled = br->stp_enabled;
u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
u8 vlan_enabled = br_vlan_enabled(br);
+ u64 clockval;
+
+ clockval = br_timer_value(&br->hello_timer);
+ if (nla_put_u64(skb, IFLA_BR_HELLO_TIMER, clockval))
+ return -EMSGSIZE;
+ clockval = br_timer_value(&br->tcn_timer);
+ if (nla_put_u64(skb, IFLA_BR_TCN_TIMER, clockval))
+ return -EMSGSIZE;
+ clockval = br_timer_value(&br->topology_change_timer);
+ if (nla_put_u64(skb, IFLA_BR_TOPOLOGY_CHANGE_TIMER, clockval))
+ return -EMSGSIZE;
+ clockval = br_timer_value(&br->gc_timer);
+ if (nla_put_u64(skb, IFLA_BR_GC_TIMER, clockval))
+ return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
@@ -844,11 +1148,66 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) ||
nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) ||
nla_put_u16(skb, IFLA_BR_PRIORITY, priority) ||
- nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled))
+ nla_put_u8(skb, IFLA_BR_VLAN_FILTERING, vlan_enabled) ||
+ nla_put_u16(skb, IFLA_BR_GROUP_FWD_MASK, br->group_fwd_mask) ||
+ nla_put(skb, IFLA_BR_BRIDGE_ID, sizeof(struct ifla_bridge_id),
+ &br->bridge_id) ||
+ nla_put(skb, IFLA_BR_ROOT_ID, sizeof(struct ifla_bridge_id),
+ &br->designated_root) ||
+ nla_put_u16(skb, IFLA_BR_ROOT_PORT, br->root_port) ||
+ nla_put_u32(skb, IFLA_BR_ROOT_PATH_COST, br->root_path_cost) ||
+ nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE, br->topology_change) ||
+ nla_put_u8(skb, IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ br->topology_change_detected) ||
+ nla_put(skb, IFLA_BR_GROUP_ADDR, ETH_ALEN, br->group_addr))
return -EMSGSIZE;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
- if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto))
+ if (nla_put_be16(skb, IFLA_BR_VLAN_PROTOCOL, br->vlan_proto) ||
+ nla_put_u16(skb, IFLA_BR_VLAN_DEFAULT_PVID, br->default_pvid))
+ return -EMSGSIZE;
+#endif
+#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ if (nla_put_u8(skb, IFLA_BR_MCAST_ROUTER, br->multicast_router) ||
+ nla_put_u8(skb, IFLA_BR_MCAST_SNOOPING, !br->multicast_disabled) ||
+ nla_put_u8(skb, IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ br->multicast_query_use_ifaddr) ||
+ nla_put_u8(skb, IFLA_BR_MCAST_QUERIER, br->multicast_querier) ||
+ nla_put_u32(skb, IFLA_BR_MCAST_HASH_ELASTICITY,
+ br->hash_elasticity) ||
+ nla_put_u32(skb, IFLA_BR_MCAST_HASH_MAX, br->hash_max) ||
+ nla_put_u32(skb, IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ br->multicast_last_member_count) ||
+ nla_put_u32(skb, IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ br->multicast_startup_query_count))
+ return -EMSGSIZE;
+
+ clockval = jiffies_to_clock_t(br->multicast_last_member_interval);
+ if (nla_put_u64(skb, IFLA_BR_MCAST_LAST_MEMBER_INTVL, clockval))
+ return -EMSGSIZE;
+ clockval = jiffies_to_clock_t(br->multicast_membership_interval);
+ if (nla_put_u64(skb, IFLA_BR_MCAST_MEMBERSHIP_INTVL, clockval))
+ return -EMSGSIZE;
+ clockval = jiffies_to_clock_t(br->multicast_querier_interval);
+ if (nla_put_u64(skb, IFLA_BR_MCAST_QUERIER_INTVL, clockval))
+ return -EMSGSIZE;
+ clockval = jiffies_to_clock_t(br->multicast_query_interval);
+ if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_INTVL, clockval))
+ return -EMSGSIZE;
+ clockval = jiffies_to_clock_t(br->multicast_query_response_interval);
+ if (nla_put_u64(skb, IFLA_BR_MCAST_QUERY_RESPONSE_INTVL, clockval))
+ return -EMSGSIZE;
+ clockval = jiffies_to_clock_t(br->multicast_startup_query_interval);
+ if (nla_put_u64(skb, IFLA_BR_MCAST_STARTUP_QUERY_INTVL, clockval))
+ return -EMSGSIZE;
+#endif
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ if (nla_put_u8(skb, IFLA_BR_NF_CALL_IPTABLES,
+ br->nf_call_iptables ? 1 : 0) ||
+ nla_put_u8(skb, IFLA_BR_NF_CALL_IP6TABLES,
+ br->nf_call_ip6tables ? 1 : 0) ||
+ nla_put_u8(skb, IFLA_BR_NF_CALL_ARPTABLES,
+ br->nf_call_arptables ? 1 : 0))
return -EMSGSIZE;
#endif
@@ -857,20 +1216,22 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
static size_t br_get_link_af_size(const struct net_device *dev)
{
- struct net_port_vlans *pv;
-
- if (br_port_exists(dev))
- pv = nbp_get_vlan_info(br_port_get_rtnl(dev));
- else if (dev->priv_flags & IFF_EBRIDGE)
- pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
- else
- return 0;
+ struct net_bridge_port *p;
+ struct net_bridge *br;
+ int num_vlans = 0;
- if (!pv)
- return 0;
+ if (br_port_exists(dev)) {
+ p = br_port_get_rtnl(dev);
+ num_vlans = br_get_num_vlan_infos(nbp_vlan_group(p),
+ RTEXT_FILTER_BRVLAN);
+ } else if (dev->priv_flags & IFF_EBRIDGE) {
+ br = netdev_priv(dev);
+ num_vlans = br_get_num_vlan_infos(br_vlan_group(br),
+ RTEXT_FILTER_BRVLAN);
+ }
/* Each VLAN is returned in bridge_vlan_info along with flags */
- return pv->num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
+ return num_vlans * nla_total_size(sizeof(struct bridge_vlan_info));
}
static struct rtnl_af_ops br_af_ops __read_mostly = {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 213baf7aaa93..216018c76018 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -20,6 +20,7 @@
#include <net/route.h>
#include <net/ip6_fib.h>
#include <linux/if_vlan.h>
+#include <linux/rhashtable.h>
#define BR_HASH_BITS 8
#define BR_HASH_SIZE (1 << BR_HASH_BITS)
@@ -28,7 +29,6 @@
#define BR_PORT_BITS 10
#define BR_MAX_PORTS (1<<BR_PORT_BITS)
-#define BR_VLAN_BITMAP_LEN BITS_TO_LONGS(VLAN_N_VID)
#define BR_VERSION "2.3"
@@ -77,17 +77,61 @@ struct bridge_mcast_querier {
};
#endif
-struct net_port_vlans {
- u16 port_idx;
- u16 pvid;
+/**
+ * struct net_bridge_vlan - per-vlan entry
+ *
+ * @vnode: rhashtable member
+ * @vid: VLAN id
+ * @flags: bridge vlan flags
+ * @br: if MASTER flag set, this points to a bridge struct
+ * @port: if MASTER flag unset, this points to a port struct
+ * @refcnt: if MASTER flag set, this is bumped for each port referencing it
+ * @brvlan: if MASTER flag unset, this points to the global per-VLAN context
+ * for this VLAN entry
+ * @vlist: sorted list of VLAN entries
+ * @rcu: used for entry destruction
+ *
+ * This structure is shared between the global per-VLAN entries contained in
+ * the bridge rhashtable and the local per-port per-VLAN entries contained in
+ * the port's rhashtable. The union entries should be interpreted depending on
+ * the entry flags that are set.
+ */
+struct net_bridge_vlan {
+ struct rhash_head vnode;
+ u16 vid;
+ u16 flags;
union {
- struct net_bridge_port *port;
- struct net_bridge *br;
- } parent;
+ struct net_bridge *br;
+ struct net_bridge_port *port;
+ };
+ union {
+ atomic_t refcnt;
+ struct net_bridge_vlan *brvlan;
+ };
+ struct list_head vlist;
+
struct rcu_head rcu;
- unsigned long vlan_bitmap[BR_VLAN_BITMAP_LEN];
- unsigned long untagged_bitmap[BR_VLAN_BITMAP_LEN];
+};
+
+/**
+ * struct net_bridge_vlan_group
+ *
+ * @vlan_hash: VLAN entry rhashtable
+ * @vlan_list: sorted VLAN entry list
+ * @num_vlans: number of total VLAN entries
+ * @pvid: PVID VLAN id
+ *
+ * IMPORTANT: Be careful when checking if there're VLAN entries using list
+ * primitives because the bridge can have entries in its list which
+ * are just for global context but not for filtering, i.e. they have
+ * the master flag set but not the brentry flag. If you have to check
+ * if there're "real" entries in the bridge please test @num_vlans
+ */
+struct net_bridge_vlan_group {
+ struct rhashtable vlan_hash;
+ struct list_head vlan_list;
u16 num_vlans;
+ u16 pvid;
};
struct net_bridge_fdb_entry
@@ -185,7 +229,7 @@ struct net_bridge_port
struct netpoll *np;
#endif
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
- struct net_port_vlans __rcu *vlan_info;
+ struct net_bridge_vlan_group __rcu *vlgrp;
#endif
};
@@ -293,10 +337,10 @@ struct net_bridge
struct kobject *ifobj;
u32 auto_cnt;
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
+ struct net_bridge_vlan_group __rcu *vlgrp;
u8 vlan_enabled;
__be16 vlan_proto;
u16 default_pvid;
- struct net_port_vlans __rcu *vlan_info;
#endif
};
@@ -344,6 +388,31 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
return !memcmp(&br->bridge_id, &br->designated_root, 8);
}
+/* check if a VLAN entry is global */
+static inline bool br_vlan_is_master(const struct net_bridge_vlan *v)
+{
+ return v->flags & BRIDGE_VLAN_INFO_MASTER;
+}
+
+/* check if a VLAN entry is used by the bridge */
+static inline bool br_vlan_is_brentry(const struct net_bridge_vlan *v)
+{
+ return v->flags & BRIDGE_VLAN_INFO_BRENTRY;
+}
+
+/* check if we should use the vlan entry, returns false if it's only context */
+static inline bool br_vlan_should_use(const struct net_bridge_vlan *v)
+{
+ if (br_vlan_is_master(v)) {
+ if (br_vlan_is_brentry(v))
+ return true;
+ else
+ return false;
+ }
+
+ return true;
+}
+
/* br_device.c */
void br_dev_setup(struct net_device *dev);
void br_dev_delete(struct net_device *dev, struct list_head *list);
@@ -413,10 +482,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
/* br_forward.c */
void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
-int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb);
+int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb);
void br_forward(const struct net_bridge_port *to,
struct sk_buff *skb, struct sk_buff *skb0);
-int br_forward_finish(struct sock *sk, struct sk_buff *skb);
+int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast);
void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
struct sk_buff *skb2, bool unicast);
@@ -434,7 +503,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
void br_manage_promisc(struct net_bridge *br);
/* br_input.c */
-int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
+int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
@@ -601,18 +670,19 @@ static inline void br_mdb_uninit(void)
/* br_vlan.c */
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
-bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
- struct sk_buff *skb, u16 *vid);
-bool br_allowed_egress(struct net_bridge *br, const struct net_port_vlans *v,
+bool br_allowed_ingress(const struct net_bridge *br,
+ struct net_bridge_vlan_group *vg, struct sk_buff *skb,
+ u16 *vid);
+bool br_allowed_egress(struct net_bridge_vlan_group *vg,
const struct sk_buff *skb);
bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid);
struct sk_buff *br_handle_vlan(struct net_bridge *br,
- const struct net_port_vlans *v,
+ struct net_bridge_vlan_group *vg,
struct sk_buff *skb);
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
-bool br_vlan_find(struct net_bridge *br, u16 vid);
+struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid);
void br_recalculate_fwd_mask(struct net_bridge *br);
int __br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
@@ -620,22 +690,35 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto);
int br_vlan_set_proto(struct net_bridge *br, unsigned long val);
int br_vlan_init(struct net_bridge *br);
int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val);
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
void nbp_vlan_flush(struct net_bridge_port *port);
-bool nbp_vlan_find(struct net_bridge_port *port, u16 vid);
int nbp_vlan_init(struct net_bridge_port *port);
+int nbp_get_num_vlan_infos(struct net_bridge_port *p, u32 filter_mask);
+
+static inline struct net_bridge_vlan_group *br_vlan_group(
+ const struct net_bridge *br)
+{
+ return rtnl_dereference(br->vlgrp);
+}
-static inline struct net_port_vlans *br_get_vlan_info(
- const struct net_bridge *br)
+static inline struct net_bridge_vlan_group *nbp_vlan_group(
+ const struct net_bridge_port *p)
{
- return rcu_dereference_rtnl(br->vlan_info);
+ return rtnl_dereference(p->vlgrp);
}
-static inline struct net_port_vlans *nbp_get_vlan_info(
- const struct net_bridge_port *p)
+static inline struct net_bridge_vlan_group *br_vlan_group_rcu(
+ const struct net_bridge *br)
{
- return rcu_dereference_rtnl(p->vlan_info);
+ return rcu_dereference(br->vlgrp);
+}
+
+static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu(
+ const struct net_bridge_port *p)
+{
+ return rcu_dereference(p->vlgrp);
}
/* Since bridge now depends on 8021Q module, but the time bridge sees the
@@ -645,9 +728,9 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
{
int err = 0;
- if (skb_vlan_tag_present(skb))
+ if (skb_vlan_tag_present(skb)) {
*vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
- else {
+ } else {
*vid = 0;
err = -EINVAL;
}
@@ -655,13 +738,13 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid)
return err;
}
-static inline u16 br_get_pvid(const struct net_port_vlans *v)
+static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
{
- if (!v)
+ if (!vg)
return 0;
smp_rmb();
- return v->pvid;
+ return vg->pvid;
}
static inline int br_vlan_enabled(struct net_bridge *br)
@@ -669,16 +752,15 @@ static inline int br_vlan_enabled(struct net_bridge *br)
return br->vlan_enabled;
}
#else
-static inline bool br_allowed_ingress(struct net_bridge *br,
- struct net_port_vlans *v,
+static inline bool br_allowed_ingress(const struct net_bridge *br,
+ struct net_bridge_vlan_group *vg,
struct sk_buff *skb,
u16 *vid)
{
return true;
}
-static inline bool br_allowed_egress(struct net_bridge *br,
- const struct net_port_vlans *v,
+static inline bool br_allowed_egress(struct net_bridge_vlan_group *vg,
const struct sk_buff *skb)
{
return true;
@@ -691,7 +773,7 @@ static inline bool br_should_learn(struct net_bridge_port *p,
}
static inline struct sk_buff *br_handle_vlan(struct net_bridge *br,
- const struct net_port_vlans *v,
+ struct net_bridge_vlan_group *vg,
struct sk_buff *skb)
{
return skb;
@@ -711,11 +793,6 @@ static inline void br_vlan_flush(struct net_bridge *br)
{
}
-static inline bool br_vlan_find(struct net_bridge *br, u16 vid)
-{
- return false;
-}
-
static inline void br_recalculate_fwd_mask(struct net_bridge *br)
{
}
@@ -739,22 +816,12 @@ static inline void nbp_vlan_flush(struct net_bridge_port *port)
{
}
-static inline struct net_port_vlans *br_get_vlan_info(
- const struct net_bridge *br)
-{
- return NULL;
-}
-static inline struct net_port_vlans *nbp_get_vlan_info(
- const struct net_bridge_port *p)
+static inline struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg,
+ u16 vid)
{
return NULL;
}
-static inline bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
-{
- return false;
-}
-
static inline int nbp_vlan_init(struct net_bridge_port *port)
{
return 0;
@@ -764,7 +831,8 @@ static inline u16 br_vlan_get_tag(const struct sk_buff *skb, u16 *tag)
{
return 0;
}
-static inline u16 br_get_pvid(const struct net_port_vlans *v)
+
+static inline u16 br_get_pvid(const struct net_bridge_vlan_group *vg)
{
return 0;
}
@@ -779,6 +847,37 @@ static inline int __br_vlan_filter_toggle(struct net_bridge *br,
{
return -EOPNOTSUPP;
}
+
+static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
+ u32 filter_mask)
+{
+ return 0;
+}
+
+static inline struct net_bridge_vlan_group *br_vlan_group(
+ const struct net_bridge *br)
+{
+ return NULL;
+}
+
+static inline struct net_bridge_vlan_group *nbp_vlan_group(
+ const struct net_bridge_port *p)
+{
+ return NULL;
+}
+
+static inline struct net_bridge_vlan_group *br_vlan_group_rcu(
+ const struct net_bridge *br)
+{
+ return NULL;
+}
+
+static inline struct net_bridge_vlan_group *nbp_vlan_group_rcu(
+ const struct net_bridge_port *p)
+{
+ return NULL;
+}
+
#endif
struct nf_br_ops {
@@ -808,6 +907,7 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
int br_set_forward_delay(struct net_bridge *br, unsigned long x);
int br_set_hello_time(struct net_bridge *br, unsigned long x);
int br_set_max_age(struct net_bridge *br, unsigned long x);
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time);
/* br_stp_if.c */
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index ed74ffaa851f..80c34d70218c 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -40,14 +40,15 @@ void br_log_state(const struct net_bridge_port *p)
void br_set_state(struct net_bridge_port *p, unsigned int state)
{
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_STP_STATE,
+ .id = SWITCHDEV_ATTR_ID_PORT_STP_STATE,
+ .flags = SWITCHDEV_F_DEFER,
.u.stp_state = state,
};
int err;
p->state = state;
err = switchdev_port_attr_set(p->dev, &attr);
- if (err && err != -EOPNOTSUPP)
+ if (err)
br_warn(p->br, "error setting offload STP state on port %u(%s)\n",
(unsigned int) p->port_no, p->dev->name);
}
@@ -566,6 +567,29 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
}
+int br_set_ageing_time(struct net_bridge *br, u32 ageing_time)
+{
+ struct switchdev_attr attr = {
+ .id = SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME,
+ .flags = SWITCHDEV_F_SKIP_EOPNOTSUPP,
+ .u.ageing_time = ageing_time,
+ };
+ unsigned long t = clock_t_to_jiffies(ageing_time);
+ int err;
+
+ if (t < BR_MIN_AGEING_TIME || t > BR_MAX_AGEING_TIME)
+ return -ERANGE;
+
+ err = switchdev_port_attr_set(br->dev, &attr);
+ if (err)
+ return err;
+
+ br->ageing_time = t;
+ mod_timer(&br->gc_timer, jiffies);
+
+ return 0;
+}
+
void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
{
br->bridge_forward_delay = t;
@@ -576,17 +600,12 @@ void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
int br_set_forward_delay(struct net_bridge *br, unsigned long val)
{
unsigned long t = clock_t_to_jiffies(val);
- int err = -ERANGE;
- spin_lock_bh(&br->lock);
- if (br->stp_enabled != BR_NO_STP &&
- (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
- goto unlock;
+ if (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)
+ return -ERANGE;
+ spin_lock_bh(&br->lock);
__br_set_forward_delay(br, t);
- err = 0;
-
-unlock:
spin_unlock_bh(&br->lock);
- return err;
+ return 0;
}
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 534fc4cd263e..5881fbc114a9 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -30,6 +30,12 @@
#define LLC_RESERVE sizeof(struct llc_pdu_un)
+static int br_send_bpdu_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dev_queue_xmit(skb);
+}
+
static void br_send_bpdu(struct net_bridge_port *p,
const unsigned char *data, int length)
{
@@ -54,9 +60,9 @@ static void br_send_bpdu(struct net_bridge_port *p,
skb_reset_mac_header(skb);
- NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb,
- NULL, skb->dev,
- dev_queue_xmit_sk);
+ NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT,
+ dev_net(p->dev), NULL, skb, NULL, skb->dev,
+ br_send_bpdu_finish);
}
static inline void br_set_ticks(unsigned char *dest, int j)
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 4c97fc50fb70..8365bd53c421 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -102,8 +102,15 @@ static ssize_t ageing_time_show(struct device *d,
static int set_ageing_time(struct net_bridge *br, unsigned long val)
{
- br->ageing_time = clock_t_to_jiffies(val);
- return 0;
+ int ret;
+
+ if (!rtnl_trylock())
+ return restart_syscall();
+
+ ret = br_set_ageing_time(br, val);
+ rtnl_unlock();
+
+ return ret;
}
static ssize_t ageing_time_store(struct device *d,
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 5f5a02b49a99..5f0d0cc4744f 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -6,86 +6,205 @@
#include "br_private.h"
-static void __vlan_add_pvid(struct net_port_vlans *v, u16 vid)
+static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- if (v->pvid == vid)
+ const struct net_bridge_vlan *vle = ptr;
+ u16 vid = *(u16 *)arg->key;
+
+ return vle->vid != vid;
+}
+
+static const struct rhashtable_params br_vlan_rht_params = {
+ .head_offset = offsetof(struct net_bridge_vlan, vnode),
+ .key_offset = offsetof(struct net_bridge_vlan, vid),
+ .key_len = sizeof(u16),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .max_size = VLAN_N_VID,
+ .obj_cmpfn = br_vlan_cmp,
+ .automatic_shrinking = true,
+};
+
+static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid)
+{
+ return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params);
+}
+
+static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid)
+{
+ if (vg->pvid == vid)
return;
smp_wmb();
- v->pvid = vid;
+ vg->pvid = vid;
}
-static void __vlan_delete_pvid(struct net_port_vlans *v, u16 vid)
+static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
- if (v->pvid != vid)
+ if (vg->pvid != vid)
return;
smp_wmb();
- v->pvid = 0;
+ vg->pvid = 0;
}
-static void __vlan_add_flags(struct net_port_vlans *v, u16 vid, u16 flags)
+static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
{
+ struct net_bridge_vlan_group *vg;
+
+ if (br_vlan_is_master(v))
+ vg = br_vlan_group(v->br);
+ else
+ vg = nbp_vlan_group(v->port);
+
if (flags & BRIDGE_VLAN_INFO_PVID)
- __vlan_add_pvid(v, vid);
+ __vlan_add_pvid(vg, v->vid);
else
- __vlan_delete_pvid(v, vid);
+ __vlan_delete_pvid(vg, v->vid);
if (flags & BRIDGE_VLAN_INFO_UNTAGGED)
- set_bit(vid, v->untagged_bitmap);
+ v->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
else
- clear_bit(vid, v->untagged_bitmap);
+ v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED;
}
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
u16 vid, u16 flags)
{
- const struct net_device_ops *ops = dev->netdev_ops;
+ struct switchdev_obj_port_vlan v = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .flags = flags,
+ .vid_begin = vid,
+ .vid_end = vid,
+ };
int err;
- /* If driver uses VLAN ndo ops, use 8021q to install vid
- * on device, otherwise try switchdev ops to install vid.
+ /* Try switchdev op first. In case it is not supported, fallback to
+ * 8021q add.
*/
+ err = switchdev_port_obj_add(dev, &v.obj);
+ if (err == -EOPNOTSUPP)
+ return vlan_vid_add(dev, br->vlan_proto, vid);
+ return err;
+}
- if (ops->ndo_vlan_rx_add_vid) {
- err = vlan_vid_add(dev, br->vlan_proto, vid);
- } else {
- struct switchdev_obj vlan_obj = {
- .id = SWITCHDEV_OBJ_PORT_VLAN,
- .u.vlan = {
- .flags = flags,
- .vid_begin = vid,
- .vid_end = vid,
- },
- };
+static void __vlan_add_list(struct net_bridge_vlan *v)
+{
+ struct net_bridge_vlan_group *vg;
+ struct list_head *headp, *hpos;
+ struct net_bridge_vlan *vent;
- err = switchdev_port_obj_add(dev, &vlan_obj);
- if (err == -EOPNOTSUPP)
- err = 0;
+ if (br_vlan_is_master(v))
+ vg = br_vlan_group(v->br);
+ else
+ vg = nbp_vlan_group(v->port);
+
+ headp = &vg->vlan_list;
+ list_for_each_prev(hpos, headp) {
+ vent = list_entry(hpos, struct net_bridge_vlan, vlist);
+ if (v->vid < vent->vid)
+ continue;
+ else
+ break;
}
+ list_add_rcu(&v->vlist, hpos);
+}
- return err;
+static void __vlan_del_list(struct net_bridge_vlan *v)
+{
+ list_del_rcu(&v->vlist);
}
-static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
+static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
+ u16 vid)
{
- struct net_bridge_port *p = NULL;
- struct net_bridge *br;
- struct net_device *dev;
+ struct switchdev_obj_port_vlan v = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .vid_begin = vid,
+ .vid_end = vid,
+ };
int err;
- if (test_bit(vid, v->vlan_bitmap)) {
- __vlan_add_flags(v, vid, flags);
+ /* Try switchdev op first. In case it is not supported, fallback to
+ * 8021q del.
+ */
+ err = switchdev_port_obj_del(dev, &v.obj);
+ if (err == -EOPNOTSUPP) {
+ vlan_vid_del(dev, br->vlan_proto, vid);
return 0;
}
+ return err;
+}
+
+/* Returns a master vlan, if it didn't exist it gets created. In all cases
+ * a reference is taken to the master vlan before returning.
+ */
+static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid)
+{
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *masterv;
+
+ vg = br_vlan_group(br);
+ masterv = br_vlan_find(vg, vid);
+ if (!masterv) {
+ /* missing global ctx, create it now */
+ if (br_vlan_add(br, vid, 0))
+ return NULL;
+ masterv = br_vlan_find(vg, vid);
+ if (WARN_ON(!masterv))
+ return NULL;
+ }
+ atomic_inc(&masterv->refcnt);
+
+ return masterv;
+}
+
+static void br_vlan_put_master(struct net_bridge_vlan *masterv)
+{
+ struct net_bridge_vlan_group *vg;
+
+ if (!br_vlan_is_master(masterv))
+ return;
- if (v->port_idx) {
- p = v->parent.port;
+ vg = br_vlan_group(masterv->br);
+ if (atomic_dec_and_test(&masterv->refcnt)) {
+ rhashtable_remove_fast(&vg->vlan_hash,
+ &masterv->vnode, br_vlan_rht_params);
+ __vlan_del_list(masterv);
+ kfree_rcu(masterv, rcu);
+ }
+}
+
+/* This is the shared VLAN add function which works for both ports and bridge
+ * devices. There are four possible calls to this function in terms of the
+ * vlan entry type:
+ * 1. vlan is being added on a port (no master flags, global entry exists)
+ * 2. vlan is being added on a bridge (both master and brentry flags)
+ * 3. vlan is being added on a port, but a global entry didn't exist which
+ * is being created right now (master flag set, brentry flag unset), the
+ * global entry is used for global per-vlan features, but not for filtering
+ * 4. same as 3 but with both master and brentry flags set so the entry
+ * will be used for filtering in both the port and the bridge
+ */
+static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
+{
+ struct net_bridge_vlan *masterv = NULL;
+ struct net_bridge_port *p = NULL;
+ struct net_bridge_vlan_group *vg;
+ struct net_device *dev;
+ struct net_bridge *br;
+ int err;
+
+ if (br_vlan_is_master(v)) {
+ br = v->br;
+ dev = br->dev;
+ vg = br_vlan_group(br);
+ } else {
+ p = v->port;
br = p->br;
dev = p->dev;
- } else {
- br = v->parent.br;
- dev = br->dev;
+ vg = nbp_vlan_group(p);
}
if (p) {
@@ -93,116 +212,140 @@ static int __vlan_add(struct net_port_vlans *v, u16 vid, u16 flags)
* This ensures tagged traffic enters the bridge when
* promiscuous mode is disabled by br_manage_promisc().
*/
- err = __vlan_vid_add(dev, br, vid, flags);
+ err = __vlan_vid_add(dev, br, v->vid, flags);
if (err)
- return err;
- }
+ goto out;
+
+ /* need to work on the master vlan too */
+ if (flags & BRIDGE_VLAN_INFO_MASTER) {
+ err = br_vlan_add(br, v->vid, flags |
+ BRIDGE_VLAN_INFO_BRENTRY);
+ if (err)
+ goto out_filt;
+ }
- err = br_fdb_insert(br, p, dev->dev_addr, vid);
- if (err) {
- br_err(br, "failed insert local address into bridge "
- "forwarding table\n");
- goto out_filt;
+ masterv = br_vlan_get_master(br, v->vid);
+ if (!masterv)
+ goto out_filt;
+ v->brvlan = masterv;
}
- set_bit(vid, v->vlan_bitmap);
- v->num_vlans++;
- __vlan_add_flags(v, vid, flags);
+ /* Add the dev mac and count the vlan only if it's usable */
+ if (br_vlan_should_use(v)) {
+ err = br_fdb_insert(br, p, dev->dev_addr, v->vid);
+ if (err) {
+ br_err(br, "failed insert local address into bridge forwarding table\n");
+ goto out_filt;
+ }
+ vg->num_vlans++;
+ }
- return 0;
+ err = rhashtable_lookup_insert_fast(&vg->vlan_hash, &v->vnode,
+ br_vlan_rht_params);
+ if (err)
+ goto out_fdb_insert;
-out_filt:
- if (p)
- vlan_vid_del(dev, br->vlan_proto, vid);
+ __vlan_add_list(v);
+ __vlan_add_flags(v, flags);
+out:
return err;
-}
-static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
- u16 vid)
-{
- const struct net_device_ops *ops = dev->netdev_ops;
- int err = 0;
-
- /* If driver uses VLAN ndo ops, use 8021q to delete vid
- * on device, otherwise try switchdev ops to delete vid.
- */
-
- if (ops->ndo_vlan_rx_kill_vid) {
- vlan_vid_del(dev, br->vlan_proto, vid);
- } else {
- struct switchdev_obj vlan_obj = {
- .id = SWITCHDEV_OBJ_PORT_VLAN,
- .u.vlan = {
- .vid_begin = vid,
- .vid_end = vid,
- },
- };
+out_fdb_insert:
+ if (br_vlan_should_use(v)) {
+ br_fdb_find_delete_local(br, p, dev->dev_addr, v->vid);
+ vg->num_vlans--;
+ }
- err = switchdev_port_obj_del(dev, &vlan_obj);
- if (err == -EOPNOTSUPP)
- err = 0;
+out_filt:
+ if (p) {
+ __vlan_vid_del(dev, br, v->vid);
+ if (masterv) {
+ br_vlan_put_master(masterv);
+ v->brvlan = NULL;
+ }
}
- return err;
+ goto out;
}
-static int __vlan_del(struct net_port_vlans *v, u16 vid)
+static int __vlan_del(struct net_bridge_vlan *v)
{
- if (!test_bit(vid, v->vlan_bitmap))
- return -EINVAL;
-
- __vlan_delete_pvid(v, vid);
- clear_bit(vid, v->untagged_bitmap);
+ struct net_bridge_vlan *masterv = v;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_port *p = NULL;
+ int err = 0;
- if (v->port_idx) {
- struct net_bridge_port *p = v->parent.port;
- int err;
+ if (br_vlan_is_master(v)) {
+ vg = br_vlan_group(v->br);
+ } else {
+ p = v->port;
+ vg = nbp_vlan_group(v->port);
+ masterv = v->brvlan;
+ }
- err = __vlan_vid_del(p->dev, p->br, vid);
+ __vlan_delete_pvid(vg, v->vid);
+ if (p) {
+ err = __vlan_vid_del(p->dev, p->br, v->vid);
if (err)
- return err;
+ goto out;
}
- clear_bit(vid, v->vlan_bitmap);
- v->num_vlans--;
- if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) {
- if (v->port_idx)
- RCU_INIT_POINTER(v->parent.port->vlan_info, NULL);
- else
- RCU_INIT_POINTER(v->parent.br->vlan_info, NULL);
+ if (br_vlan_should_use(v)) {
+ v->flags &= ~BRIDGE_VLAN_INFO_BRENTRY;
+ vg->num_vlans--;
+ }
+
+ if (masterv != v) {
+ rhashtable_remove_fast(&vg->vlan_hash, &v->vnode,
+ br_vlan_rht_params);
+ __vlan_del_list(v);
kfree_rcu(v, rcu);
}
- return 0;
+
+ br_vlan_put_master(masterv);
+out:
+ return err;
}
-static void __vlan_flush(struct net_port_vlans *v)
+static void __vlan_group_free(struct net_bridge_vlan_group *vg)
{
- smp_wmb();
- v->pvid = 0;
- bitmap_zero(v->vlan_bitmap, VLAN_N_VID);
- if (v->port_idx)
- RCU_INIT_POINTER(v->parent.port->vlan_info, NULL);
- else
- RCU_INIT_POINTER(v->parent.br->vlan_info, NULL);
- kfree_rcu(v, rcu);
+ WARN_ON(!list_empty(&vg->vlan_list));
+ rhashtable_destroy(&vg->vlan_hash);
+ kfree(vg);
+}
+
+static void __vlan_flush(struct net_bridge_vlan_group *vg)
+{
+ struct net_bridge_vlan *vlan, *tmp;
+
+ __vlan_delete_pvid(vg, vg->pvid);
+ list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist)
+ __vlan_del(vlan);
}
struct sk_buff *br_handle_vlan(struct net_bridge *br,
- const struct net_port_vlans *pv,
+ struct net_bridge_vlan_group *vg,
struct sk_buff *skb)
{
+ struct net_bridge_vlan *v;
u16 vid;
/* If this packet was not filtered at input, let it pass */
if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
goto out;
- /* Vlan filter table must be configured at this point. The
+ /* At this point, we know that the frame was filtered and contains
+ * a valid vlan id. If the vlan id has untagged flag set,
+ * send untagged; otherwise, send tagged.
+ */
+ br_vlan_get_tag(skb, &vid);
+ v = br_vlan_find(vg, vid);
+ /* Vlan entry must be configured at this point. The
* only exception is the bridge is set in promisc mode and the
* packet is destined for the bridge device. In this case
* pass the packet as is.
*/
- if (!pv) {
+ if (!v || !br_vlan_should_use(v)) {
if ((br->dev->flags & IFF_PROMISC) && skb->dev == br->dev) {
goto out;
} else {
@@ -210,13 +353,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
return NULL;
}
}
-
- /* At this point, we know that the frame was filtered and contains
- * a valid vlan id. If the vlan id is set in the untagged bitmap,
- * send untagged; otherwise, send tagged.
- */
- br_vlan_get_tag(skb, &vid);
- if (test_bit(vid, pv->untagged_bitmap))
+ if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED)
skb->vlan_tci = 0;
out:
@@ -224,29 +361,13 @@ out:
}
/* Called under RCU */
-bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
- struct sk_buff *skb, u16 *vid)
+static bool __allowed_ingress(struct net_bridge_vlan_group *vg, __be16 proto,
+ struct sk_buff *skb, u16 *vid)
{
+ const struct net_bridge_vlan *v;
bool tagged;
- __be16 proto;
-
- /* If VLAN filtering is disabled on the bridge, all packets are
- * permitted.
- */
- if (!br->vlan_enabled) {
- BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
- return true;
- }
-
- /* If there are no vlan in the permitted list, all packets are
- * rejected.
- */
- if (!v)
- goto drop;
BR_INPUT_SKB_CB(skb)->vlan_filtered = true;
- proto = br->vlan_proto;
-
/* If vlan tx offload is disabled on bridge device and frame was
* sent from vlan device on the bridge device, it does not have
* HW accelerated vlan tag.
@@ -281,7 +402,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
}
if (!*vid) {
- u16 pvid = br_get_pvid(v);
+ u16 pvid = br_get_pvid(vg);
/* Frame had a tag with VID 0 or did not have a tag.
* See if pvid is set on this port. That tells us which
@@ -309,29 +430,43 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
}
/* Frame had a valid vlan tag. See if vlan is allowed */
- if (test_bit(*vid, v->vlan_bitmap))
+ v = br_vlan_find(vg, *vid);
+ if (v && br_vlan_should_use(v))
return true;
drop:
kfree_skb(skb);
return false;
}
+bool br_allowed_ingress(const struct net_bridge *br,
+ struct net_bridge_vlan_group *vg, struct sk_buff *skb,
+ u16 *vid)
+{
+ /* If VLAN filtering is disabled on the bridge, all packets are
+ * permitted.
+ */
+ if (!br->vlan_enabled) {
+ BR_INPUT_SKB_CB(skb)->vlan_filtered = false;
+ return true;
+ }
+
+ return __allowed_ingress(vg, br->vlan_proto, skb, vid);
+}
+
/* Called under RCU. */
-bool br_allowed_egress(struct net_bridge *br,
- const struct net_port_vlans *v,
+bool br_allowed_egress(struct net_bridge_vlan_group *vg,
const struct sk_buff *skb)
{
+ const struct net_bridge_vlan *v;
u16 vid;
/* If this packet was not filtered at input, let it pass */
if (!BR_INPUT_SKB_CB(skb)->vlan_filtered)
return true;
- if (!v)
- return false;
-
br_vlan_get_tag(skb, &vid);
- if (test_bit(vid, v->vlan_bitmap))
+ v = br_vlan_find(vg, vid);
+ if (v && br_vlan_should_use(v))
return true;
return false;
@@ -340,29 +475,29 @@ bool br_allowed_egress(struct net_bridge *br,
/* Called under RCU */
bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
{
+ struct net_bridge_vlan_group *vg;
struct net_bridge *br = p->br;
- struct net_port_vlans *v;
/* If filtering was disabled at input, let it pass. */
if (!br->vlan_enabled)
return true;
- v = rcu_dereference(p->vlan_info);
- if (!v)
+ vg = nbp_vlan_group(p);
+ if (!vg || !vg->num_vlans)
return false;
if (!br_vlan_get_tag(skb, vid) && skb->vlan_proto != br->vlan_proto)
*vid = 0;
if (!*vid) {
- *vid = br_get_pvid(v);
+ *vid = br_get_pvid(vg);
if (!*vid)
return false;
return true;
}
- if (test_bit(*vid, v->vlan_bitmap))
+ if (br_vlan_find(vg, *vid))
return true;
return false;
@@ -373,31 +508,49 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
*/
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags)
{
- struct net_port_vlans *pv = NULL;
- int err;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *vlan;
+ int ret;
ASSERT_RTNL();
- pv = rtnl_dereference(br->vlan_info);
- if (pv)
- return __vlan_add(pv, vid, flags);
+ vg = br_vlan_group(br);
+ vlan = br_vlan_find(vg, vid);
+ if (vlan) {
+ if (!br_vlan_is_brentry(vlan)) {
+ /* Trying to change flags of non-existent bridge vlan */
+ if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
+ return -EINVAL;
+ /* It was only kept for port vlans, now make it real */
+ ret = br_fdb_insert(br, NULL, br->dev->dev_addr,
+ vlan->vid);
+ if (ret) {
+ br_err(br, "failed insert local address into bridge forwarding table\n");
+ return ret;
+ }
+ atomic_inc(&vlan->refcnt);
+ vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
+ vg->num_vlans++;
+ }
+ __vlan_add_flags(vlan, flags);
+ return 0;
+ }
- /* Create port vlan infomration
- */
- pv = kzalloc(sizeof(*pv), GFP_KERNEL);
- if (!pv)
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
return -ENOMEM;
- pv->parent.br = br;
- err = __vlan_add(pv, vid, flags);
- if (err)
- goto out;
+ vlan->vid = vid;
+ vlan->flags = flags | BRIDGE_VLAN_INFO_MASTER;
+ vlan->flags &= ~BRIDGE_VLAN_INFO_PVID;
+ vlan->br = br;
+ if (flags & BRIDGE_VLAN_INFO_BRENTRY)
+ atomic_set(&vlan->refcnt, 1);
+ ret = __vlan_add(vlan, flags);
+ if (ret)
+ kfree(vlan);
- rcu_assign_pointer(br->vlan_info, pv);
- return 0;
-out:
- kfree(pv);
- return err;
+ return ret;
}
/* Must be protected by RTNL.
@@ -405,49 +558,41 @@ out:
*/
int br_vlan_delete(struct net_bridge *br, u16 vid)
{
- struct net_port_vlans *pv;
+ struct net_bridge_vlan_group *vg;
+ struct net_bridge_vlan *v;
ASSERT_RTNL();
- pv = rtnl_dereference(br->vlan_info);
- if (!pv)
- return -EINVAL;
+ vg = br_vlan_group(br);
+ v = br_vlan_find(vg, vid);
+ if (!v || !br_vlan_is_brentry(v))
+ return -ENOENT;
br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
+ br_fdb_delete_by_port(br, NULL, vid, 0);
- __vlan_del(pv, vid);
- return 0;
+ return __vlan_del(v);
}
void br_vlan_flush(struct net_bridge *br)
{
- struct net_port_vlans *pv;
+ struct net_bridge_vlan_group *vg;
ASSERT_RTNL();
- pv = rtnl_dereference(br->vlan_info);
- if (!pv)
- return;
- __vlan_flush(pv);
+ vg = br_vlan_group(br);
+ __vlan_flush(vg);
+ RCU_INIT_POINTER(br->vlgrp, NULL);
+ synchronize_rcu();
+ __vlan_group_free(vg);
}
-bool br_vlan_find(struct net_bridge *br, u16 vid)
+struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid)
{
- struct net_port_vlans *pv;
- bool found = false;
-
- rcu_read_lock();
- pv = rcu_dereference(br->vlan_info);
-
- if (!pv)
- goto out;
-
- if (test_bit(vid, pv->vlan_bitmap))
- found = true;
+ if (!vg)
+ return NULL;
-out:
- rcu_read_unlock();
- return found;
+ return br_vlan_lookup(&vg->vlan_hash, vid);
}
/* Must be protected by RTNL. */
@@ -505,21 +650,18 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
{
int err = 0;
struct net_bridge_port *p;
- struct net_port_vlans *pv;
+ struct net_bridge_vlan *vlan;
+ struct net_bridge_vlan_group *vg;
__be16 oldproto;
- u16 vid, errvid;
if (br->vlan_proto == proto)
return 0;
/* Add VLANs for the new proto to the device filter. */
list_for_each_entry(p, &br->port_list, list) {
- pv = rtnl_dereference(p->vlan_info);
- if (!pv)
- continue;
-
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
- err = vlan_vid_add(p->dev, proto, vid);
+ vg = nbp_vlan_group(p);
+ list_for_each_entry(vlan, &vg->vlan_list, vlist) {
+ err = vlan_vid_add(p->dev, proto, vlan->vid);
if (err)
goto err_filt;
}
@@ -533,28 +675,21 @@ int __br_vlan_set_proto(struct net_bridge *br, __be16 proto)
/* Delete VLANs for the old proto from the device filter. */
list_for_each_entry(p, &br->port_list, list) {
- pv = rtnl_dereference(p->vlan_info);
- if (!pv)
- continue;
-
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
- vlan_vid_del(p->dev, oldproto, vid);
+ vg = nbp_vlan_group(p);
+ list_for_each_entry(vlan, &vg->vlan_list, vlist)
+ vlan_vid_del(p->dev, oldproto, vlan->vid);
}
return 0;
err_filt:
- errvid = vid;
- for_each_set_bit(vid, pv->vlan_bitmap, errvid)
- vlan_vid_del(p->dev, proto, vid);
+ list_for_each_entry_continue_reverse(vlan, &vg->vlan_list, vlist)
+ vlan_vid_del(p->dev, proto, vlan->vid);
list_for_each_entry_continue_reverse(p, &br->port_list, list) {
- pv = rtnl_dereference(p->vlan_info);
- if (!pv)
- continue;
-
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
- vlan_vid_del(p->dev, proto, vid);
+ vg = nbp_vlan_group(p);
+ list_for_each_entry(vlan, &vg->vlan_list, vlist)
+ vlan_vid_del(p->dev, proto, vlan->vid);
}
return err;
@@ -576,9 +711,19 @@ int br_vlan_set_proto(struct net_bridge *br, unsigned long val)
return err;
}
-static bool vlan_default_pvid(struct net_port_vlans *pv, u16 vid)
+static bool vlan_default_pvid(struct net_bridge_vlan_group *vg, u16 vid)
{
- return pv && vid == pv->pvid && test_bit(vid, pv->untagged_bitmap);
+ struct net_bridge_vlan *v;
+
+ if (vid != vg->pvid)
+ return false;
+
+ v = br_vlan_lookup(&vg->vlan_hash, vid);
+ if (v && br_vlan_should_use(v) &&
+ (v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
+ return true;
+
+ return false;
}
static void br_vlan_disable_default_pvid(struct net_bridge *br)
@@ -589,24 +734,31 @@ static void br_vlan_disable_default_pvid(struct net_bridge *br)
/* Disable default_pvid on all ports where it is still
* configured.
*/
- if (vlan_default_pvid(br_get_vlan_info(br), pvid))
+ if (vlan_default_pvid(br_vlan_group(br), pvid))
br_vlan_delete(br, pvid);
list_for_each_entry(p, &br->port_list, list) {
- if (vlan_default_pvid(nbp_get_vlan_info(p), pvid))
+ if (vlan_default_pvid(nbp_vlan_group(p), pvid))
nbp_vlan_delete(p, pvid);
}
br->default_pvid = 0;
}
-static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
+int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
{
+ const struct net_bridge_vlan *pvent;
+ struct net_bridge_vlan_group *vg;
struct net_bridge_port *p;
u16 old_pvid;
int err = 0;
unsigned long *changed;
+ if (!pvid) {
+ br_vlan_disable_default_pvid(br);
+ return 0;
+ }
+
changed = kcalloc(BITS_TO_LONGS(BR_MAX_PORTS), sizeof(unsigned long),
GFP_KERNEL);
if (!changed)
@@ -617,11 +769,14 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
/* Update default_pvid config only if we do not conflict with
* user configuration.
*/
- if ((!old_pvid || vlan_default_pvid(br_get_vlan_info(br), old_pvid)) &&
- !br_vlan_find(br, pvid)) {
+ vg = br_vlan_group(br);
+ pvent = br_vlan_find(vg, pvid);
+ if ((!old_pvid || vlan_default_pvid(vg, old_pvid)) &&
+ (!pvent || !br_vlan_should_use(pvent))) {
err = br_vlan_add(br, pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_BRENTRY);
if (err)
goto out;
br_vlan_delete(br, old_pvid);
@@ -632,9 +787,10 @@ static int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid)
/* Update default_pvid config only if we do not conflict with
* user configuration.
*/
+ vg = nbp_vlan_group(p);
if ((old_pvid &&
- !vlan_default_pvid(nbp_get_vlan_info(p), old_pvid)) ||
- nbp_vlan_find(p, pvid))
+ !vlan_default_pvid(vg, old_pvid)) ||
+ br_vlan_find(vg, pvid))
continue;
err = nbp_vlan_add(p, pvid,
@@ -668,7 +824,8 @@ err_port:
if (old_pvid)
br_vlan_add(br, old_pvid,
BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED);
+ BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_BRENTRY);
br_vlan_delete(br, pvid);
}
goto out;
@@ -694,12 +851,7 @@ int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val)
err = -EPERM;
goto unlock;
}
-
- if (!pvid)
- br_vlan_disable_default_pvid(br);
- else
- err = __br_vlan_set_default_pvid(br, pvid);
-
+ err = __br_vlan_set_default_pvid(br, pvid);
unlock:
rtnl_unlock();
return err;
@@ -707,10 +859,66 @@ unlock:
int br_vlan_init(struct net_bridge *br)
{
+ struct net_bridge_vlan_group *vg;
+ int ret = -ENOMEM;
+
+ vg = kzalloc(sizeof(*vg), GFP_KERNEL);
+ if (!vg)
+ goto out;
+ ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params);
+ if (ret)
+ goto err_rhtbl;
+ INIT_LIST_HEAD(&vg->vlan_list);
br->vlan_proto = htons(ETH_P_8021Q);
br->default_pvid = 1;
- return br_vlan_add(br, 1,
- BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED);
+ rcu_assign_pointer(br->vlgrp, vg);
+ ret = br_vlan_add(br, 1,
+ BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED |
+ BRIDGE_VLAN_INFO_BRENTRY);
+ if (ret)
+ goto err_vlan_add;
+
+out:
+ return ret;
+
+err_vlan_add:
+ rhashtable_destroy(&vg->vlan_hash);
+err_rhtbl:
+ kfree(vg);
+
+ goto out;
+}
+
+int nbp_vlan_init(struct net_bridge_port *p)
+{
+ struct net_bridge_vlan_group *vg;
+ int ret = -ENOMEM;
+
+ vg = kzalloc(sizeof(struct net_bridge_vlan_group), GFP_KERNEL);
+ if (!vg)
+ goto out;
+
+ ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params);
+ if (ret)
+ goto err_rhtbl;
+ INIT_LIST_HEAD(&vg->vlan_list);
+ rcu_assign_pointer(p->vlgrp, vg);
+ if (p->br->default_pvid) {
+ ret = nbp_vlan_add(p, p->br->default_pvid,
+ BRIDGE_VLAN_INFO_PVID |
+ BRIDGE_VLAN_INFO_UNTAGGED);
+ if (ret)
+ goto err_vlan_add;
+ }
+out:
+ return ret;
+
+err_vlan_add:
+ rhashtable_destroy(&vg->vlan_hash);
+err_rhtbl:
+ kfree(vg);
+
+ goto out;
}
/* Must be protected by RTNL.
@@ -718,35 +926,28 @@ int br_vlan_init(struct net_bridge *br)
*/
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
- struct net_port_vlans *pv = NULL;
- int err;
+ struct net_bridge_vlan *vlan;
+ int ret;
ASSERT_RTNL();
- pv = rtnl_dereference(port->vlan_info);
- if (pv)
- return __vlan_add(pv, vid, flags);
-
- /* Create port vlan infomration
- */
- pv = kzalloc(sizeof(*pv), GFP_KERNEL);
- if (!pv) {
- err = -ENOMEM;
- goto clean_up;
+ vlan = br_vlan_find(nbp_vlan_group(port), vid);
+ if (vlan) {
+ __vlan_add_flags(vlan, flags);
+ return 0;
}
- pv->port_idx = port->port_no;
- pv->parent.port = port;
- err = __vlan_add(pv, vid, flags);
- if (err)
- goto clean_up;
+ vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
+ if (!vlan)
+ return -ENOMEM;
- rcu_assign_pointer(port->vlan_info, pv);
- return 0;
+ vlan->vid = vid;
+ vlan->port = port;
+ ret = __vlan_add(vlan, flags);
+ if (ret)
+ kfree(vlan);
-clean_up:
- kfree(pv);
- return err;
+ return ret;
}
/* Must be protected by RTNL.
@@ -754,61 +955,28 @@ clean_up:
*/
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
{
- struct net_port_vlans *pv;
+ struct net_bridge_vlan *v;
ASSERT_RTNL();
- pv = rtnl_dereference(port->vlan_info);
- if (!pv)
- return -EINVAL;
-
+ v = br_vlan_find(nbp_vlan_group(port), vid);
+ if (!v)
+ return -ENOENT;
br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
br_fdb_delete_by_port(port->br, port, vid, 0);
- return __vlan_del(pv, vid);
+ return __vlan_del(v);
}
void nbp_vlan_flush(struct net_bridge_port *port)
{
- struct net_port_vlans *pv;
- u16 vid;
+ struct net_bridge_vlan_group *vg;
ASSERT_RTNL();
- pv = rtnl_dereference(port->vlan_info);
- if (!pv)
- return;
-
- for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
- vlan_vid_del(port->dev, port->br->vlan_proto, vid);
-
- __vlan_flush(pv);
-}
-
-bool nbp_vlan_find(struct net_bridge_port *port, u16 vid)
-{
- struct net_port_vlans *pv;
- bool found = false;
-
- rcu_read_lock();
- pv = rcu_dereference(port->vlan_info);
-
- if (!pv)
- goto out;
-
- if (test_bit(vid, pv->vlan_bitmap))
- found = true;
-
-out:
- rcu_read_unlock();
- return found;
-}
-
-int nbp_vlan_init(struct net_bridge_port *p)
-{
- return p->br->default_pvid ?
- nbp_vlan_add(p, p->br->default_pvid,
- BRIDGE_VLAN_INFO_PVID |
- BRIDGE_VLAN_INFO_UNTAGGED) :
- 0;
+ vg = nbp_vlan_group(port);
+ __vlan_flush(vg);
+ RCU_INIT_POINTER(port->vlgrp, NULL);
+ synchronize_rcu();
+ __vlan_group_free(vg);
}
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 17f2e4bc2a29..0ad639a96142 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -180,7 +180,7 @@ ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_log_info *info = par->targinfo;
struct nf_loginfo li;
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = info->loglevel;
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 59ac7952010d..54816150608e 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -24,7 +24,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ebt_nflog_info *info = par->targinfo;
struct nf_loginfo li;
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index d2cdf5d6e98c..ec94c6f1ae88 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -50,10 +50,14 @@ static const struct ebt_table broute_table = {
static int ebt_broute(struct sk_buff *skb)
{
+ struct nf_hook_state state;
int ret;
- ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL,
- dev_net(skb->dev)->xt.broute_table);
+ nf_hook_state_init(&state, NULL, NF_BR_BROUTING, INT_MIN,
+ NFPROTO_BRIDGE, skb->dev, NULL, NULL,
+ dev_net(skb->dev), NULL);
+
+ ret = ebt_do_table(skb, &state, state.net->xt.broute_table);
if (ret == NF_DROP)
return 1; /* route it */
return 0; /* bridge it */
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 8a3f63b2e807..32eccd101f26 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -57,39 +57,34 @@ static const struct ebt_table frame_filter = {
};
static unsigned int
-ebt_in_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ebt_in_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ebt_do_table(ops->hooknum, skb, state->in, state->out,
- dev_net(state->in)->xt.frame_filter);
+ return ebt_do_table(skb, state, state->net->xt.frame_filter);
}
static unsigned int
-ebt_out_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ebt_out_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ebt_do_table(ops->hooknum, skb, state->in, state->out,
- dev_net(state->out)->xt.frame_filter);
+ return ebt_do_table(skb, state, state->net->xt.frame_filter);
}
static struct nf_hook_ops ebt_ops_filter[] __read_mostly = {
{
.hook = ebt_in_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_IN,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
.hook = ebt_in_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_FORWARD,
.priority = NF_BR_PRI_FILTER_BRIDGED,
},
{
.hook = ebt_out_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_FILTER_OTHER,
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index c5ef5b1ab678..ec55358f00c8 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -57,39 +57,34 @@ static struct ebt_table frame_nat = {
};
static unsigned int
-ebt_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ebt_nat_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ebt_do_table(ops->hooknum, skb, state->in, state->out,
- dev_net(state->in)->xt.frame_nat);
+ return ebt_do_table(skb, state, state->net->xt.frame_nat);
}
static unsigned int
-ebt_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ebt_nat_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ebt_do_table(ops->hooknum, skb, state->in, state->out,
- dev_net(state->out)->xt.frame_nat);
+ return ebt_do_table(skb, state, state->net->xt.frame_nat);
}
static struct nf_hook_ops ebt_ops_nat[] __read_mostly = {
{
.hook = ebt_nat_out,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_LOCAL_OUT,
.priority = NF_BR_PRI_NAT_DST_OTHER,
},
{
.hook = ebt_nat_out,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_POST_ROUTING,
.priority = NF_BR_PRI_NAT_SRC,
},
{
.hook = ebt_nat_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_BRIDGE,
.hooknum = NF_BR_PRE_ROUTING,
.priority = NF_BR_PRI_NAT_DST_BRIDGED,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 48b6b01295de..f46ca417bf2d 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -183,10 +183,11 @@ struct ebt_entry *ebt_next_entry(const struct ebt_entry *entry)
}
/* Do some firewalling */
-unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- struct ebt_table *table)
+unsigned int ebt_do_table(struct sk_buff *skb,
+ const struct nf_hook_state *state,
+ struct ebt_table *table)
{
+ unsigned int hook = state->hook;
int i, nentries;
struct ebt_entry *point;
struct ebt_counter *counter_base, *cb_base;
@@ -199,8 +200,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
struct xt_action_param acpar;
acpar.family = NFPROTO_BRIDGE;
- acpar.in = in;
- acpar.out = out;
+ acpar.net = state->net;
+ acpar.in = state->in;
+ acpar.out = state->out;
acpar.hotdrop = false;
acpar.hooknum = hook;
@@ -220,7 +222,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
base = private->entries;
i = 0;
while (i < nentries) {
- if (ebt_basic_match(point, skb, in, out))
+ if (ebt_basic_match(point, skb, state->in, state->out))
goto letscontinue;
if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index a343e62442b1..62f6b1b19589 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -65,31 +65,29 @@ int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate);
static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
if (nft_bridge_iphdr_validate(skb))
- nft_set_pktinfo_ipv4(pkt, ops, skb, state);
+ nft_set_pktinfo_ipv4(pkt, skb, state);
else
- nft_set_pktinfo(pkt, ops, skb, state);
+ nft_set_pktinfo(pkt, skb, state);
}
static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
#if IS_ENABLED(CONFIG_IPV6)
if (nft_bridge_ip6hdr_validate(skb) &&
- nft_set_pktinfo_ipv6(pkt, ops, skb, state) == 0)
+ nft_set_pktinfo_ipv6(pkt, skb, state) == 0)
return;
#endif
- nft_set_pktinfo(pkt, ops, skb, state);
+ nft_set_pktinfo(pkt, skb, state);
}
static unsigned int
-nft_do_chain_bridge(const struct nf_hook_ops *ops,
+nft_do_chain_bridge(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -97,17 +95,17 @@ nft_do_chain_bridge(const struct nf_hook_ops *ops,
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
- nft_bridge_set_pktinfo_ipv4(&pkt, ops, skb, state);
+ nft_bridge_set_pktinfo_ipv4(&pkt, skb, state);
break;
case htons(ETH_P_IPV6):
- nft_bridge_set_pktinfo_ipv6(&pkt, ops, skb, state);
+ nft_bridge_set_pktinfo_ipv6(&pkt, skb, state);
break;
default:
- nft_set_pktinfo(&pkt, ops, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
break;
}
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
static struct nft_af_info nft_af_bridge __read_mostly = {
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 858d848564ee..fdba3d9fbff3 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -261,7 +261,6 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
- struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
const unsigned char *dest = eth_hdr(pkt->skb)->h_dest;
if (is_broadcast_ether_addr(dest) ||
@@ -273,16 +272,16 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
- pkt->ops->hooknum,
+ pkt->hook,
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
nft_reject_br_send_v4_tcp_reset(pkt->skb, pkt->in,
- pkt->ops->hooknum);
+ pkt->hook);
break;
case NFT_REJECT_ICMPX_UNREACH:
nft_reject_br_send_v4_unreach(pkt->skb, pkt->in,
- pkt->ops->hooknum,
+ pkt->hook,
nft_reject_icmp_code(priv->icmp_code));
break;
}
@@ -290,17 +289,17 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr,
case htons(ETH_P_IPV6):
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
- pkt->ops->hooknum,
+ nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
+ pkt->in, pkt->hook,
priv->icmp_code);
break;
case NFT_REJECT_TCP_RST:
- nft_reject_br_send_v6_tcp_reset(net, pkt->skb, pkt->in,
- pkt->ops->hooknum);
+ nft_reject_br_send_v6_tcp_reset(pkt->net, pkt->skb,
+ pkt->in, pkt->hook);
break;
case NFT_REJECT_ICMPX_UNREACH:
- nft_reject_br_send_v6_unreach(net, pkt->skb, pkt->in,
- pkt->ops->hooknum,
+ nft_reject_br_send_v6_unreach(pkt->net, pkt->skb,
+ pkt->in, pkt->hook,
nft_reject_icmpv6_code(priv->icmp_code));
break;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index a1ba6875c2a2..6863310d6973 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -96,7 +96,7 @@ struct bcm_op {
canid_t can_id;
u32 flags;
unsigned long frames_abs, frames_filtered;
- struct timeval ival1, ival2;
+ struct bcm_timeval ival1, ival2;
struct hrtimer timer, thrtimer;
struct tasklet_struct tsklet, thrtsklet;
ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg;
@@ -131,6 +131,11 @@ static inline struct bcm_sock *bcm_sk(const struct sock *sk)
return (struct bcm_sock *)sk;
}
+static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv)
+{
+ return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC);
+}
+
#define CFSIZ sizeof(struct can_frame)
#define OPSIZ sizeof(struct bcm_op)
#define MHSIZ sizeof(struct bcm_msg_head)
@@ -953,8 +958,8 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
op->count = msg_head->count;
op->ival1 = msg_head->ival1;
op->ival2 = msg_head->ival2;
- op->kt_ival1 = timeval_to_ktime(msg_head->ival1);
- op->kt_ival2 = timeval_to_ktime(msg_head->ival2);
+ op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
+ op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
/* disable an active timer due to zero values? */
if (!op->kt_ival1.tv64 && !op->kt_ival2.tv64)
@@ -1134,8 +1139,8 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
/* set timer value */
op->ival1 = msg_head->ival1;
op->ival2 = msg_head->ival2;
- op->kt_ival1 = timeval_to_ktime(msg_head->ival1);
- op->kt_ival2 = timeval_to_ktime(msg_head->ival2);
+ op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1);
+ op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2);
/* disable an active timer due to zero value? */
if (!op->kt_ival1.tv64)
diff --git a/net/core/dev.c b/net/core/dev.c
index 6bb6470f5b7b..1225b4be8ed6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2915,9 +2915,11 @@ EXPORT_SYMBOL(xmit_recursion);
/**
* dev_loopback_xmit - loop back @skb
+ * @net: network namespace this loopback is happening in
+ * @sk: sk needed to be a netfilter okfn
* @skb: buffer to transmit
*/
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
@@ -2972,6 +2974,7 @@ static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
new_index = skb_tx_hash(dev, skb);
if (queue_index != new_index && sk &&
+ sk_fullsock(sk) &&
rcu_access_pointer(sk->sk_dst_cache))
sk_tx_queue_set(sk, new_index);
@@ -3143,11 +3146,11 @@ out:
return rc;
}
-int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
+int dev_queue_xmit(struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
-EXPORT_SYMBOL(dev_queue_xmit_sk);
+EXPORT_SYMBOL(dev_queue_xmit);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
@@ -3668,6 +3671,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by cls/act_bpf, so
+ * we can safely push the L2 header back before
+ * redirecting to another netdev
+ */
+ __skb_push(skb, skb->mac_len);
+ skb_do_redirect(skb);
+ return NULL;
default:
break;
}
@@ -3982,13 +3993,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
+int netif_receive_skb(struct sk_buff *skb)
{
trace_netif_receive_skb_entry(skb);
return netif_receive_skb_internal(skb);
}
-EXPORT_SYMBOL(netif_receive_skb_sk);
+EXPORT_SYMBOL(netif_receive_skb);
/* Network device is going away, flush any packets still pending
* Called with irqs disabled.
@@ -4857,8 +4868,7 @@ struct netdev_adjacent {
struct rcu_head rcu;
};
-static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
- struct net_device *adj_dev,
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
struct list_head *adj_list)
{
struct netdev_adjacent *adj;
@@ -4884,7 +4894,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
+ return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -5146,7 +5156,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct netdev_adjacent *adj;
int ret;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
adj->ref_nr++;
@@ -5202,7 +5212,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
{
struct netdev_adjacent *adj;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (!adj) {
pr_err("tried to remove device %s from %s\n",
@@ -5323,10 +5333,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
+ if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
+ if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
@@ -5336,6 +5346,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
changeupper_info.master = master;
changeupper_info.linking = true;
+ ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ &changeupper_info.info);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ return ret;
+
ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
master);
if (ret)
@@ -5478,6 +5494,9 @@ void netdev_upper_dev_unlink(struct net_device *dev,
changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
changeupper_info.linking = false;
+ call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+ &changeupper_info.info);
+
__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
/* Here is the tricky part. We must remove all dev's lower
@@ -5604,7 +5623,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
if (!lower_dev)
return NULL;
- lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+ lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
if (!lower)
return NULL;
diff --git a/net/core/dst.c b/net/core/dst.c
index 0771c8cb9307..2a1818065e12 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -144,12 +144,12 @@ loop:
mutex_unlock(&dst_gc_mutex);
}
-int dst_discard_sk(struct sock *sk, struct sk_buff *skb)
+int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
kfree_skb(skb);
return 0;
}
-EXPORT_SYMBOL(dst_discard_sk);
+EXPORT_SYMBOL(dst_discard_out);
const u32 dst_default_metrics[RTAX_MAX + 1] = {
/* This initializer is needed to force linker to place this variable
@@ -177,7 +177,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
dst->xfrm = NULL;
#endif
dst->input = dst_discard;
- dst->output = dst_discard_sk;
+ dst->output = dst_discard_out;
dst->error = 0;
dst->obsolete = initial_obsolete;
dst->header_len = 0;
@@ -224,7 +224,7 @@ static void ___dst_free(struct dst_entry *dst)
*/
if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
dst->input = dst_discard;
- dst->output = dst_discard_sk;
+ dst->output = dst_discard_out;
}
dst->obsolete = DST_OBSOLETE_DEAD;
}
@@ -352,7 +352,7 @@ static struct dst_ops md_dst_ops = {
.family = AF_UNSPEC,
};
-static int dst_md_discard_sk(struct sock *sk, struct sk_buff *skb)
+static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
WARN_ONCE(1, "Attempting to call output on metadata dst\n");
kfree_skb(skb);
@@ -375,7 +375,7 @@ static void __metadata_dst_init(struct metadata_dst *md_dst, u8 optslen)
DST_METADATA | DST_NOCACHE | DST_NOCOUNT);
dst->input = dst_md_discard;
- dst->output = dst_md_discard_sk;
+ dst->output = dst_md_discard_out;
memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
}
@@ -430,7 +430,7 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
if (!unregister) {
dst->input = dst_discard;
- dst->output = dst_discard_sk;
+ dst->output = dst_discard_out;
} else {
dst->dev = dev_net(dst->dev)->loopback_dev;
dev_hold(dst->dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index bb18c3680001..672eefbfbe99 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -49,16 +49,17 @@
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
+#include <net/dst.h>
/**
* sk_filter - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
*
- * Run the filter code and then cut skb->data to correct size returned by
- * SK_RUN_FILTER. If pkt_len is 0 we toss packet. If skb->len is smaller
+ * Run the eBPF program and then cut skb->data to correct size returned by
+ * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
* than pkt_len we keep whole skb->data. This is the socket level
- * wrapper to SK_RUN_FILTER. It returns 0 if the packet should
+ * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
* be accepted or -EPERM if the packet should be tossed.
*
*/
@@ -82,7 +83,7 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter) {
- unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
+ unsigned int pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
}
@@ -148,12 +149,6 @@ static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
return raw_smp_processor_id();
}
-/* note that this only generates 32-bit random numbers */
-static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
-{
- return prandom_u32();
-}
-
static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
struct bpf_insn *insn_buf)
{
@@ -312,7 +307,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
break;
case SKF_AD_OFF + SKF_AD_RANDOM:
- *insn = BPF_EMIT_CALL(__get_random_u32);
+ *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
+ bpf_user_rnd_init_once();
break;
}
break;
@@ -1001,7 +997,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
int err;
fp->bpf_func = NULL;
- fp->jited = false;
+ fp->jited = 0;
err = bpf_check_classic(fp->insns, fp->len);
if (err) {
@@ -1083,16 +1079,18 @@ EXPORT_SYMBOL_GPL(bpf_prog_create);
* @pfp: the unattached filter that is created
* @fprog: the filter program
* @trans: post-classic verifier transformation handler
+ * @save_orig: save classic BPF program
*
* This function effectively does the same as bpf_prog_create(), only
* that it builds up its insns buffer from user space provided buffer.
* It also allows for passing a bpf_aux_classic_check_t handler.
*/
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
- bpf_aux_classic_check_t trans)
+ bpf_aux_classic_check_t trans, bool save_orig)
{
unsigned int fsize = bpf_classic_proglen(fprog);
struct bpf_prog *fp;
+ int err;
/* Make sure new filter is there and in the right amounts. */
if (fprog->filter == NULL)
@@ -1108,12 +1106,16 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
}
fp->len = fprog->len;
- /* Since unattached filters are not copied back to user
- * space through sk_get_filter(), we do not need to hold
- * a copy here, and can spare us the work.
- */
fp->orig_prog = NULL;
+ if (save_orig) {
+ err = bpf_prog_store_orig_filter(fp, fprog);
+ if (err) {
+ __bpf_prog_free(fp);
+ return -ENOMEM;
+ }
+ }
+
/* bpf_prepare_filter() already takes care of freeing
* memory in case something goes wrong.
*/
@@ -1404,9 +1406,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
if (unlikely(!dev))
return -EINVAL;
- if (unlikely(!(dev->flags & IFF_UP)))
- return -EINVAL;
-
skb2 = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!skb2))
return -ENOMEM;
@@ -1428,6 +1427,49 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
+struct redirect_info {
+ u32 ifindex;
+ u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+ ri->ifindex = 0;
+ if (unlikely(!dev)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+ return dev_forward_skb(dev, skb);
+
+ skb->dev = dev;
+ skb_sender_cpu_clear(skb);
+ return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+ .func = bpf_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
return task_get_classid((struct sk_buff *) (unsigned long) r1);
@@ -1440,6 +1482,25 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
.arg1_type = ARG_PTR_TO_CTX,
};
+static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+#ifdef CONFIG_IP_ROUTE_CLASSID
+ const struct dst_entry *dst;
+
+ dst = skb_dst((struct sk_buff *) (unsigned long) r1);
+ if (dst)
+ return dst->tclassid;
+#endif
+ return 0;
+}
+
+static const struct bpf_func_proto bpf_get_route_realm_proto = {
+ .func = bpf_get_route_realm,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+
static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5)
{
struct sk_buff *skb = (struct sk_buff *) (long) r1;
@@ -1580,7 +1641,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_trace_printk:
- return bpf_get_trace_printk_proto();
+ if (capable(CAP_SYS_ADMIN))
+ return bpf_get_trace_printk_proto();
default:
return NULL;
}
@@ -1608,6 +1670,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
return bpf_get_skb_set_tunnel_key_proto();
+ case BPF_FUNC_redirect:
+ return &bpf_redirect_proto;
+ case BPF_FUNC_get_route_realm:
+ return &bpf_get_route_realm_proto;
default:
return sk_filter_func_proto(func_id);
}
@@ -1633,6 +1699,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
@@ -1649,10 +1718,14 @@ static bool sk_filter_is_valid_access(int off, int size,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return type == BPF_WRITE ? true : false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, mark):
case offsetof(struct __sk_buff, tc_index):
+ case offsetof(struct __sk_buff, priority):
case offsetof(struct __sk_buff, cb[0]) ...
offsetof(struct __sk_buff, cb[4]):
break;
@@ -1665,7 +1738,8 @@ static bool tc_cls_act_is_valid_access(int off, int size,
static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
int src_reg, int ctx_off,
- struct bpf_insn *insn_buf)
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog)
{
struct bpf_insn *insn = insn_buf;
@@ -1694,8 +1768,12 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
case offsetof(struct __sk_buff, priority):
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
- *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
- offsetof(struct sk_buff, priority));
+ if (type == BPF_WRITE)
+ *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
+ else
+ *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+ offsetof(struct sk_buff, priority));
break;
case offsetof(struct __sk_buff, ingress_ifindex):
@@ -1752,6 +1830,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
offsetof(struct __sk_buff, cb[4]):
BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ prog->cb_access = 1;
ctx_off -= offsetof(struct __sk_buff, cb[0]);
ctx_off += offsetof(struct sk_buff, cb);
ctx_off += offsetof(struct qdisc_skb_cb, data);
@@ -1761,6 +1840,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
break;
+ case offsetof(struct __sk_buff, tc_classid):
+ ctx_off -= offsetof(struct __sk_buff, tc_classid);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+ WARN_ON(type != BPF_WRITE);
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ break;
+
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index dfb1a9ca0835..299cfc24d888 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -180,7 +180,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b)
}
EXPORT_SYMBOL(lwtunnel_cmp_encap);
-int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
+int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
const struct lwtunnel_encap_ops *ops;
@@ -199,7 +199,7 @@ int lwtunnel_output(struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
if (likely(ops && ops->output))
- ret = ops->output(sk, skb);
+ ret = ops->output(net, sk, skb);
rcu_read_unlock();
if (ret == -EOPNOTSUPP)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2b515ba7e94f..1aa8437ed6c4 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2235,14 +2235,53 @@ static void neigh_update_notify(struct neighbour *neigh)
__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
+static bool neigh_master_filtered(struct net_device *dev, int master_idx)
+{
+ struct net_device *master;
+
+ if (!master_idx)
+ return false;
+
+ master = netdev_master_upper_dev_get(dev);
+ if (!master || master->ifindex != master_idx)
+ return true;
+
+ return false;
+}
+
+static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
+{
+ if (filter_idx && dev->ifindex != filter_idx)
+ return true;
+
+ return false;
+}
+
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct nlattr *tb[NDA_MAX + 1];
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
+ int filter_master_idx = 0, filter_idx = 0;
+ unsigned int flags = NLM_F_MULTI;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
+ if (!err) {
+ if (tb[NDA_IFINDEX])
+ filter_idx = nla_get_u32(tb[NDA_IFINDEX]);
+
+ if (tb[NDA_MASTER])
+ filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
+
+ if (filter_idx || filter_master_idx)
+ flags |= NLM_F_DUMP_FILTERED;
+ }
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
@@ -2255,12 +2294,16 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
n = rcu_dereference_bh(n->next)) {
if (!net_eq(dev_net(n->dev), net))
continue;
+ if (neigh_ifindex_filtered(n->dev, filter_idx))
+ continue;
+ if (neigh_master_filtered(n->dev, filter_master_idx))
+ continue;
if (idx < s_idx)
goto next;
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
- NLM_F_MULTI) < 0) {
+ flags) < 0) {
rc = -1;
goto out;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 830f8a7c1cb1..f88a62ab019d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -471,7 +471,7 @@ static ssize_t phys_switch_id_show(struct device *dev,
if (dev_isalive(netdev)) {
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
- int i;
-
- for (i = 0; i < dev->num_tx_queues; i++)
- if (queue == &dev->_tx[i])
- break;
+ unsigned int i;
+ i = queue - dev->_tx;
BUG_ON(i >= dev->num_tx_queues);
return i;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8bdada242a7d..94acfc89ad97 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work)
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
*/
-static int poll_one_napi(struct napi_struct *napi, int budget)
+static void poll_one_napi(struct napi_struct *napi)
{
int work = 0;
@@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
* holding the napi->poll_lock.
*/
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
- return budget;
+ return;
/* If we set this bit but see that it has already been set,
* that indicates that napi has been disabled and we need
* to abort this operation
*/
if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
- goto out;
+ return;
- work = napi->poll(napi, budget);
- WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
+ /* We explicitly pass the polling call a budget of 0 to
+ * indicate that we are clearing the Tx path only.
+ */
+ work = napi->poll(napi, 0);
+ WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
trace_napi_poll(napi);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
-
-out:
- return budget - work;
}
-static void poll_napi(struct net_device *dev, int budget)
+static void poll_napi(struct net_device *dev)
{
struct napi_struct *napi;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(napi, budget);
+ poll_one_napi(napi);
spin_unlock(&napi->poll_lock);
}
}
@@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
- int budget = 0;
/* Don't do any rx activity if the dev_lock mutex is held
* the dev_open/close paths use this to block netpoll activity
@@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev)
/* Process pending work on NIC */
ops->ndo_poll_controller(dev);
- poll_napi(dev, budget);
+ poll_napi(dev);
up(&ni->dev_lock);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index b42f0e26f89e..5d26056b6d8f 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -37,90 +37,16 @@
int sysctl_max_syn_backlog = 256;
EXPORT_SYMBOL(sysctl_max_syn_backlog);
-int reqsk_queue_alloc(struct request_sock_queue *queue,
- unsigned int nr_table_entries)
+void reqsk_queue_alloc(struct request_sock_queue *queue)
{
- size_t lopt_size = sizeof(struct listen_sock);
- struct listen_sock *lopt = NULL;
+ spin_lock_init(&queue->rskq_lock);
- nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
- nr_table_entries = max_t(u32, nr_table_entries, 8);
- nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
- lopt_size += nr_table_entries * sizeof(struct request_sock *);
+ spin_lock_init(&queue->fastopenq.lock);
+ queue->fastopenq.rskq_rst_head = NULL;
+ queue->fastopenq.rskq_rst_tail = NULL;
+ queue->fastopenq.qlen = 0;
- if (lopt_size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
- lopt = kzalloc(lopt_size, GFP_KERNEL |
- __GFP_NOWARN |
- __GFP_NORETRY);
- if (!lopt)
- lopt = vzalloc(lopt_size);
- if (!lopt)
- return -ENOMEM;
-
- get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
- spin_lock_init(&queue->syn_wait_lock);
queue->rskq_accept_head = NULL;
- lopt->nr_table_entries = nr_table_entries;
- lopt->max_qlen_log = ilog2(nr_table_entries);
-
- spin_lock_bh(&queue->syn_wait_lock);
- queue->listen_opt = lopt;
- spin_unlock_bh(&queue->syn_wait_lock);
-
- return 0;
-}
-
-void __reqsk_queue_destroy(struct request_sock_queue *queue)
-{
- /* This is an error recovery path only, no locking needed */
- kvfree(queue->listen_opt);
-}
-
-static inline struct listen_sock *reqsk_queue_yank_listen_sk(
- struct request_sock_queue *queue)
-{
- struct listen_sock *lopt;
-
- spin_lock_bh(&queue->syn_wait_lock);
- lopt = queue->listen_opt;
- queue->listen_opt = NULL;
- spin_unlock_bh(&queue->syn_wait_lock);
-
- return lopt;
-}
-
-void reqsk_queue_destroy(struct request_sock_queue *queue)
-{
- /* make all the listen_opt local to us */
- struct listen_sock *lopt = reqsk_queue_yank_listen_sk(queue);
-
- if (listen_sock_qlen(lopt) != 0) {
- unsigned int i;
-
- for (i = 0; i < lopt->nr_table_entries; i++) {
- struct request_sock *req;
-
- spin_lock_bh(&queue->syn_wait_lock);
- while ((req = lopt->syn_table[i]) != NULL) {
- lopt->syn_table[i] = req->dl_next;
- /* Because of following del_timer_sync(),
- * we must release the spinlock here
- * or risk a dead lock.
- */
- spin_unlock_bh(&queue->syn_wait_lock);
- atomic_inc(&lopt->qlen_dec);
- if (del_timer_sync(&req->rsk_timer))
- reqsk_put(req);
- reqsk_put(req);
- spin_lock_bh(&queue->syn_wait_lock);
- }
- spin_unlock_bh(&queue->syn_wait_lock);
- }
- }
-
- if (WARN_ON(listen_sock_qlen(lopt) != 0))
- pr_err("qlen %u\n", listen_sock_qlen(lopt));
- kvfree(lopt);
}
/*
@@ -174,7 +100,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
struct sock *lsk = req->rsk_listener;
struct fastopen_queue *fastopenq;
- fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
+ fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
tcp_sk(sk)->fastopen_rsk = NULL;
spin_lock_bh(&fastopenq->lock);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0ec48403ed68..24775953fa68 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -96,7 +96,7 @@ int rtnl_is_locked(void)
EXPORT_SYMBOL(rtnl_is_locked);
#ifdef CONFIG_PROVE_LOCKING
-int lockdep_rtnl_is_held(void)
+bool lockdep_rtnl_is_held(void)
{
return lockdep_is_held(&rtnl_mutex);
}
@@ -1025,7 +1025,7 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
{
int err;
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1272,7 +1272,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (!(af = nla_nest_start(skb, af_ops->family)))
goto nla_put_failure;
- err = af_ops->fill_link_af(skb, dev);
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
/*
* Caller may return ENODATA to indicate that there
diff --git a/net/core/sock.c b/net/core/sock.c
index 3307c02244d3..dcc7d62654d5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -988,6 +988,10 @@ set_rcvbuf:
sk->sk_max_pacing_rate);
break;
+ case SO_INCOMING_CPU:
+ sk->sk_incoming_cpu = val;
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1852,6 +1856,32 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
}
EXPORT_SYMBOL(sock_alloc_send_skb);
+int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
+ struct sockcm_cookie *sockc)
+{
+ struct cmsghdr *cmsg;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+ switch (cmsg->cmsg_type) {
+ case SO_MARK:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
+ return -EINVAL;
+ sockc->mark = *(u32 *)CMSG_DATA(cmsg);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+EXPORT_SYMBOL(sock_cmsg_send);
+
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
@@ -2353,6 +2383,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0U;
sk->sk_pacing_rate = ~0U;
+ sk->sk_incoming_cpu = -1;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2758,7 +2789,7 @@ static int req_prot_init(const struct proto *prot)
rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
rsk_prot->obj_size, 0,
- 0, NULL);
+ prot->slab_flags, NULL);
if (!rsk_prot->slab) {
pr_crit("%s: Can't create request sock SLAB cache!\n",
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 817622f3dbb7..0c1d58d43f67 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -1,3 +1,5 @@
+/* License: GPL */
+
#include <linux/mutex.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
@@ -323,14 +325,4 @@ static int __init sock_diag_init(void)
BUG_ON(!broadcast_wq);
return register_pernet_subsys(&diag_net_ops);
}
-
-static void __exit sock_diag_exit(void)
-{
- unregister_pernet_subsys(&diag_net_ops);
- destroy_workqueue(broadcast_wq);
-}
-
-module_init(sock_diag_init);
-module_exit(sock_diag_exit);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
+device_initcall(sock_diag_init);
diff --git a/net/core/utils.c b/net/core/utils.c
index 3dffce953c39..3d17ca8b4744 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -348,52 +348,3 @@ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
}
}
EXPORT_SYMBOL(inet_proto_csum_replace_by_diff);
-
-struct __net_random_once_work {
- struct work_struct work;
- struct static_key *key;
-};
-
-static void __net_random_once_deferred(struct work_struct *w)
-{
- struct __net_random_once_work *work =
- container_of(w, struct __net_random_once_work, work);
- BUG_ON(!static_key_enabled(work->key));
- static_key_slow_dec(work->key);
- kfree(work);
-}
-
-static void __net_random_once_disable_jump(struct static_key *key)
-{
- struct __net_random_once_work *w;
-
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (!w)
- return;
-
- INIT_WORK(&w->work, __net_random_once_deferred);
- w->key = key;
- schedule_work(&w->work);
-}
-
-bool __net_get_random_once(void *buf, int nbytes, bool *done,
- struct static_key *once_key)
-{
- static DEFINE_SPINLOCK(lock);
- unsigned long flags;
-
- spin_lock_irqsave(&lock, flags);
- if (*done) {
- spin_unlock_irqrestore(&lock, flags);
- return false;
- }
-
- get_random_bytes(buf, nbytes);
- *done = true;
- spin_unlock_irqrestore(&lock, flags);
-
- __net_random_once_disable_jump(once_key);
-
- return true;
-}
-EXPORT_SYMBOL(__net_get_random_once);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 5b21f6f88e97..4f6c1862dfd2 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -13,6 +13,7 @@
* You should have received a copy of the GNU General Public License along with
* this program; if not, see <http://www.gnu.org/licenses/>.
*
+ * Description: Data Center Bridging netlink interface
* Author: Lucy Liu <lucy.liu@intel.com>
*/
@@ -24,7 +25,7 @@
#include <linux/dcbnl.h>
#include <net/dcbevent.h>
#include <linux/rtnetlink.h>
-#include <linux/module.h>
+#include <linux/init.h>
#include <net/sock.h>
/* Data Center Bridging (DCB) is a collection of Ethernet enhancements
@@ -48,10 +49,6 @@
* features for capable devices.
*/
-MODULE_AUTHOR("Lucy Liu, <lucy.liu@intel.com>");
-MODULE_DESCRIPTION("Data Center Bridging netlink interface");
-MODULE_LICENSE("GPL");
-
/**************** DCB attribute policies *************************************/
/* DCB netlink attributes policy */
@@ -1935,19 +1932,6 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
}
EXPORT_SYMBOL(dcb_ieee_delapp);
-static void dcb_flushapp(void)
-{
- struct dcb_app_type *app;
- struct dcb_app_type *tmp;
-
- spin_lock_bh(&dcb_lock);
- list_for_each_entry_safe(app, tmp, &dcb_app_list, list) {
- list_del(&app->list);
- kfree(app);
- }
- spin_unlock_bh(&dcb_lock);
-}
-
static int __init dcbnl_init(void)
{
INIT_LIST_HEAD(&dcb_app_list);
@@ -1957,12 +1941,4 @@ static int __init dcbnl_init(void)
return 0;
}
-module_init(dcbnl_init);
-
-static void __exit dcbnl_exit(void)
-{
- rtnl_unregister(PF_UNSPEC, RTM_GETDCB);
- rtnl_unregister(PF_UNSPEC, RTM_SETDCB);
- dcb_flushapp();
-}
-module_exit(dcbnl_exit);
+device_initcall(dcbnl_init);
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index bebc735f5afc..923f5a180134 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -229,7 +229,7 @@ void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
int dccp_retransmit_skb(struct sock *sk);
void dccp_send_ack(struct sock *sk);
-void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *rsk);
void dccp_send_sync(struct sock *sk, const u64 seq,
@@ -270,13 +270,13 @@ int dccp_reqsk_init(struct request_sock *rq, struct dccp_sock const *dp,
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
-struct sock *dccp_create_openreq_child(struct sock *sk,
+struct sock *dccp_create_openreq_child(const struct sock *sk,
const struct request_sock *req,
const struct sk_buff *skb);
int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
-struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+struct sock *dccp_v4_request_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst);
struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
@@ -293,7 +293,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized);
void dccp_destroy_sock(struct sock *sk);
void dccp_close(struct sock *sk, long timeout);
-struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req);
int dccp_connect(struct sock *sk);
@@ -325,13 +325,13 @@ void dccp_send_close(struct sock *sk, const int active);
int dccp_invalid_packet(struct sk_buff *skb);
u32 dccp_sample_rtt(struct sock *sk, long delta);
-static inline int dccp_bad_service_code(const struct sock *sk,
+static inline bool dccp_bad_service_code(const struct sock *sk,
const __be32 service)
{
const struct dccp_sock *dp = dccp_sk(sk);
if (dp->dccps_service == service)
- return 0;
+ return false;
return !dccp_list_has_service(dp->dccps_service_list, service);
}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index ccf4c5629b3c..59bc180b02d8 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -208,7 +208,6 @@ void dccp_req_err(struct sock *sk, u64 seq)
if (!between48(seq, dccp_rsk(req)->dreq_iss, dccp_rsk(req)->dreq_gss)) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- reqsk_put(req);
} else {
/*
* Still in RESPOND, just remove it silently.
@@ -218,6 +217,7 @@ void dccp_req_err(struct sock *sk, u64 seq)
*/
inet_csk_reqsk_queue_drop(req->rsk_listener, req);
}
+ reqsk_put(req);
}
EXPORT_SYMBOL(dccp_req_err);
@@ -390,7 +390,8 @@ static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
*
* This is the equivalent of TCP's tcp_v4_syn_recv_sock
*/
-struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
+ struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
@@ -443,36 +444,6 @@ put_and_exit:
}
EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);
-static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
-{
- const struct dccp_hdr *dh = dccp_hdr(skb);
- const struct iphdr *iph = ip_hdr(skb);
- struct sock *nsk;
- /* Find possible connection requests. */
- struct request_sock *req = inet_csk_search_req(sk, dh->dccph_sport,
- iph->saddr, iph->daddr);
- if (req) {
- nsk = dccp_check_req(sk, skb, req);
- if (!nsk)
- reqsk_put(req);
- return nsk;
- }
- nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo,
- iph->saddr, dh->dccph_sport,
- iph->daddr, dh->dccph_dport,
- inet_iif(skb));
- if (nsk != NULL) {
- if (nsk->sk_state != DCCP_TIME_WAIT) {
- bh_lock_sock(nsk);
- return nsk;
- }
- inet_twsk_put(inet_twsk(nsk));
- return NULL;
- }
-
- return sk;
-}
-
static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -498,7 +469,7 @@ static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk,
return &rt->dst;
}
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req)
{
int err = -1;
struct sk_buff *skb;
@@ -527,7 +498,7 @@ out:
return err;
}
-static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v4_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
{
int err;
const struct iphdr *rxiph;
@@ -624,7 +595,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp_request_sock_ops, sk);
+ req = inet_reqsk_alloc(&dccp_request_sock_ops, sk, true);
if (req == NULL)
goto drop;
@@ -704,18 +675,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
* NOTE: the check for the packet types is done in
* dccp_rcv_state_process
*/
- if (sk->sk_state == DCCP_LISTEN) {
- struct sock *nsk = dccp_v4_hnd_req(sk, skb);
-
- if (nsk == NULL)
- goto discard;
-
- if (nsk != sk) {
- if (dccp_child_process(sk, nsk, skb))
- goto reset;
- return 0;
- }
- }
if (dccp_rcv_state_process(sk, skb, dh, skb->len))
goto reset;
@@ -723,7 +682,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
reset:
dccp_v4_ctl_send_reset(sk, skb);
-discard:
kfree_skb(skb);
return 0;
}
@@ -841,15 +799,10 @@ static int dccp_v4_rcv(struct sk_buff *skb)
DCCP_SKB_CB(skb)->dccpd_ack_seq);
}
- /* Step 2:
- * Look up flow ID in table and get corresponding socket */
+lookup:
sk = __inet_lookup_skb(&dccp_hashinfo, skb,
dh->dccph_sport, dh->dccph_dport);
- /*
- * Step 2:
- * If no socket ...
- */
- if (sk == NULL) {
+ if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
goto no_dccp_socket;
@@ -867,6 +820,31 @@ static int dccp_v4_rcv(struct sk_buff *skb)
goto no_dccp_socket;
}
+ if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *nsk = NULL;
+
+ sk = req->rsk_listener;
+ if (likely(sk->sk_state == DCCP_LISTEN)) {
+ nsk = dccp_check_req(sk, skb, req);
+ } else {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (nsk == sk) {
+ sock_hold(sk);
+ reqsk_put(req);
+ } else if (dccp_child_process(sk, nsk, skb)) {
+ dccp_v4_ctl_send_reset(sk, skb);
+ goto discard_it;
+ } else {
+ return 0;
+ }
+ }
/*
* RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
* o if MinCsCov = 0, only packets with CsCov = 0 are accepted
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5165571f397a..d9cc731f2619 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -181,7 +181,7 @@ out:
}
-static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
+static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -234,7 +234,7 @@ static void dccp_v6_reqsk_destructor(struct request_sock *req)
kfree_skb(inet_rsk(req)->pktopts);
}
-static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
+static void dccp_v6_ctl_send_reset(const struct sock *sk, struct sk_buff *rxskb)
{
const struct ipv6hdr *rxip6h;
struct sk_buff *skb;
@@ -290,37 +290,6 @@ static struct request_sock_ops dccp6_request_sock_ops = {
.syn_ack_timeout = dccp_syn_ack_timeout,
};
-static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
-{
- const struct dccp_hdr *dh = dccp_hdr(skb);
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct request_sock *req;
- struct sock *nsk;
-
- req = inet6_csk_search_req(sk, dh->dccph_sport, &iph->saddr,
- &iph->daddr, inet6_iif(skb));
- if (req) {
- nsk = dccp_check_req(sk, skb, req);
- if (!nsk)
- reqsk_put(req);
- return nsk;
- }
- nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo,
- &iph->saddr, dh->dccph_sport,
- &iph->daddr, ntohs(dh->dccph_dport),
- inet6_iif(skb));
- if (nsk != NULL) {
- if (nsk->sk_state != DCCP_TIME_WAIT) {
- bh_lock_sock(nsk);
- return nsk;
- }
- inet_twsk_put(inet_twsk(nsk));
- return NULL;
- }
-
- return sk;
-}
-
static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct request_sock *req;
@@ -350,7 +319,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
goto drop;
- req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk);
+ req = inet_reqsk_alloc(&dccp6_request_sock_ops, sk, true);
if (req == NULL)
goto drop;
@@ -398,7 +367,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (dccp_v6_send_response(sk, req))
goto drop_and_free;
- inet6_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+ inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
return 0;
drop_and_free:
@@ -408,13 +377,14 @@ drop:
return -1;
}
-static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
+static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct ipv6_pinfo *newnp;
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
@@ -462,22 +432,11 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
if (sk_acceptq_is_full(sk))
goto out_overflow;
- if (dst == NULL) {
- struct in6_addr *final_p, final;
+ if (!dst) {
struct flowi6 fl6;
- memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_proto = IPPROTO_DCCP;
- fl6.daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
- fl6.saddr = ireq->ir_v6_loc_addr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.fl6_dport = ireq->ir_rmt_port;
- fl6.fl6_sport = htons(ireq->ir_num);
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
-
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
- if (IS_ERR(dst))
+ dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_DCCP);
+ if (!dst)
goto out;
}
@@ -651,24 +610,6 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
* NOTE: the check for the packet types is done in
* dccp_rcv_state_process
*/
- if (sk->sk_state == DCCP_LISTEN) {
- struct sock *nsk = dccp_v6_hnd_req(sk, skb);
-
- if (nsk == NULL)
- goto discard;
- /*
- * Queue it on the new socket if the new socket is active,
- * otherwise we just shortcircuit this and continue with
- * the new socket..
- */
- if (nsk != sk) {
- if (dccp_child_process(sk, nsk, skb))
- goto reset;
- if (opt_skb != NULL)
- __kfree_skb(opt_skb);
- return 0;
- }
- }
if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len))
goto reset;
@@ -715,16 +656,11 @@ static int dccp_v6_rcv(struct sk_buff *skb)
else
DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
- /* Step 2:
- * Look up flow ID in table and get corresponding socket */
+lookup:
sk = __inet6_lookup_skb(&dccp_hashinfo, skb,
dh->dccph_sport, dh->dccph_dport,
inet6_iif(skb));
- /*
- * Step 2:
- * If no socket ...
- */
- if (sk == NULL) {
+ if (!sk) {
dccp_pr_debug("failed to look up flow ID in table and "
"get corresponding socket\n");
goto no_dccp_socket;
@@ -742,6 +678,31 @@ static int dccp_v6_rcv(struct sk_buff *skb)
goto no_dccp_socket;
}
+ if (sk->sk_state == DCCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *nsk = NULL;
+
+ sk = req->rsk_listener;
+ if (likely(sk->sk_state == DCCP_LISTEN)) {
+ nsk = dccp_check_req(sk, skb, req);
+ } else {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (nsk == sk) {
+ sock_hold(sk);
+ reqsk_put(req);
+ } else if (dccp_child_process(sk, nsk, skb)) {
+ dccp_v6_ctl_send_reset(sk, skb);
+ goto discard_it;
+ } else {
+ return 0;
+ }
+ }
/*
* RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
* o if MinCsCov = 0, only packets with CsCov = 0 are accepted
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 838f524cf11a..d10aace43672 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -72,7 +72,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
dccp_done(sk);
}
-struct sock *dccp_create_openreq_child(struct sock *sk,
+struct sock *dccp_create_openreq_child(const struct sock *sk,
const struct request_sock *req,
const struct sk_buff *skb)
{
@@ -236,7 +236,7 @@ int dccp_child_process(struct sock *parent, struct sock *child,
EXPORT_SYMBOL_GPL(dccp_child_process);
-void dccp_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+void dccp_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *rsk)
{
DCCP_BUG("DCCP-ACK packets are never sent in LISTEN/RESPOND state");
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 0248e8a3460c..4ce912e691d0 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -390,7 +390,7 @@ int dccp_retransmit_skb(struct sock *sk)
return dccp_transmit_skb(sk, skb_clone(sk->sk_send_head, GFP_ATOMIC));
}
-struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *dccp_make_response(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req)
{
struct dccp_hdr *dh;
@@ -398,13 +398,18 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
const u32 dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_response);
- struct sk_buff *skb = sock_wmalloc(sk, sk->sk_prot->max_header, 1,
- GFP_ATOMIC);
- if (skb == NULL)
+ struct sk_buff *skb;
+
+ /* sk is marked const to clearly express we dont hold socket lock.
+ * sock_wmalloc() will atomically change sk->sk_wmem_alloc,
+ * it is safe to promote sk to non const.
+ */
+ skb = sock_wmalloc((struct sock *)sk, MAX_DCCP_HEADER, 1,
+ GFP_ATOMIC);
+ if (!skb)
return NULL;
- /* Reserve space for headers. */
- skb_reserve(skb, sk->sk_prot->max_header);
+ skb_reserve(skb, MAX_DCCP_HEADER);
skb_dst_set(skb, dst_clone(dst));
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 4507b188fc51..482730cd8a56 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
return err;
}
-static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb)
+static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
@@ -246,8 +246,9 @@ static int dn_long_output(struct neighbour *neigh, struct sock *sk,
skb_reset_network_header(skb);
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
- NULL, neigh->dev, dn_neigh_output_packet);
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+ &init_net, sk, skb, NULL, neigh->dev,
+ dn_neigh_output_packet);
}
/*
@@ -286,8 +287,9 @@ static int dn_short_output(struct neighbour *neigh, struct sock *sk,
skb_reset_network_header(skb);
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
- NULL, neigh->dev, dn_neigh_output_packet);
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+ &init_net, sk, skb, NULL, neigh->dev,
+ dn_neigh_output_packet);
}
/*
@@ -327,11 +329,12 @@ static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
skb_reset_network_header(skb);
- return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb,
- NULL, neigh->dev, dn_neigh_output_packet);
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING,
+ &init_net, sk, skb, NULL, neigh->dev,
+ dn_neigh_output_packet);
}
-int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb)
+int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *) dst;
@@ -375,7 +378,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb)
/*
* Ethernet router hello message received
*/
-int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb)
+int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
@@ -437,7 +440,7 @@ int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb)
/*
* Endnode hello message received
*/
-int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb)
+int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
struct neighbour *neigh;
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index a321eac9fd0c..7ac086d5c0c0 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -714,7 +714,8 @@ out:
return ret;
}
-static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb)
+static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
+ struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
struct sock *sk = NULL;
@@ -814,8 +815,8 @@ free_out:
int dn_nsp_rx(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb,
- skb->dev, NULL,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN,
+ &init_net, NULL, skb, skb->dev, NULL,
dn_nsp_rx_packet);
}
diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
index 1aaa51ebbda6..849805e7af52 100644
--- a/net/decnet/dn_nsp_out.c
+++ b/net/decnet/dn_nsp_out.c
@@ -85,7 +85,7 @@ static void dn_nsp_send(struct sk_buff *skb)
if (dst) {
try_again:
skb_dst_set(skb, dst);
- dst_output(skb);
+ dst_output(&init_net, skb->sk, skb);
return;
}
@@ -582,7 +582,7 @@ static __inline__ void dn_nsp_do_disc(struct sock *sk, unsigned char msgflg,
* associations.
*/
skb_dst_set(skb, dst_clone(dst));
- dst_output(skb);
+ dst_output(&init_net, skb->sk, skb);
}
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index 03227ffd19ce..607a14f20d88 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb)
*
* Returns: result of input function if route is found, error code otherwise
*/
-static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb)
+static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dn_skb_cb *cb;
int err;
@@ -573,8 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb)
ptr++;
cb->hops = *ptr++; /* Visit Count */
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb,
- skb->dev, NULL,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
+ &init_net, NULL, skb, skb->dev, NULL,
dn_route_rx_packet);
drop_it:
@@ -601,8 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb)
ptr += 2;
cb->hops = *ptr & 0x3f;
- return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb,
- skb->dev, NULL,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING,
+ &init_net, NULL, skb, skb->dev, NULL,
dn_route_rx_packet);
drop_it:
@@ -610,7 +610,7 @@ drop_it:
return NET_RX_DROP;
}
-static int dn_route_discard(struct sock *sk, struct sk_buff *skb)
+static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
{
/*
* I know we drop the packet here, but thats considered success in
@@ -620,7 +620,7 @@ static int dn_route_discard(struct sock *sk, struct sk_buff *skb)
return NET_RX_SUCCESS;
}
-static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb)
+static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
{
dn_dev_hello(skb);
dn_neigh_pointopoint_hello(skb);
@@ -706,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
switch (flags & DN_RT_CNTL_MSK) {
case DN_RT_PKT_HELO:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- NULL, skb, skb->dev, NULL,
+ &init_net, NULL, skb, skb->dev, NULL,
dn_route_ptp_hello);
case DN_RT_PKT_L1RT:
case DN_RT_PKT_L2RT:
return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
- NULL, skb, skb->dev, NULL,
+ &init_net, NULL, skb, skb->dev, NULL,
dn_route_discard);
case DN_RT_PKT_ERTH:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- NULL, skb, skb->dev, NULL,
+ &init_net, NULL, skb, skb->dev, NULL,
dn_neigh_router_hello);
case DN_RT_PKT_EEDH:
return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
- NULL, skb, skb->dev, NULL,
+ &init_net, NULL, skb, skb->dev, NULL,
dn_neigh_endnode_hello);
}
} else {
@@ -744,7 +744,7 @@ out:
return NET_RX_DROP;
}
-static int dn_output(struct sock *sk, struct sk_buff *skb)
+static int dn_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct dn_route *rt = (struct dn_route *)dst;
@@ -770,8 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb)
cb->rt_flags |= DN_RT_F_IE;
cb->hops = 0;
- return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb,
- NULL, dev,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT,
+ &init_net, sk, skb, NULL, dev,
dn_to_neigh_output);
error:
@@ -789,9 +789,7 @@ static int dn_forward(struct sk_buff *skb)
struct dn_dev *dn_db = rcu_dereference(dst->dev->dn_ptr);
struct dn_route *rt;
int header_len;
-#ifdef CONFIG_NETFILTER
struct net_device *dev = skb->dev;
-#endif
if (skb->pkt_type != PACKET_HOST)
goto drop;
@@ -819,8 +817,8 @@ static int dn_forward(struct sk_buff *skb)
if (rt->rt_flags & RTCF_DOREDIRECT)
cb->rt_flags |= DN_RT_F_IE;
- return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb,
- dev, skb->dev,
+ return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD,
+ &init_net, NULL, skb, dev, skb->dev,
dn_to_neigh_output);
drop:
@@ -832,7 +830,7 @@ drop:
* Used to catch bugs. This should never normally get
* called.
*/
-static int dn_rt_bug_sk(struct sock *sk, struct sk_buff *skb)
+static int dn_rt_bug_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dn_skb_cb *cb = DN_SKB_CB(skb);
@@ -1469,7 +1467,7 @@ make_route:
rt->n = neigh;
rt->dst.lastuse = jiffies;
- rt->dst.output = dn_rt_bug_sk;
+ rt->dst.output = dn_rt_bug_out;
switch (res.type) {
case RTN_UNICAST:
rt->dst.input = dn_forward;
diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c
index af34fc9bdf69..85f2fdc360c2 100644
--- a/net/decnet/netfilter/dn_rtmsg.c
+++ b/net/decnet/netfilter/dn_rtmsg.c
@@ -87,7 +87,7 @@ static void dnrmg_send_peer(struct sk_buff *skb)
}
-static unsigned int dnrmg_hook(const struct nf_hook_ops *ops,
+static unsigned int dnrmg_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index adb5325f4934..1eba07feb34a 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -327,8 +327,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent)
ret = dsa_slave_create(ds, parent, i, pd->port_names[i]);
if (ret < 0) {
- netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s)\n",
- index, i, pd->port_names[i]);
+ netdev_err(dst->master_netdev, "[%d]: can't create dsa slave device for port %d(%s): %d\n",
+ index, i, pd->port_names[i], ret);
ret = 0;
}
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7d91f4612ac0..b0b8da0f5af8 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -242,16 +242,15 @@ static int dsa_bridge_check_vlan_range(struct dsa_switch *ds,
}
static int dsa_slave_port_vlan_add(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj_port_vlan *vlan,
+ struct switchdev_trans *trans)
{
- struct switchdev_obj_vlan *vlan = &obj->u.vlan;
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
u16 vid;
int err;
- switch (obj->trans) {
- case SWITCHDEV_TRANS_PREPARE:
+ if (switchdev_trans_ph_prepare(trans)) {
if (!ds->drv->port_vlan_add || !ds->drv->port_pvid_set)
return -EOPNOTSUPP;
@@ -263,8 +262,7 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
vlan->vid_end);
if (err)
return err;
- break;
- case SWITCHDEV_TRANS_COMMIT:
+ } else {
for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
err = ds->drv->port_vlan_add(ds, p->port, vid,
vlan->flags &
@@ -274,18 +272,14 @@ static int dsa_slave_port_vlan_add(struct net_device *dev,
if (err)
return err;
}
- break;
- default:
- return -EOPNOTSUPP;
}
return 0;
}
static int dsa_slave_port_vlan_del(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj_port_vlan *vlan)
{
- struct switchdev_obj_vlan *vlan = &obj->u.vlan;
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
u16 vid;
@@ -304,9 +298,9 @@ static int dsa_slave_port_vlan_del(struct net_device *dev,
}
static int dsa_slave_port_vlan_dump(struct net_device *dev,
- struct switchdev_obj *obj)
+ struct switchdev_obj_port_vlan *vlan,
+ switchdev_obj_dump_cb_t *cb)
{
- struct switchdev_obj_vlan *vlan = &obj->u.vlan;
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
DECLARE_BITMAP(members, DSA_MAX_PORTS);
@@ -338,7 +332,7 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
if (test_bit(p->port, untagged))
vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED;
- err = obj->cb(dev, obj);
+ err = cb(&vlan->obj);
if (err)
break;
}
@@ -347,37 +341,40 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev,
}
static int dsa_slave_port_fdb_add(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj_port_fdb *fdb,
+ struct switchdev_trans *trans)
{
- struct switchdev_obj_fdb *fdb = &obj->u.fdb;
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
- int ret = -EOPNOTSUPP;
+ int ret;
+
+ if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add)
+ return -EOPNOTSUPP;
- if (obj->trans == SWITCHDEV_TRANS_PREPARE)
- ret = ds->drv->port_fdb_add ? 0 : -EOPNOTSUPP;
- else if (obj->trans == SWITCHDEV_TRANS_COMMIT)
- ret = ds->drv->port_fdb_add(ds, p->port, fdb->addr, fdb->vid);
+ if (switchdev_trans_ph_prepare(trans))
+ ret = ds->drv->port_fdb_prepare(ds, p->port, fdb, trans);
+ else
+ ret = ds->drv->port_fdb_add(ds, p->port, fdb, trans);
return ret;
}
static int dsa_slave_port_fdb_del(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj_port_fdb *fdb)
{
- struct switchdev_obj_fdb *fdb = &obj->u.fdb;
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
int ret = -EOPNOTSUPP;
if (ds->drv->port_fdb_del)
- ret = ds->drv->port_fdb_del(ds, p->port, fdb->addr, fdb->vid);
+ ret = ds->drv->port_fdb_del(ds, p->port, fdb);
return ret;
}
static int dsa_slave_port_fdb_dump(struct net_device *dev,
- struct switchdev_obj *obj)
+ struct switchdev_obj_port_fdb *fdb,
+ switchdev_obj_dump_cb_t *cb)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
@@ -396,11 +393,11 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev,
if (ret < 0)
break;
- obj->u.fdb.addr = addr;
- obj->u.fdb.vid = vid;
- obj->u.fdb.ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
+ ether_addr_copy(fdb->addr, addr);
+ fdb->vid = vid;
+ fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE;
- ret = obj->cb(dev, obj);
+ ret = cb(&fdb->obj);
if (ret < 0)
break;
}
@@ -456,15 +453,16 @@ static int dsa_slave_stp_update(struct net_device *dev, u8 state)
}
static int dsa_slave_port_attr_set(struct net_device *dev,
- struct switchdev_attr *attr)
+ const struct switchdev_attr *attr,
+ struct switchdev_trans *trans)
{
struct dsa_slave_priv *p = netdev_priv(dev);
struct dsa_switch *ds = p->parent;
int ret;
switch (attr->id) {
- case SWITCHDEV_ATTR_PORT_STP_STATE:
- if (attr->trans == SWITCHDEV_TRANS_PREPARE)
+ case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+ if (switchdev_trans_ph_prepare(trans))
ret = ds->drv->port_stp_update ? 0 : -EOPNOTSUPP;
else
ret = ds->drv->port_stp_update(ds, p->port,
@@ -479,7 +477,8 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
}
static int dsa_slave_port_obj_add(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans)
{
int err;
@@ -489,11 +488,15 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
*/
switch (obj->id) {
- case SWITCHDEV_OBJ_PORT_FDB:
- err = dsa_slave_port_fdb_add(dev, obj);
+ case SWITCHDEV_OBJ_ID_PORT_FDB:
+ err = dsa_slave_port_fdb_add(dev,
+ SWITCHDEV_OBJ_PORT_FDB(obj),
+ trans);
break;
- case SWITCHDEV_OBJ_PORT_VLAN:
- err = dsa_slave_port_vlan_add(dev, obj);
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ err = dsa_slave_port_vlan_add(dev,
+ SWITCHDEV_OBJ_PORT_VLAN(obj),
+ trans);
break;
default:
err = -EOPNOTSUPP;
@@ -504,16 +507,18 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
}
static int dsa_slave_port_obj_del(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj *obj)
{
int err;
switch (obj->id) {
- case SWITCHDEV_OBJ_PORT_FDB:
- err = dsa_slave_port_fdb_del(dev, obj);
+ case SWITCHDEV_OBJ_ID_PORT_FDB:
+ err = dsa_slave_port_fdb_del(dev,
+ SWITCHDEV_OBJ_PORT_FDB(obj));
break;
- case SWITCHDEV_OBJ_PORT_VLAN:
- err = dsa_slave_port_vlan_del(dev, obj);
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ err = dsa_slave_port_vlan_del(dev,
+ SWITCHDEV_OBJ_PORT_VLAN(obj));
break;
default:
err = -EOPNOTSUPP;
@@ -524,16 +529,21 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
}
static int dsa_slave_port_obj_dump(struct net_device *dev,
- struct switchdev_obj *obj)
+ struct switchdev_obj *obj,
+ switchdev_obj_dump_cb_t *cb)
{
int err;
switch (obj->id) {
- case SWITCHDEV_OBJ_PORT_FDB:
- err = dsa_slave_port_fdb_dump(dev, obj);
+ case SWITCHDEV_OBJ_ID_PORT_FDB:
+ err = dsa_slave_port_fdb_dump(dev,
+ SWITCHDEV_OBJ_PORT_FDB(obj),
+ cb);
break;
- case SWITCHDEV_OBJ_PORT_VLAN:
- err = dsa_slave_port_vlan_dump(dev, obj);
+ case SWITCHDEV_OBJ_ID_PORT_VLAN:
+ err = dsa_slave_port_vlan_dump(dev,
+ SWITCHDEV_OBJ_PORT_VLAN(obj),
+ cb);
break;
default:
err = -EOPNOTSUPP;
@@ -587,7 +597,7 @@ static int dsa_slave_port_attr_get(struct net_device *dev,
struct dsa_switch *ds = p->parent;
switch (attr->id) {
- case SWITCHDEV_ATTR_PORT_PARENT_ID:
+ case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
attr->u.ppid.id_len = sizeof(ds->index);
memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len);
break;
@@ -967,6 +977,10 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = {
.switchdev_port_obj_dump = dsa_slave_port_obj_dump,
};
+static struct device_type dsa_type = {
+ .name = "dsa",
+};
+
static void dsa_slave_adjust_link(struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
@@ -1015,8 +1029,10 @@ static int dsa_slave_phy_connect(struct dsa_slave_priv *p,
struct dsa_switch *ds = p->parent;
p->phy = ds->slave_mii_bus->phy_map[addr];
- if (!p->phy)
+ if (!p->phy) {
+ netdev_err(slave_dev, "no phy at %d\n", addr);
return -ENODEV;
+ }
/* Use already configured phy mode */
if (p->phy_interface == PHY_INTERFACE_MODE_NA)
@@ -1050,7 +1066,7 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
*/
ret = of_phy_register_fixed_link(port_dn);
if (ret) {
- netdev_err(slave_dev, "failed to register fixed PHY\n");
+ netdev_err(slave_dev, "failed to register fixed PHY: %d\n", ret);
return ret;
}
phy_is_fixed = true;
@@ -1061,17 +1077,20 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
phy_flags = ds->drv->get_phy_flags(ds, p->port);
if (phy_dn) {
- ret = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
+ int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn);
+
/* If this PHY address is part of phys_mii_mask, which means
* that we need to divert reads and writes to/from it, then we
* want to bind this device using the slave MII bus created by
* DSA to make that happen.
*/
- if (!phy_is_fixed && ret >= 0 &&
- (ds->phys_mii_mask & (1 << ret))) {
- ret = dsa_slave_phy_connect(p, slave_dev, ret);
- if (ret)
+ if (!phy_is_fixed && phy_id >= 0 &&
+ (ds->phys_mii_mask & (1 << phy_id))) {
+ ret = dsa_slave_phy_connect(p, slave_dev, phy_id);
+ if (ret) {
+ netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret);
return ret;
+ }
} else {
p->phy = of_phy_connect(slave_dev, phy_dn,
dsa_slave_adjust_link,
@@ -1088,8 +1107,10 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p,
*/
if (!p->phy) {
ret = dsa_slave_phy_connect(p, slave_dev, p->port);
- if (ret)
+ if (ret) {
+ netdev_err(slave_dev, "failed to connect to port %d: %d\n", p->port, ret);
return ret;
+ }
} else {
netdev_info(slave_dev, "attached PHY at address %d [%s]\n",
p->phy->addr, p->phy->drv->name);
@@ -1155,6 +1176,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
slave_dev->priv_flags |= IFF_NO_QUEUE;
slave_dev->netdev_ops = &dsa_slave_netdev_ops;
slave_dev->switchdev_ops = &dsa_slave_switchdev_ops;
+ SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
NULL);
@@ -1200,6 +1222,7 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent,
ret = dsa_slave_phy_setup(p, slave_dev);
if (ret) {
+ netdev_err(master, "error %d setting up slave phy\n", ret);
free_netdev(slave_dev);
return ret;
}
@@ -1253,7 +1276,7 @@ int dsa_slave_netdevice_event(struct notifier_block *unused,
goto out;
err = dsa_slave_master_changed(dev);
- if (err)
+ if (err && err != -EOPNOTSUPP)
netdev_warn(dev, "failed to reflect master change\n");
break;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index d850fdc828f9..9e63f252a89e 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -127,7 +127,7 @@ u32 eth_get_headlen(void *data, unsigned int len)
struct flow_keys keys;
/* this should never happen, but better safe than sorry */
- if (len < sizeof(*eth))
+ if (unlikely(len < sizeof(*eth)))
return len;
/* parse any remaining L2/L3 headers, check for L4 */
diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h
index ea339fa94c27..b4e17a7c0df0 100644
--- a/net/ieee802154/6lowpan/6lowpan_i.h
+++ b/net/ieee802154/6lowpan/6lowpan_i.h
@@ -7,6 +7,15 @@
#include <net/inet_frag.h>
#include <net/6lowpan.h>
+typedef unsigned __bitwise__ lowpan_rx_result;
+#define RX_CONTINUE ((__force lowpan_rx_result) 0u)
+#define RX_DROP_UNUSABLE ((__force lowpan_rx_result) 1u)
+#define RX_DROP ((__force lowpan_rx_result) 2u)
+#define RX_QUEUED ((__force lowpan_rx_result) 3u)
+
+#define LOWPAN_DISPATCH_FRAG1 0xc0
+#define LOWPAN_DISPATCH_FRAGN 0xe0
+
struct lowpan_create_arg {
u16 tag;
u16 d_size;
@@ -40,7 +49,7 @@ static inline u32 ieee802154_addr_hash(const struct ieee802154_addr *a)
/* private device info */
struct lowpan_dev_info {
- struct net_device *real_dev; /* real WPAN device ptr */
+ struct net_device *wdev; /* wpan device ptr */
u16 fragment_tag;
};
@@ -62,4 +71,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
const void *_saddr, unsigned int len);
netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev);
+int lowpan_iphc_decompress(struct sk_buff *skb);
+lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb);
+
#endif /* __IEEE802154_6LOWPAN_I_H__ */
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index 953b1c49f5d1..20c49c724ba0 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -61,7 +61,7 @@ static struct header_ops lowpan_header_ops = {
static struct lock_class_key lowpan_tx_busylock;
static struct lock_class_key lowpan_netdev_xmit_lock_key;
-static void lowpan_set_lockdep_class_one(struct net_device *dev,
+static void lowpan_set_lockdep_class_one(struct net_device *ldev,
struct netdev_queue *txq,
void *_unused)
{
@@ -69,35 +69,47 @@ static void lowpan_set_lockdep_class_one(struct net_device *dev,
&lowpan_netdev_xmit_lock_key);
}
-static int lowpan_dev_init(struct net_device *dev)
+static int lowpan_dev_init(struct net_device *ldev)
{
- netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL);
- dev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ netdev_for_each_tx_queue(ldev, lowpan_set_lockdep_class_one, NULL);
+ ldev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ return 0;
+}
+
+static int lowpan_open(struct net_device *dev)
+{
+ if (!open_count)
+ lowpan_rx_init();
+ open_count++;
+ return 0;
+}
+
+static int lowpan_stop(struct net_device *dev)
+{
+ open_count--;
+ if (!open_count)
+ lowpan_rx_exit();
return 0;
}
static const struct net_device_ops lowpan_netdev_ops = {
.ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
+ .ndo_open = lowpan_open,
+ .ndo_stop = lowpan_stop,
};
-static void lowpan_setup(struct net_device *dev)
+static void lowpan_setup(struct net_device *ldev)
{
- dev->addr_len = IEEE802154_ADDR_LEN;
- memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
- dev->type = ARPHRD_6LOWPAN;
- /* Frame Control + Sequence Number + Address fields + Security Header */
- dev->hard_header_len = 2 + 1 + 20 + 14;
- dev->needed_tailroom = 2; /* FCS */
- dev->mtu = IPV6_MIN_MTU;
- dev->priv_flags |= IFF_NO_QUEUE;
- dev->flags = IFF_BROADCAST | IFF_MULTICAST;
- dev->watchdog_timeo = 0;
-
- dev->netdev_ops = &lowpan_netdev_ops;
- dev->header_ops = &lowpan_header_ops;
- dev->destructor = free_netdev;
- dev->features |= NETIF_F_NETNS_LOCAL;
+ memset(ldev->broadcast, 0xff, IEEE802154_ADDR_LEN);
+ /* We need an ipv6hdr as minimum len when calling xmit */
+ ldev->hard_header_len = sizeof(struct ipv6hdr);
+ ldev->flags = IFF_BROADCAST | IFF_MULTICAST;
+
+ ldev->netdev_ops = &lowpan_netdev_ops;
+ ldev->header_ops = &lowpan_header_ops;
+ ldev->destructor = free_netdev;
+ ldev->features |= NETIF_F_NETNS_LOCAL;
}
static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -109,10 +121,10 @@ static int lowpan_validate(struct nlattr *tb[], struct nlattr *data[])
return 0;
}
-static int lowpan_newlink(struct net *src_net, struct net_device *dev,
+static int lowpan_newlink(struct net *src_net, struct net_device *ldev,
struct nlattr *tb[], struct nlattr *data[])
{
- struct net_device *real_dev;
+ struct net_device *wdev;
int ret;
ASSERT_RTNL();
@@ -120,58 +132,56 @@ static int lowpan_newlink(struct net *src_net, struct net_device *dev,
pr_debug("adding new link\n");
if (!tb[IFLA_LINK] ||
- !net_eq(dev_net(dev), &init_net))
+ !net_eq(dev_net(ldev), &init_net))
return -EINVAL;
- /* find and hold real wpan device */
- real_dev = dev_get_by_index(dev_net(dev), nla_get_u32(tb[IFLA_LINK]));
- if (!real_dev)
+ /* find and hold wpan device */
+ wdev = dev_get_by_index(dev_net(ldev), nla_get_u32(tb[IFLA_LINK]));
+ if (!wdev)
return -ENODEV;
- if (real_dev->type != ARPHRD_IEEE802154) {
- dev_put(real_dev);
+ if (wdev->type != ARPHRD_IEEE802154) {
+ dev_put(wdev);
return -EINVAL;
}
- if (real_dev->ieee802154_ptr->lowpan_dev) {
- dev_put(real_dev);
+ if (wdev->ieee802154_ptr->lowpan_dev) {
+ dev_put(wdev);
return -EBUSY;
}
- lowpan_dev_info(dev)->real_dev = real_dev;
+ lowpan_dev_info(ldev)->wdev = wdev;
/* Set the lowpan hardware address to the wpan hardware address. */
- memcpy(dev->dev_addr, real_dev->dev_addr, IEEE802154_ADDR_LEN);
-
- lowpan_netdev_setup(dev, LOWPAN_LLTYPE_IEEE802154);
-
- ret = register_netdevice(dev);
+ memcpy(ldev->dev_addr, wdev->dev_addr, IEEE802154_ADDR_LEN);
+ /* We need headroom for possible wpan_dev_hard_header call and tailroom
+ * for encryption/fcs handling. The lowpan interface will replace
+ * the IPv6 header with 6LoWPAN header. At worst case the 6LoWPAN
+ * header has LOWPAN_IPHC_MAX_HEADER_LEN more bytes than the IPv6
+ * header.
+ */
+ ldev->needed_headroom = LOWPAN_IPHC_MAX_HEADER_LEN +
+ wdev->needed_headroom;
+ ldev->needed_tailroom = wdev->needed_tailroom;
+
+ lowpan_netdev_setup(ldev, LOWPAN_LLTYPE_IEEE802154);
+
+ ret = register_netdevice(ldev);
if (ret < 0) {
- dev_put(real_dev);
+ dev_put(wdev);
return ret;
}
- real_dev->ieee802154_ptr->lowpan_dev = dev;
- if (!open_count)
- lowpan_rx_init();
-
- open_count++;
-
+ wdev->ieee802154_ptr->lowpan_dev = ldev;
return 0;
}
-static void lowpan_dellink(struct net_device *dev, struct list_head *head)
+static void lowpan_dellink(struct net_device *ldev, struct list_head *head)
{
- struct lowpan_dev_info *lowpan_dev = lowpan_dev_info(dev);
- struct net_device *real_dev = lowpan_dev->real_dev;
+ struct net_device *wdev = lowpan_dev_info(ldev)->wdev;
ASSERT_RTNL();
- open_count--;
-
- if (!open_count)
- lowpan_rx_exit();
-
- real_dev->ieee802154_ptr->lowpan_dev = NULL;
- unregister_netdevice(dev);
- dev_put(real_dev);
+ wdev->ieee802154_ptr->lowpan_dev = NULL;
+ unregister_netdevice(ldev);
+ dev_put(wdev);
}
static struct rtnl_link_ops lowpan_link_ops __read_mostly = {
@@ -196,9 +206,9 @@ static inline void lowpan_netlink_fini(void)
static int lowpan_device_event(struct notifier_block *unused,
unsigned long event, void *ptr)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net_device *wdev = netdev_notifier_info_to_dev(ptr);
- if (dev->type != ARPHRD_IEEE802154)
+ if (wdev->type != ARPHRD_IEEE802154)
goto out;
switch (event) {
@@ -207,8 +217,8 @@ static int lowpan_device_event(struct notifier_block *unused,
* also delete possible lowpan interfaces which belongs
* to the wpan interface.
*/
- if (dev->ieee802154_ptr && dev->ieee802154_ptr->lowpan_dev)
- lowpan_dellink(dev->ieee802154_ptr->lowpan_dev, NULL);
+ if (wdev->ieee802154_ptr->lowpan_dev)
+ lowpan_dellink(wdev->ieee802154_ptr->lowpan_dev, NULL);
break;
default:
break;
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 214d44aef35b..12e8cf4bda9f 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -32,21 +32,10 @@
static const char lowpan_frags_cache_name[] = "lowpan-frags";
-struct lowpan_frag_info {
- u16 d_tag;
- u16 d_size;
- u8 d_offset;
-};
-
-static struct lowpan_frag_info *lowpan_cb(struct sk_buff *skb)
-{
- return (struct lowpan_frag_info *)skb->cb;
-}
-
static struct inet_frags lowpan_frags;
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq,
- struct sk_buff *prev, struct net_device *dev);
+ struct sk_buff *prev, struct net_device *ldev);
static unsigned int lowpan_hash_frag(u16 tag, u16 d_size,
const struct ieee802154_addr *saddr,
@@ -111,7 +100,7 @@ out:
}
static inline struct lowpan_frag_queue *
-fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
+fq_find(struct net *net, const struct lowpan_802154_cb *cb,
const struct ieee802154_addr *src,
const struct ieee802154_addr *dst)
{
@@ -121,12 +110,12 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
- arg.tag = frag_info->d_tag;
- arg.d_size = frag_info->d_size;
+ arg.tag = cb->d_tag;
+ arg.d_size = cb->d_size;
arg.src = src;
arg.dst = dst;
- hash = lowpan_hash_frag(frag_info->d_tag, frag_info->d_size, src, dst);
+ hash = lowpan_hash_frag(cb->d_tag, cb->d_size, src, dst);
q = inet_frag_find(&ieee802154_lowpan->frags,
&lowpan_frags, &arg, hash);
@@ -138,17 +127,17 @@ fq_find(struct net *net, const struct lowpan_frag_info *frag_info,
}
static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
- struct sk_buff *skb, const u8 frag_type)
+ struct sk_buff *skb, u8 frag_type)
{
struct sk_buff *prev, *next;
- struct net_device *dev;
+ struct net_device *ldev;
int end, offset;
if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
- offset = lowpan_cb(skb)->d_offset << 3;
- end = lowpan_cb(skb)->d_size;
+ offset = lowpan_802154_cb(skb)->d_offset << 3;
+ end = lowpan_802154_cb(skb)->d_size;
/* Is this the final fragment? */
if (offset + skb->len == end) {
@@ -174,13 +163,16 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || lowpan_cb(prev)->d_offset < lowpan_cb(skb)->d_offset) {
+ if (!prev ||
+ lowpan_802154_cb(prev)->d_offset <
+ lowpan_802154_cb(skb)->d_offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (lowpan_cb(next)->d_offset >= lowpan_cb(skb)->d_offset)
+ if (lowpan_802154_cb(next)->d_offset >=
+ lowpan_802154_cb(skb)->d_offset)
break; /* bingo! */
prev = next;
}
@@ -195,18 +187,15 @@ found:
else
fq->q.fragments = skb;
- dev = skb->dev;
- if (dev)
+ ldev = skb->dev;
+ if (ldev)
skb->dev = NULL;
fq->q.stamp = skb->tstamp;
- if (frag_type == LOWPAN_DISPATCH_FRAG1) {
- /* Calculate uncomp. 6lowpan header to estimate full size */
- fq->q.meat += lowpan_uncompress_size(skb, NULL);
+ if (frag_type == LOWPAN_DISPATCH_FRAG1)
fq->q.flags |= INET_FRAG_FIRST_IN;
- } else {
- fq->q.meat += skb->len;
- }
+
+ fq->q.meat += skb->len;
add_frag_mem_limit(fq->q.net, skb->truesize);
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
@@ -215,7 +204,7 @@ found:
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- res = lowpan_frag_reasm(fq, prev, dev);
+ res = lowpan_frag_reasm(fq, prev, ldev);
skb->_skb_refdst = orefdst;
return res;
}
@@ -235,7 +224,7 @@ err:
* the last and the first frames arrived and all the bits are here.
*/
static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev)
+ struct net_device *ldev)
{
struct sk_buff *fp, *head = fq->q.fragments;
int sum_truesize;
@@ -313,7 +302,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev,
sub_frag_mem_limit(fq->q.net, sum_truesize);
head->next = NULL;
- head->dev = dev;
+ head->dev = ldev;
head->tstamp = fq->q.stamp;
fq->q.fragments = NULL;
@@ -325,24 +314,87 @@ out_oom:
return -1;
}
-static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type,
- struct lowpan_frag_info *frag_info)
+static int lowpan_frag_rx_handlers_result(struct sk_buff *skb,
+ lowpan_rx_result res)
+{
+ switch (res) {
+ case RX_QUEUED:
+ return NET_RX_SUCCESS;
+ case RX_CONTINUE:
+ /* nobody cared about this packet */
+ net_warn_ratelimited("%s: received unknown dispatch\n",
+ __func__);
+
+ /* fall-through */
+ default:
+ /* all others failure */
+ return NET_RX_DROP;
+ }
+}
+
+static lowpan_rx_result lowpan_frag_rx_h_iphc(struct sk_buff *skb)
+{
+ int ret;
+
+ if (!lowpan_is_iphc(*skb_network_header(skb)))
+ return RX_CONTINUE;
+
+ ret = lowpan_iphc_decompress(skb);
+ if (ret < 0)
+ return RX_DROP;
+
+ return RX_QUEUED;
+}
+
+static int lowpan_invoke_frag_rx_handlers(struct sk_buff *skb)
+{
+ lowpan_rx_result res;
+
+#define CALL_RXH(rxh) \
+ do { \
+ res = rxh(skb); \
+ if (res != RX_CONTINUE) \
+ goto rxh_next; \
+ } while (0)
+
+ /* likely at first */
+ CALL_RXH(lowpan_frag_rx_h_iphc);
+ CALL_RXH(lowpan_rx_h_ipv6);
+
+rxh_next:
+ return lowpan_frag_rx_handlers_result(skb, res);
+#undef CALL_RXH
+}
+
+#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK 0x07
+#define LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT 8
+
+static int lowpan_get_cb(struct sk_buff *skb, u8 frag_type,
+ struct lowpan_802154_cb *cb)
{
bool fail;
- u8 pattern = 0, low = 0;
+ u8 high = 0, low = 0;
__be16 d_tag = 0;
- fail = lowpan_fetch_skb(skb, &pattern, 1);
+ fail = lowpan_fetch_skb(skb, &high, 1);
fail |= lowpan_fetch_skb(skb, &low, 1);
- frag_info->d_size = (pattern & 7) << 8 | low;
+ /* remove the dispatch value and use first three bits as high value
+ * for the datagram size
+ */
+ cb->d_size = (high & LOWPAN_FRAG_DGRAM_SIZE_HIGH_MASK) <<
+ LOWPAN_FRAG_DGRAM_SIZE_HIGH_SHIFT | low;
fail |= lowpan_fetch_skb(skb, &d_tag, 2);
- frag_info->d_tag = ntohs(d_tag);
+ cb->d_tag = ntohs(d_tag);
if (frag_type == LOWPAN_DISPATCH_FRAGN) {
- fail |= lowpan_fetch_skb(skb, &frag_info->d_offset, 1);
+ fail |= lowpan_fetch_skb(skb, &cb->d_offset, 1);
} else {
skb_reset_network_header(skb);
- frag_info->d_offset = 0;
+ cb->d_offset = 0;
+ /* check if datagram_size has ipv6hdr on FRAG1 */
+ fail |= cb->d_size < sizeof(struct ipv6hdr);
+ /* check if we can dereference the dispatch value */
+ fail |= !skb->len;
}
if (unlikely(fail))
@@ -351,27 +403,33 @@ static int lowpan_get_frag_info(struct sk_buff *skb, const u8 frag_type,
return 0;
}
-int lowpan_frag_rcv(struct sk_buff *skb, const u8 frag_type)
+int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
{
struct lowpan_frag_queue *fq;
struct net *net = dev_net(skb->dev);
- struct lowpan_frag_info *frag_info = lowpan_cb(skb);
- struct ieee802154_addr source, dest;
+ struct lowpan_802154_cb *cb = lowpan_802154_cb(skb);
+ struct ieee802154_hdr hdr;
int err;
- source = mac_cb(skb)->source;
- dest = mac_cb(skb)->dest;
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
+ goto err;
- err = lowpan_get_frag_info(skb, frag_type, frag_info);
+ err = lowpan_get_cb(skb, frag_type, cb);
if (err < 0)
goto err;
- if (frag_info->d_size > IPV6_MIN_MTU) {
+ if (frag_type == LOWPAN_DISPATCH_FRAG1) {
+ err = lowpan_invoke_frag_rx_handlers(skb);
+ if (err == NET_RX_DROP)
+ goto err;
+ }
+
+ if (cb->d_size > IPV6_MIN_MTU) {
net_warn_ratelimited("lowpan_frag_rcv: datagram size exceeds MTU\n");
goto err;
}
- fq = fq_find(net, frag_info, &source, &dest);
+ fq = fq_find(net, cb, &hdr.source, &hdr.dest);
if (fq != NULL) {
int ret;
@@ -387,7 +445,6 @@ err:
kfree_skb(skb);
return -1;
}
-EXPORT_SYMBOL(lowpan_frag_rcv);
#ifdef CONFIG_SYSCTL
static int zero;
diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index 12e10201d263..65d55e05516c 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -11,40 +11,101 @@
#include <linux/if_arp.h>
#include <net/6lowpan.h>
+#include <net/mac802154.h>
#include <net/ieee802154_netdev.h>
#include "6lowpan_i.h"
-static int lowpan_give_skb_to_device(struct sk_buff *skb,
- struct net_device *dev)
+#define LOWPAN_DISPATCH_FIRST 0xc0
+#define LOWPAN_DISPATCH_FRAG_MASK 0xf8
+
+#define LOWPAN_DISPATCH_NALP 0x00
+#define LOWPAN_DISPATCH_ESC 0x40
+#define LOWPAN_DISPATCH_HC1 0x42
+#define LOWPAN_DISPATCH_DFF 0x43
+#define LOWPAN_DISPATCH_BC0 0x50
+#define LOWPAN_DISPATCH_MESH 0x80
+
+static int lowpan_give_skb_to_device(struct sk_buff *skb)
{
- skb->dev = dev->ieee802154_ptr->lowpan_dev;
skb->protocol = htons(ETH_P_IPV6);
- skb->pkt_type = PACKET_HOST;
+ skb->dev->stats.rx_packets++;
+ skb->dev->stats.rx_bytes += skb->len;
return netif_rx(skb);
}
-static int
-iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
+static int lowpan_rx_handlers_result(struct sk_buff *skb, lowpan_rx_result res)
+{
+ switch (res) {
+ case RX_CONTINUE:
+ /* nobody cared about this packet */
+ net_warn_ratelimited("%s: received unknown dispatch\n",
+ __func__);
+
+ /* fall-through */
+ case RX_DROP_UNUSABLE:
+ kfree_skb(skb);
+
+ /* fall-through */
+ case RX_DROP:
+ return NET_RX_DROP;
+ case RX_QUEUED:
+ return lowpan_give_skb_to_device(skb);
+ default:
+ break;
+ }
+
+ return NET_RX_DROP;
+}
+
+static inline bool lowpan_is_frag1(u8 dispatch)
+{
+ return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAG1;
+}
+
+static inline bool lowpan_is_fragn(u8 dispatch)
+{
+ return (dispatch & LOWPAN_DISPATCH_FRAG_MASK) == LOWPAN_DISPATCH_FRAGN;
+}
+
+static lowpan_rx_result lowpan_rx_h_frag(struct sk_buff *skb)
+{
+ int ret;
+
+ if (!(lowpan_is_frag1(*skb_network_header(skb)) ||
+ lowpan_is_fragn(*skb_network_header(skb))))
+ return RX_CONTINUE;
+
+ ret = lowpan_frag_rcv(skb, *skb_network_header(skb) &
+ LOWPAN_DISPATCH_FRAG_MASK);
+ if (ret == 1)
+ return RX_QUEUED;
+
+ /* Packet is freed by lowpan_frag_rcv on error or put into the frag
+ * bucket.
+ */
+ return RX_DROP;
+}
+
+int lowpan_iphc_decompress(struct sk_buff *skb)
{
- u8 iphc0, iphc1;
struct ieee802154_addr_sa sa, da;
+ struct ieee802154_hdr hdr;
+ u8 iphc0, iphc1;
void *sap, *dap;
- raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len);
- /* at least two bytes will be used for the encoding */
- if (skb->len < 2)
+ if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
return -EINVAL;
- if (lowpan_fetch_skb_u8(skb, &iphc0))
- return -EINVAL;
+ raw_dump_table(__func__, "raw skb data dump", skb->data, skb->len);
- if (lowpan_fetch_skb_u8(skb, &iphc1))
+ if (lowpan_fetch_skb_u8(skb, &iphc0) ||
+ lowpan_fetch_skb_u8(skb, &iphc1))
return -EINVAL;
- ieee802154_addr_to_sa(&sa, &hdr->source);
- ieee802154_addr_to_sa(&da, &hdr->dest);
+ ieee802154_addr_to_sa(&sa, &hdr.source);
+ ieee802154_addr_to_sa(&da, &hdr.dest);
if (sa.addr_type == IEEE802154_ADDR_SHORT)
sap = &sa.short_addr;
@@ -61,77 +122,216 @@ iphc_decompress(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
IEEE802154_ADDR_LEN, iphc0, iphc1);
}
-static int lowpan_rcv(struct sk_buff *skb, struct net_device *dev,
- struct packet_type *pt, struct net_device *orig_dev)
+static lowpan_rx_result lowpan_rx_h_iphc(struct sk_buff *skb)
{
- struct ieee802154_hdr hdr;
int ret;
- if (dev->type != ARPHRD_IEEE802154 ||
- !dev->ieee802154_ptr->lowpan_dev)
- goto drop;
+ if (!lowpan_is_iphc(*skb_network_header(skb)))
+ return RX_CONTINUE;
- skb = skb_share_check(skb, GFP_ATOMIC);
- if (!skb)
- goto drop;
+ /* Setting the datagram size (d_size) to zero indicates non frag handling
+ * while doing lowpan_header_decompress.
+ */
+ lowpan_802154_cb(skb)->d_size = 0;
- if (!netif_running(dev))
- goto drop_skb;
+ ret = lowpan_iphc_decompress(skb);
+ if (ret < 0)
+ return RX_DROP_UNUSABLE;
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto drop_skb;
+ return RX_QUEUED;
+}
- if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0)
- goto drop_skb;
-
- /* check that it's our buffer */
- if (skb->data[0] == LOWPAN_DISPATCH_IPV6) {
- /* Pull off the 1-byte of 6lowpan header. */
- skb_pull(skb, 1);
- return lowpan_give_skb_to_device(skb, dev);
- } else {
- switch (skb->data[0] & 0xe0) {
- case LOWPAN_DISPATCH_IPHC: /* ipv6 datagram */
- ret = iphc_decompress(skb, &hdr);
- if (ret < 0)
- goto drop_skb;
-
- return lowpan_give_skb_to_device(skb, dev);
- case LOWPAN_DISPATCH_FRAG1: /* first fragment header */
- ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAG1);
- if (ret == 1) {
- ret = iphc_decompress(skb, &hdr);
- if (ret < 0)
- goto drop_skb;
-
- return lowpan_give_skb_to_device(skb, dev);
- } else if (ret == -1) {
- return NET_RX_DROP;
- } else {
- return NET_RX_SUCCESS;
- }
- case LOWPAN_DISPATCH_FRAGN: /* next fragments headers */
- ret = lowpan_frag_rcv(skb, LOWPAN_DISPATCH_FRAGN);
- if (ret == 1) {
- ret = iphc_decompress(skb, &hdr);
- if (ret < 0)
- goto drop_skb;
-
- return lowpan_give_skb_to_device(skb, dev);
- } else if (ret == -1) {
- return NET_RX_DROP;
- } else {
- return NET_RX_SUCCESS;
- }
- default:
- break;
- }
+lowpan_rx_result lowpan_rx_h_ipv6(struct sk_buff *skb)
+{
+ if (!lowpan_is_ipv6(*skb_network_header(skb)))
+ return RX_CONTINUE;
+
+ /* Pull off the 1-byte of 6lowpan header. */
+ skb_pull(skb, 1);
+ return RX_QUEUED;
+}
+
+static inline bool lowpan_is_esc(u8 dispatch)
+{
+ return dispatch == LOWPAN_DISPATCH_ESC;
+}
+
+static lowpan_rx_result lowpan_rx_h_esc(struct sk_buff *skb)
+{
+ if (!lowpan_is_esc(*skb_network_header(skb)))
+ return RX_CONTINUE;
+
+ net_warn_ratelimited("%s: %s\n", skb->dev->name,
+ "6LoWPAN ESC not supported\n");
+
+ return RX_DROP_UNUSABLE;
+}
+
+static inline bool lowpan_is_hc1(u8 dispatch)
+{
+ return dispatch == LOWPAN_DISPATCH_HC1;
+}
+
+static lowpan_rx_result lowpan_rx_h_hc1(struct sk_buff *skb)
+{
+ if (!lowpan_is_hc1(*skb_network_header(skb)))
+ return RX_CONTINUE;
+
+ net_warn_ratelimited("%s: %s\n", skb->dev->name,
+ "6LoWPAN HC1 not supported\n");
+
+ return RX_DROP_UNUSABLE;
+}
+
+static inline bool lowpan_is_dff(u8 dispatch)
+{
+ return dispatch == LOWPAN_DISPATCH_DFF;
+}
+
+static lowpan_rx_result lowpan_rx_h_dff(struct sk_buff *skb)
+{
+ if (!lowpan_is_dff(*skb_network_header(skb)))
+ return RX_CONTINUE;
+
+ net_warn_ratelimited("%s: %s\n", skb->dev->name,
+ "6LoWPAN DFF not supported\n");
+
+ return RX_DROP_UNUSABLE;
+}
+
+static inline bool lowpan_is_bc0(u8 dispatch)
+{
+ return dispatch == LOWPAN_DISPATCH_BC0;
+}
+
+static lowpan_rx_result lowpan_rx_h_bc0(struct sk_buff *skb)
+{
+ if (!lowpan_is_bc0(*skb_network_header(skb)))
+ return RX_CONTINUE;
+
+ net_warn_ratelimited("%s: %s\n", skb->dev->name,
+ "6LoWPAN BC0 not supported\n");
+
+ return RX_DROP_UNUSABLE;
+}
+
+static inline bool lowpan_is_mesh(u8 dispatch)
+{
+ return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_MESH;
+}
+
+static lowpan_rx_result lowpan_rx_h_mesh(struct sk_buff *skb)
+{
+ if (!lowpan_is_mesh(*skb_network_header(skb)))
+ return RX_CONTINUE;
+
+ net_warn_ratelimited("%s: %s\n", skb->dev->name,
+ "6LoWPAN MESH not supported\n");
+
+ return RX_DROP_UNUSABLE;
+}
+
+static int lowpan_invoke_rx_handlers(struct sk_buff *skb)
+{
+ lowpan_rx_result res;
+
+#define CALL_RXH(rxh) \
+ do { \
+ res = rxh(skb); \
+ if (res != RX_CONTINUE) \
+ goto rxh_next; \
+ } while (0)
+
+ /* likely at first */
+ CALL_RXH(lowpan_rx_h_iphc);
+ CALL_RXH(lowpan_rx_h_frag);
+ CALL_RXH(lowpan_rx_h_ipv6);
+ CALL_RXH(lowpan_rx_h_esc);
+ CALL_RXH(lowpan_rx_h_hc1);
+ CALL_RXH(lowpan_rx_h_dff);
+ CALL_RXH(lowpan_rx_h_bc0);
+ CALL_RXH(lowpan_rx_h_mesh);
+
+rxh_next:
+ return lowpan_rx_handlers_result(skb, res);
+#undef CALL_RXH
+}
+
+static inline bool lowpan_is_nalp(u8 dispatch)
+{
+ return (dispatch & LOWPAN_DISPATCH_FIRST) == LOWPAN_DISPATCH_NALP;
+}
+
+/* Lookup for reserved dispatch values at:
+ * https://www.iana.org/assignments/_6lowpan-parameters/_6lowpan-parameters.xhtml#_6lowpan-parameters-1
+ *
+ * Last Updated: 2015-01-22
+ */
+static inline bool lowpan_is_reserved(u8 dispatch)
+{
+ return ((dispatch >= 0x44 && dispatch <= 0x4F) ||
+ (dispatch >= 0x51 && dispatch <= 0x5F) ||
+ (dispatch >= 0xc8 && dispatch <= 0xdf) ||
+ (dispatch >= 0xe8 && dispatch <= 0xff));
+}
+
+/* lowpan_rx_h_check checks on generic 6LoWPAN requirements
+ * in MAC and 6LoWPAN header.
+ *
+ * Don't manipulate the skb here, it could be shared buffer.
+ */
+static inline bool lowpan_rx_h_check(struct sk_buff *skb)
+{
+ __le16 fc = ieee802154_get_fc_from_skb(skb);
+
+ /* check on ieee802154 conform 6LoWPAN header */
+ if (!ieee802154_is_data(fc) ||
+ !ieee802154_is_intra_pan(fc))
+ return false;
+
+ /* check if we can dereference the dispatch */
+ if (unlikely(!skb->len))
+ return false;
+
+ if (lowpan_is_nalp(*skb_network_header(skb)) ||
+ lowpan_is_reserved(*skb_network_header(skb)))
+ return false;
+
+ return true;
+}
+
+static int lowpan_rcv(struct sk_buff *skb, struct net_device *wdev,
+ struct packet_type *pt, struct net_device *orig_wdev)
+{
+ struct net_device *ldev;
+
+ if (wdev->type != ARPHRD_IEEE802154 ||
+ skb->pkt_type == PACKET_OTHERHOST ||
+ !lowpan_rx_h_check(skb))
+ return NET_RX_DROP;
+
+ ldev = wdev->ieee802154_ptr->lowpan_dev;
+ if (!ldev || !netif_running(ldev))
+ return NET_RX_DROP;
+
+ /* Replacing skb->dev and the rx handlers that follow will manipulate skb. */
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+ skb->dev = ldev;
+
+ /* When receiving frag1 it's likely that we manipulate the buffer.
+ * When receiving iphc we manipulate the data buffer. So we need
+ * to unshare the buffer.
+ */
+ if (lowpan_is_frag1(*skb_network_header(skb)) ||
+ lowpan_is_iphc(*skb_network_header(skb))) {
+ skb = skb_unshare(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
}
-drop_skb:
- kfree_skb(skb);
-drop:
- return NET_RX_DROP;
+ return lowpan_invoke_rx_handlers(skb);
}
static struct packet_type lowpan_packet_type = {
diff --git a/net/ieee802154/6lowpan/tx.c b/net/ieee802154/6lowpan/tx.c
index f6263fc12340..62a21f6f021e 100644
--- a/net/ieee802154/6lowpan/tx.c
+++ b/net/ieee802154/6lowpan/tx.c
@@ -10,6 +10,7 @@
#include <net/6lowpan.h>
#include <net/ieee802154_netdev.h>
+#include <net/mac802154.h>
#include "6lowpan_i.h"
@@ -36,7 +37,14 @@ lowpan_addr_info *lowpan_skb_priv(const struct sk_buff *skb)
sizeof(struct lowpan_addr_info));
}
-int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
+/* This callback will be called from the AF_PACKET and IPv6 stacks; AF_PACKET
+ * sockets give an 8 byte array for addresses only!
+ *
+ * TODO I think AF_PACKET DGRAM (sending/receiving) RAW (sending) makes no
+ * sense here. We should disable it, the right use-case would be AF_INET6
+ * RAW/DGRAM sockets.
+ */
+int lowpan_header_create(struct sk_buff *skb, struct net_device *ldev,
unsigned short type, const void *_daddr,
const void *_saddr, unsigned int len)
{
@@ -51,7 +59,7 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
return 0;
if (!saddr)
- saddr = dev->dev_addr;
+ saddr = ldev->dev_addr;
raw_dump_inline(__func__, "saddr", (unsigned char *)saddr, 8);
raw_dump_inline(__func__, "daddr", (unsigned char *)daddr, 8);
@@ -71,28 +79,33 @@ int lowpan_header_create(struct sk_buff *skb, struct net_device *dev,
static struct sk_buff*
lowpan_alloc_frag(struct sk_buff *skb, int size,
- const struct ieee802154_hdr *master_hdr)
+ const struct ieee802154_hdr *master_hdr, bool frag1)
{
- struct net_device *real_dev = lowpan_dev_info(skb->dev)->real_dev;
+ struct net_device *wdev = lowpan_dev_info(skb->dev)->wdev;
struct sk_buff *frag;
int rc;
- frag = alloc_skb(real_dev->hard_header_len +
- real_dev->needed_tailroom + size,
+ frag = alloc_skb(wdev->needed_headroom + wdev->needed_tailroom + size,
GFP_ATOMIC);
if (likely(frag)) {
- frag->dev = real_dev;
+ frag->dev = wdev;
frag->priority = skb->priority;
- skb_reserve(frag, real_dev->hard_header_len);
+ skb_reserve(frag, wdev->needed_headroom);
skb_reset_network_header(frag);
*mac_cb(frag) = *mac_cb(skb);
- rc = dev_hard_header(frag, real_dev, 0, &master_hdr->dest,
- &master_hdr->source, size);
- if (rc < 0) {
- kfree_skb(frag);
- return ERR_PTR(rc);
+ if (frag1) {
+ memcpy(skb_put(frag, skb->mac_len),
+ skb_mac_header(skb), skb->mac_len);
+ } else {
+ rc = wpan_dev_hard_header(frag, wdev,
+ &master_hdr->dest,
+ &master_hdr->source, size);
+ if (rc < 0) {
+ kfree_skb(frag);
+ return ERR_PTR(rc);
+ }
}
} else {
frag = ERR_PTR(-ENOMEM);
@@ -104,13 +117,13 @@ lowpan_alloc_frag(struct sk_buff *skb, int size,
static int
lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
u8 *frag_hdr, int frag_hdrlen,
- int offset, int len)
+ int offset, int len, bool frag1)
{
struct sk_buff *frag;
raw_dump_inline(__func__, " fragment header", frag_hdr, frag_hdrlen);
- frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr);
+ frag = lowpan_alloc_frag(skb, frag_hdrlen + len, wpan_hdr, frag1);
if (IS_ERR(frag))
return PTR_ERR(frag);
@@ -123,19 +136,17 @@ lowpan_xmit_fragment(struct sk_buff *skb, const struct ieee802154_hdr *wpan_hdr,
}
static int
-lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
- const struct ieee802154_hdr *wpan_hdr)
+lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *ldev,
+ const struct ieee802154_hdr *wpan_hdr, u16 dgram_size,
+ u16 dgram_offset)
{
- u16 dgram_size, dgram_offset;
__be16 frag_tag;
u8 frag_hdr[5];
int frag_cap, frag_len, payload_cap, rc;
int skb_unprocessed, skb_offset;
- dgram_size = lowpan_uncompress_size(skb, &dgram_offset) -
- skb->mac_len;
- frag_tag = htons(lowpan_dev_info(dev)->fragment_tag);
- lowpan_dev_info(dev)->fragment_tag++;
+ frag_tag = htons(lowpan_dev_info(ldev)->fragment_tag);
+ lowpan_dev_info(ldev)->fragment_tag++;
frag_hdr[0] = LOWPAN_DISPATCH_FRAG1 | ((dgram_size >> 8) & 0x07);
frag_hdr[1] = dgram_size & 0xff;
@@ -151,7 +162,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
LOWPAN_FRAG1_HEAD_SIZE, 0,
- frag_len + skb_network_header_len(skb));
+ frag_len + skb_network_header_len(skb),
+ true);
if (rc) {
pr_debug("%s unable to send FRAG1 packet (tag: %d)",
__func__, ntohs(frag_tag));
@@ -172,7 +184,7 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
rc = lowpan_xmit_fragment(skb, wpan_hdr, frag_hdr,
LOWPAN_FRAGN_HEAD_SIZE, skb_offset,
- frag_len);
+ frag_len, false);
if (rc) {
pr_debug("%s unable to send a FRAGN packet. (tag: %d, offset: %d)\n",
__func__, ntohs(frag_tag), skb_offset);
@@ -180,6 +192,8 @@ lowpan_xmit_fragmented(struct sk_buff *skb, struct net_device *dev,
}
} while (skb_unprocessed > frag_cap);
+ ldev->stats.tx_packets++;
+ ldev->stats.tx_bytes += dgram_size;
consume_skb(skb);
return NET_XMIT_SUCCESS;
@@ -188,9 +202,10 @@ err:
return rc;
}
-static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
+static int lowpan_header(struct sk_buff *skb, struct net_device *ldev,
+ u16 *dgram_size, u16 *dgram_offset)
{
- struct wpan_dev *wpan_dev = lowpan_dev_info(dev)->real_dev->ieee802154_ptr;
+ struct wpan_dev *wpan_dev = lowpan_dev_info(ldev)->wdev->ieee802154_ptr;
struct ieee802154_addr sa, da;
struct ieee802154_mac_cb *cb = mac_cb_init(skb);
struct lowpan_addr_info info;
@@ -202,7 +217,10 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
daddr = &info.daddr.u.extended_addr;
saddr = &info.saddr.u.extended_addr;
- lowpan_header_compress(skb, dev, ETH_P_IPV6, daddr, saddr, skb->len);
+ *dgram_size = skb->len;
+ lowpan_header_compress(skb, ldev, ETH_P_IPV6, daddr, saddr, skb->len);
+ /* dgram_offset = (saved bytes after compression) + lowpan header len */
+ *dgram_offset = (*dgram_size - skb->len) + skb_network_header_len(skb);
cb->type = IEEE802154_FC_TYPE_DATA;
@@ -227,17 +245,20 @@ static int lowpan_header(struct sk_buff *skb, struct net_device *dev)
cb->ackreq = wpan_dev->ackreq;
}
- return dev_hard_header(skb, lowpan_dev_info(dev)->real_dev,
- ETH_P_IPV6, (void *)&da, (void *)&sa, 0);
+ return wpan_dev_hard_header(skb, lowpan_dev_info(ldev)->wdev, &da, &sa,
+ 0);
}
-netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
+netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *ldev)
{
struct ieee802154_hdr wpan_hdr;
int max_single, ret;
+ u16 dgram_size, dgram_offset;
pr_debug("package xmit\n");
+ WARN_ON_ONCE(skb->len > IPV6_MIN_MTU);
+
/* We must take a copy of the skb before we modify/replace the ipv6
* header as the header could be used elsewhere
*/
@@ -245,7 +266,7 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
if (!skb)
return NET_XMIT_DROP;
- ret = lowpan_header(skb, dev);
+ ret = lowpan_header(skb, ldev, &dgram_size, &dgram_offset);
if (ret < 0) {
kfree_skb(skb);
return NET_XMIT_DROP;
@@ -259,13 +280,16 @@ netdev_tx_t lowpan_xmit(struct sk_buff *skb, struct net_device *dev)
max_single = ieee802154_max_payload(&wpan_hdr);
if (skb_tail_pointer(skb) - skb_network_header(skb) <= max_single) {
- skb->dev = lowpan_dev_info(dev)->real_dev;
+ skb->dev = lowpan_dev_info(ldev)->wdev;
+ ldev->stats.tx_packets++;
+ ldev->stats.tx_bytes += dgram_size;
return dev_queue_xmit(skb);
} else {
netdev_tx_t rc;
pr_debug("frame is too big, fragmentation is needed\n");
- rc = lowpan_xmit_fragmented(skb, dev, &wpan_hdr);
+ rc = lowpan_xmit_fragmented(skb, ldev, &wpan_hdr, dgram_size,
+ dgram_offset);
return rc < 0 ? NET_XMIT_DROP : rc;
}
diff --git a/net/ieee802154/Kconfig b/net/ieee802154/Kconfig
index 1370d5b0041b..188135bcb803 100644
--- a/net/ieee802154/Kconfig
+++ b/net/ieee802154/Kconfig
@@ -12,6 +12,11 @@ menuconfig IEEE802154
if IEEE802154
+config IEEE802154_NL802154_EXPERIMENTAL
+ bool "IEEE 802.15.4 experimental netlink support"
+ ---help---
+ Adds experimental netlink support for nl802154.
+
config IEEE802154_SOCKET
tristate "IEEE 802.15.4 socket interface"
default y
diff --git a/net/ieee802154/core.c b/net/ieee802154/core.c
index b0248e934230..c35fdfa6d04e 100644
--- a/net/ieee802154/core.c
+++ b/net/ieee802154/core.c
@@ -95,6 +95,18 @@ cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx)
return result;
}
+struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx)
+{
+ struct cfg802154_registered_device *rdev;
+
+ ASSERT_RTNL();
+
+ rdev = cfg802154_rdev_by_wpan_phy_idx(wpan_phy_idx);
+ if (!rdev)
+ return NULL;
+ return &rdev->wpan_phy;
+}
+
struct wpan_phy *
wpan_phy_new(const struct cfg802154_ops *ops, size_t priv_size)
{
diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h
index f3e95580caee..231fade959f3 100644
--- a/net/ieee802154/core.h
+++ b/net/ieee802154/core.h
@@ -42,5 +42,6 @@ extern int cfg802154_rdev_list_generation;
void cfg802154_dev_free(struct cfg802154_registered_device *rdev);
struct cfg802154_registered_device *
cfg802154_rdev_by_wpan_phy_idx(int wpan_phy_idx);
+struct wpan_phy *wpan_phy_idx_to_wpan_phy(int wpan_phy_idx);
#endif /* __IEEE802154_CORE_H */
diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
index a051b6993177..c7439f0fbbdf 100644
--- a/net/ieee802154/header_ops.c
+++ b/net/ieee802154/header_ops.c
@@ -83,35 +83,35 @@ ieee802154_hdr_push_sechdr(u8 *buf, const struct ieee802154_sechdr *hdr)
}
int
-ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
+ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr)
{
- u8 buf[MAC802154_FRAME_HARD_HEADER_LEN];
+ u8 buf[IEEE802154_MAX_HEADER_LEN];
int pos = 2;
int rc;
- struct ieee802154_hdr_fc fc = hdr->fc;
+ struct ieee802154_hdr_fc *fc = &hdr->fc;
buf[pos++] = hdr->seq;
- fc.dest_addr_mode = hdr->dest.mode;
+ fc->dest_addr_mode = hdr->dest.mode;
rc = ieee802154_hdr_push_addr(buf + pos, &hdr->dest, false);
if (rc < 0)
return -EINVAL;
pos += rc;
- fc.source_addr_mode = hdr->source.mode;
+ fc->source_addr_mode = hdr->source.mode;
if (hdr->source.pan_id == hdr->dest.pan_id &&
hdr->dest.mode != IEEE802154_ADDR_NONE)
- fc.intra_pan = true;
+ fc->intra_pan = true;
- rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc.intra_pan);
+ rc = ieee802154_hdr_push_addr(buf + pos, &hdr->source, fc->intra_pan);
if (rc < 0)
return -EINVAL;
pos += rc;
- if (fc.security_enabled) {
- fc.version = 1;
+ if (fc->security_enabled) {
+ fc->version = 1;
rc = ieee802154_hdr_push_sechdr(buf + pos, &hdr->sec);
if (rc < 0)
@@ -120,7 +120,7 @@ ieee802154_hdr_push(struct sk_buff *skb, const struct ieee802154_hdr *hdr)
pos += rc;
}
- memcpy(buf, &fc, 2);
+ memcpy(buf, fc, 2);
memcpy(skb_push(skb, pos), buf, pos);
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 3f89c0abdab1..16ef0d9f566e 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -232,8 +232,86 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_SUPPORTED_COMMANDS] = { .type = NLA_NESTED },
[NL802154_ATTR_ACKREQ_DEFAULT] = { .type = NLA_U8 },
+
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+ [NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
+ [NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, },
+ [NL802154_ATTR_SEC_OUT_KEY_ID] = { .type = NLA_NESTED, },
+ [NL802154_ATTR_SEC_FRAME_COUNTER] = { .type = NLA_U32 },
+
+ [NL802154_ATTR_SEC_LEVEL] = { .type = NLA_NESTED },
+ [NL802154_ATTR_SEC_DEVICE] = { .type = NLA_NESTED },
+ [NL802154_ATTR_SEC_DEVKEY] = { .type = NLA_NESTED },
+ [NL802154_ATTR_SEC_KEY] = { .type = NLA_NESTED },
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
};
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+static int
+nl802154_prepare_wpan_dev_dump(struct sk_buff *skb,
+ struct netlink_callback *cb,
+ struct cfg802154_registered_device **rdev,
+ struct wpan_dev **wpan_dev)
+{
+ int err;
+
+ rtnl_lock();
+
+ if (!cb->args[0]) {
+ err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl802154_fam.hdrsize,
+ nl802154_fam.attrbuf, nl802154_fam.maxattr,
+ nl802154_policy);
+ if (err)
+ goto out_unlock;
+
+ *wpan_dev = __cfg802154_wpan_dev_from_attrs(sock_net(skb->sk),
+ nl802154_fam.attrbuf);
+ if (IS_ERR(*wpan_dev)) {
+ err = PTR_ERR(*wpan_dev);
+ goto out_unlock;
+ }
+ *rdev = wpan_phy_to_rdev((*wpan_dev)->wpan_phy);
+ /* 0 is the first index - add 1 to parse only once */
+ cb->args[0] = (*rdev)->wpan_phy_idx + 1;
+ cb->args[1] = (*wpan_dev)->identifier;
+ } else {
+ /* subtract the 1 again here */
+ struct wpan_phy *wpan_phy = wpan_phy_idx_to_wpan_phy(cb->args[0] - 1);
+ struct wpan_dev *tmp;
+
+ if (!wpan_phy) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ *rdev = wpan_phy_to_rdev(wpan_phy);
+ *wpan_dev = NULL;
+
+ list_for_each_entry(tmp, &(*rdev)->wpan_dev_list, list) {
+ if (tmp->identifier == cb->args[1]) {
+ *wpan_dev = tmp;
+ break;
+ }
+ }
+
+ if (!*wpan_dev) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ return 0;
+ out_unlock:
+ rtnl_unlock();
+ return err;
+}
+
+static void
+nl802154_finish_wpan_dev_dump(struct cfg802154_registered_device *rdev)
+{
+ rtnl_unlock();
+}
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
+
/* message building helper */
static inline void *nl802154hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
int flags, u8 cmd)
@@ -612,6 +690,107 @@ static inline u64 wpan_dev_id(struct wpan_dev *wpan_dev)
((u64)wpan_phy_to_rdev(wpan_dev->wpan_phy)->wpan_phy_idx << 32);
}
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+#include <net/ieee802154_netdev.h>
+
+static int
+ieee802154_llsec_send_key_id(struct sk_buff *msg,
+ const struct ieee802154_llsec_key_id *desc)
+{
+ struct nlattr *nl_dev_addr;
+
+ if (nla_put_u32(msg, NL802154_KEY_ID_ATTR_MODE, desc->mode))
+ return -ENOBUFS;
+
+ switch (desc->mode) {
+ case NL802154_KEY_ID_MODE_IMPLICIT:
+ nl_dev_addr = nla_nest_start(msg, NL802154_KEY_ID_ATTR_IMPLICIT);
+ if (!nl_dev_addr)
+ return -ENOBUFS;
+
+ if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_PAN_ID,
+ desc->device_addr.pan_id) ||
+ nla_put_u32(msg, NL802154_DEV_ADDR_ATTR_MODE,
+ desc->device_addr.mode))
+ return -ENOBUFS;
+
+ switch (desc->device_addr.mode) {
+ case NL802154_DEV_ADDR_SHORT:
+ if (nla_put_le16(msg, NL802154_DEV_ADDR_ATTR_SHORT,
+ desc->device_addr.short_addr))
+ return -ENOBUFS;
+ break;
+ case NL802154_DEV_ADDR_EXTENDED:
+ if (nla_put_le64(msg, NL802154_DEV_ADDR_ATTR_EXTENDED,
+ desc->device_addr.extended_addr))
+ return -ENOBUFS;
+ break;
+ default:
+ /* userspace should handle unknown */
+ break;
+ }
+
+ nla_nest_end(msg, nl_dev_addr);
+ break;
+ case NL802154_KEY_ID_MODE_INDEX:
+ break;
+ case NL802154_KEY_ID_MODE_INDEX_SHORT:
+ /* TODO rename short_source? */
+ if (nla_put_le32(msg, NL802154_KEY_ID_ATTR_SOURCE_SHORT,
+ desc->short_source))
+ return -ENOBUFS;
+ break;
+ case NL802154_KEY_ID_MODE_INDEX_EXTENDED:
+ if (nla_put_le64(msg, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED,
+ desc->extended_source))
+ return -ENOBUFS;
+ break;
+ default:
+ /* userspace should handle unknown */
+ break;
+ }
+
+ /* TODO key_id to key_idx ? Check naming */
+ if (desc->mode != NL802154_KEY_ID_MODE_IMPLICIT) {
+ if (nla_put_u8(msg, NL802154_KEY_ID_ATTR_INDEX, desc->id))
+ return -ENOBUFS;
+ }
+
+ return 0;
+}
+
+static int nl802154_get_llsec_params(struct sk_buff *msg,
+ struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev)
+{
+ struct nlattr *nl_key_id;
+ struct ieee802154_llsec_params params;
+ int ret;
+
+ ret = rdev_get_llsec_params(rdev, wpan_dev, &params);
+ if (ret < 0)
+ return ret;
+
+ if (nla_put_u8(msg, NL802154_ATTR_SEC_ENABLED, params.enabled) ||
+ nla_put_u32(msg, NL802154_ATTR_SEC_OUT_LEVEL, params.out_level) ||
+ nla_put_be32(msg, NL802154_ATTR_SEC_FRAME_COUNTER,
+ params.frame_counter))
+ return -ENOBUFS;
+
+ nl_key_id = nla_nest_start(msg, NL802154_ATTR_SEC_OUT_KEY_ID);
+ if (!nl_key_id)
+ return -ENOBUFS;
+
+ ret = ieee802154_llsec_send_key_id(msg, &params.out_key);
+ if (ret < 0)
+ return ret;
+
+ nla_nest_end(msg, nl_key_id);
+
+ return 0;
+}
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
+
static int
nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
struct cfg802154_registered_device *rdev,
@@ -663,6 +842,11 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags,
if (nla_put_u8(msg, NL802154_ATTR_ACKREQ_DEFAULT, wpan_dev->ackreq))
goto nla_put_failure;
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+ if (nl802154_get_llsec_params(msg, rdev, wpan_dev) < 0)
+ goto nla_put_failure;
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
+
genlmsg_end(msg, hdr);
return 0;
@@ -753,10 +937,8 @@ static int nl802154_new_interface(struct sk_buff *skb, struct genl_info *info)
return -EINVAL;
}
- /* TODO add nla_get_le64 to netlink */
if (info->attrs[NL802154_ATTR_EXTENDED_ADDR])
- extended_addr = (__force __le64)nla_get_u64(
- info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
+ extended_addr = nla_get_le64(info->attrs[NL802154_ATTR_EXTENDED_ADDR]);
if (!rdev->ops->add_virtual_intf)
return -EOPNOTSUPP;
@@ -1075,6 +1257,838 @@ nl802154_set_ackreq_default(struct sk_buff *skb, struct genl_info *info)
return rdev_set_ackreq_default(rdev, wpan_dev, ackreq);
}
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+/* Attribute policy for a nested device address; used by
+ * ieee802154_llsec_parse_dev_addr() when parsing the nested attributes.
+ */
+static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
+ [NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
+ [NL802154_DEV_ADDR_ATTR_MODE] = { .type = NLA_U32 },
+ [NL802154_DEV_ADDR_ATTR_SHORT] = { .type = NLA_U16 },
+ [NL802154_DEV_ADDR_ATTR_EXTENDED] = { .type = NLA_U64 },
+};
+
+/* Parse a nested device address attribute into @addr.
+ *
+ * PAN id and address mode are mandatory; the short or extended address
+ * attribute is mandatory for the mode that selects it.  Every attribute is
+ * checked for presence before it is read, because nla_get_*() dereferences
+ * its argument and userspace controls which attributes are sent.
+ *
+ * Returns 0 on success or -EINVAL if @nla is NULL, fails to parse, lacks a
+ * required attribute or carries an unknown address mode.
+ */
+static int
+ieee802154_llsec_parse_dev_addr(struct nlattr *nla,
+				struct ieee802154_addr *addr)
+{
+	struct nlattr *attrs[NL802154_DEV_ADDR_ATTR_MAX + 1];
+
+	if (!nla || nla_parse_nested(attrs, NL802154_DEV_ADDR_ATTR_MAX, nla,
+				     nl802154_dev_addr_policy))
+		return -EINVAL;
+
+	/* Both are read unconditionally below, so both must be present. */
+	if (!attrs[NL802154_DEV_ADDR_ATTR_PAN_ID] ||
+	    !attrs[NL802154_DEV_ADDR_ATTR_MODE])
+		return -EINVAL;
+
+	addr->pan_id = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_PAN_ID]);
+	addr->mode = nla_get_u32(attrs[NL802154_DEV_ADDR_ATTR_MODE]);
+	switch (addr->mode) {
+	case NL802154_DEV_ADDR_SHORT:
+		if (!attrs[NL802154_DEV_ADDR_ATTR_SHORT])
+			return -EINVAL;
+
+		addr->short_addr = nla_get_le16(attrs[NL802154_DEV_ADDR_ATTR_SHORT]);
+		break;
+	case NL802154_DEV_ADDR_EXTENDED:
+		if (!attrs[NL802154_DEV_ADDR_ATTR_EXTENDED])
+			return -EINVAL;
+
+		addr->extended_addr = nla_get_le64(attrs[NL802154_DEV_ADDR_ATTR_EXTENDED]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Attribute policy for a nested key id; used by
+ * ieee802154_llsec_parse_key_id() when parsing the nested attributes.
+ */
+static const struct nla_policy nl802154_key_id_policy[NL802154_KEY_ID_ATTR_MAX + 1] = {
+ [NL802154_KEY_ID_ATTR_MODE] = { .type = NLA_U32 },
+ [NL802154_KEY_ID_ATTR_INDEX] = { .type = NLA_U8 },
+ [NL802154_KEY_ID_ATTR_IMPLICIT] = { .type = NLA_NESTED },
+ [NL802154_KEY_ID_ATTR_SOURCE_SHORT] = { .type = NLA_U32 },
+ [NL802154_KEY_ID_ATTR_SOURCE_EXTENDED] = { .type = NLA_U64 },
+};
+
+/* Parse a nested key id attribute into @desc.
+ *
+ * The mode attribute is mandatory.  Implicit mode requires a nested device
+ * address and carries no index; every other mode requires the index
+ * attribute, and the short/extended index modes additionally require the
+ * matching key source attribute.
+ *
+ * Returns 0 on success, -EINVAL on any missing or malformed attribute.
+ */
+static int
+ieee802154_llsec_parse_key_id(struct nlattr *nla,
+			      struct ieee802154_llsec_key_id *desc)
+{
+	struct nlattr *tb[NL802154_KEY_ID_ATTR_MAX + 1];
+
+	if (!nla)
+		return -EINVAL;
+
+	if (nla_parse_nested(tb, NL802154_KEY_ID_ATTR_MAX, nla,
+			     nl802154_key_id_policy))
+		return -EINVAL;
+
+	if (!tb[NL802154_KEY_ID_ATTR_MODE])
+		return -EINVAL;
+
+	desc->mode = nla_get_u32(tb[NL802154_KEY_ID_ATTR_MODE]);
+	switch (desc->mode) {
+	case NL802154_KEY_ID_MODE_IMPLICIT:
+		if (!tb[NL802154_KEY_ID_ATTR_IMPLICIT] ||
+		    ieee802154_llsec_parse_dev_addr(tb[NL802154_KEY_ID_ATTR_IMPLICIT],
+						    &desc->device_addr) < 0)
+			return -EINVAL;
+
+		/* implicit mode has no key index, nothing more to read */
+		return 0;
+	case NL802154_KEY_ID_MODE_INDEX:
+		break;
+	case NL802154_KEY_ID_MODE_INDEX_SHORT:
+		if (!tb[NL802154_KEY_ID_ATTR_SOURCE_SHORT])
+			return -EINVAL;
+
+		desc->short_source = nla_get_le32(tb[NL802154_KEY_ID_ATTR_SOURCE_SHORT]);
+		break;
+	case NL802154_KEY_ID_MODE_INDEX_EXTENDED:
+		if (!tb[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED])
+			return -EINVAL;
+
+		desc->extended_source = nla_get_le64(tb[NL802154_KEY_ID_ATTR_SOURCE_EXTENDED]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* all non-implicit modes are identified by a key index */
+	if (!tb[NL802154_KEY_ID_ATTR_INDEX])
+		return -EINVAL;
+
+	/* TODO change id to idx */
+	desc->id = nla_get_u8(tb[NL802154_KEY_ID_ATTR_INDEX]);
+
+	return 0;
+}
+
+/* NL802154_CMD_SET_SEC_PARAMS handler.
+ *
+ * Each optional attribute that is present is validated, copied into a
+ * local parameter block and flagged in the "changed" bitmask; only the
+ * flagged fields of the block are initialised.  The block and the mask are
+ * then handed to rdev_set_llsec_params().
+ *
+ * Returns the result of rdev_set_llsec_params(), -EINVAL for an
+ * out-of-range value, or the error from parsing the outgoing key id.
+ */
+static int nl802154_set_llsec_params(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct nlattr *attr_enabled = info->attrs[NL802154_ATTR_SEC_ENABLED];
+	struct nlattr *attr_key_id = info->attrs[NL802154_ATTR_SEC_OUT_KEY_ID];
+	struct nlattr *attr_level = info->attrs[NL802154_ATTR_SEC_OUT_LEVEL];
+	struct nlattr *attr_counter = info->attrs[NL802154_ATTR_SEC_FRAME_COUNTER];
+	struct ieee802154_llsec_params params;
+	u32 changed = 0;
+
+	if (attr_enabled) {
+		u8 enabled = nla_get_u8(attr_enabled);
+
+		/* boolean on the wire: only 0 and 1 are accepted */
+		if (enabled != 0 && enabled != 1)
+			return -EINVAL;
+
+		params.enabled = enabled;
+		changed |= IEEE802154_LLSEC_PARAM_ENABLED;
+	}
+
+	if (attr_key_id) {
+		int ret = ieee802154_llsec_parse_key_id(attr_key_id,
+							&params.out_key);
+
+		if (ret < 0)
+			return ret;
+
+		changed |= IEEE802154_LLSEC_PARAM_OUT_KEY;
+	}
+
+	if (attr_level) {
+		params.out_level = nla_get_u32(attr_level);
+		if (params.out_level > NL802154_SECLEVEL_MAX)
+			return -EINVAL;
+
+		changed |= IEEE802154_LLSEC_PARAM_OUT_LEVEL;
+	}
+
+	if (attr_counter) {
+		params.frame_counter = nla_get_be32(attr_counter);
+		changed |= IEEE802154_LLSEC_PARAM_FRAME_COUNTER;
+	}
+
+	return rdev_set_llsec_params(rdev, wpan_dev, &params, changed);
+}
+
+/* Build one netlink message describing the llsec key entry @key of @dev.
+ *
+ * The message carries the ifindex and a nested NL802154_ATTR_SEC_KEY with
+ * the key id, the frame-type usage mask, the command-frame usage bitmap
+ * (only when the key is usable for command frames) and the key bytes.
+ *
+ * Returns 0 on success, -1 if the message header cannot be added, or
+ * -EMSGSIZE (after cancelling the message) if an attribute does not fit.
+ */
+static int nl802154_send_key(struct sk_buff *msg, u32 cmd, u32 portid,
+			     u32 seq, int flags,
+			     struct cfg802154_registered_device *rdev,
+			     struct net_device *dev,
+			     const struct ieee802154_llsec_key_entry *key)
+{
+	u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { };
+	struct nlattr *key_nest, *id_nest;
+	void *hdr;
+
+	hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+	if (!hdr)
+		return -1;
+
+	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+		goto nla_put_failure;
+
+	key_nest = nla_nest_start(msg, NL802154_ATTR_SEC_KEY);
+	if (!key_nest)
+		goto nla_put_failure;
+
+	id_nest = nla_nest_start(msg, NL802154_KEY_ATTR_ID);
+	if (!id_nest)
+		goto nla_put_failure;
+
+	if (ieee802154_llsec_send_key_id(msg, &key->id) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, id_nest);
+
+	if (nla_put_u8(msg, NL802154_KEY_ATTR_USAGE_FRAMES,
+		       key->key->frame_types))
+		goto nla_put_failure;
+
+	if (key->key->frame_types & BIT(NL802154_FRAME_CMD)) {
+		/* TODO for each nested */
+		/* only the last word of the bitmap is ever populated */
+		commands[7] = key->key->cmd_frame_ids;
+		if (nla_put(msg, NL802154_KEY_ATTR_USAGE_CMDS,
+			    sizeof(commands), commands))
+			goto nla_put_failure;
+	}
+
+	if (nla_put(msg, NL802154_KEY_ATTR_BYTES, NL802154_KEY_SIZE,
+		    key->key->key))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, key_nest);
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/* Dump callback for NL802154_CMD_GET_SEC_KEY.
+ *
+ * Emits one NL802154_CMD_NEW_SEC_KEY message per entry of the llsec key
+ * table while holding the table lock.  cb->args[2] marks the dump as
+ * finished so that a later invocation emits nothing.
+ *
+ * Returns skb->len on success, -EINVAL if the wpan_dev has no netdev, -EIO
+ * if an entry could not be written, or the error from
+ * nl802154_prepare_wpan_dev_dump().
+ */
+static int
+nl802154_dump_llsec_key(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct cfg802154_registered_device *rdev = NULL;
+	struct ieee802154_llsec_table *table;
+	struct ieee802154_llsec_key_entry *entry;
+	struct wpan_dev *wpan_dev;
+	int err;
+
+	err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
+	if (err)
+		return err;
+
+	if (!wpan_dev->netdev) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	rdev_lock_llsec_table(rdev, wpan_dev);
+	rdev_get_llsec_table(rdev, wpan_dev, &table);
+
+	/* TODO make it like station dump */
+	if (!cb->args[2]) {
+		list_for_each_entry(entry, &table->keys, list) {
+			int rc = nl802154_send_key(skb,
+						   NL802154_CMD_NEW_SEC_KEY,
+						   NETLINK_CB(cb->skb).portid,
+						   cb->nlh->nlmsg_seq,
+						   NLM_F_MULTI, rdev,
+						   wpan_dev->netdev, entry);
+
+			if (rc < 0) {
+				/* TODO */
+				err = -EIO;
+				rdev_unlock_llsec_table(rdev, wpan_dev);
+				goto out_err;
+			}
+		}
+
+		cb->args[2] = 1;
+	}
+
+	rdev_unlock_llsec_table(rdev, wpan_dev);
+	err = skb->len;
+out_err:
+	nl802154_finish_wpan_dev_dump(rdev);
+
+	return err;
+}
+
+/* Attribute policy for the contents of NL802154_ATTR_SEC_KEY. */
+static const struct nla_policy nl802154_key_policy[NL802154_KEY_ATTR_MAX + 1] = {
+	[NL802154_KEY_ATTR_ID] = { .type = NLA_NESTED },
+	/* TODO handle it as for_each_nested and NLA_FLAG? */
+	[NL802154_KEY_ATTR_USAGE_FRAMES] = { .type = NLA_U8 },
+	/* TODO handle it as for_each_nested, not static array? */
+	[NL802154_KEY_ATTR_USAGE_CMDS] = { .len = NL802154_CMD_FRAME_NR_IDS / 8 },
+	[NL802154_KEY_ATTR_BYTES] = { .len = NL802154_KEY_SIZE },
+};
+
+/* NL802154_CMD_NEW_SEC_KEY handler: parse a key description from the
+ * nested NL802154_ATTR_SEC_KEY attribute and add it via
+ * rdev_add_llsec_key().
+ *
+ * The container attribute is optional on the wire, so it is checked for
+ * presence before nla_parse_nested() is allowed to look at it.
+ *
+ * Returns the result of rdev_add_llsec_key(), -EINVAL on a missing or
+ * malformed attribute, or -ENOBUFS if the key id cannot be parsed (error
+ * code kept as-is for existing callers).
+ */
+static int nl802154_add_llsec_key(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1];
+	struct ieee802154_llsec_key key = { };
+	struct ieee802154_llsec_key_id id = { };
+	u32 commands[NL802154_CMD_FRAME_NR_IDS / 32] = { };
+
+	if (!info->attrs[NL802154_ATTR_SEC_KEY] ||
+	    nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
+			     info->attrs[NL802154_ATTR_SEC_KEY],
+			     nl802154_key_policy))
+		return -EINVAL;
+
+	if (!attrs[NL802154_KEY_ATTR_USAGE_FRAMES] ||
+	    !attrs[NL802154_KEY_ATTR_BYTES])
+		return -EINVAL;
+
+	/* parse_key_id() rejects a missing NL802154_KEY_ATTR_ID itself */
+	if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
+		return -ENOBUFS;
+
+	key.frame_types = nla_get_u8(attrs[NL802154_KEY_ATTR_USAGE_FRAMES]);
+	/* a key usable for command frames must say for which commands */
+	if (key.frame_types > BIT(NL802154_FRAME_MAX) ||
+	    ((key.frame_types & BIT(NL802154_FRAME_CMD)) &&
+	     !attrs[NL802154_KEY_ATTR_USAGE_CMDS]))
+		return -EINVAL;
+
+	if (attrs[NL802154_KEY_ATTR_USAGE_CMDS]) {
+		/* TODO for each nested */
+		nla_memcpy(commands, attrs[NL802154_KEY_ATTR_USAGE_CMDS],
+			   NL802154_CMD_FRAME_NR_IDS / 8);
+
+		/* TODO understand the -EINVAL logic here? last condition */
+		if (commands[0] || commands[1] || commands[2] || commands[3] ||
+		    commands[4] || commands[5] || commands[6] ||
+		    commands[7] > BIT(NL802154_CMD_FRAME_MAX))
+			return -EINVAL;
+
+		key.cmd_frame_ids = commands[7];
+	}
+	/* else: key.cmd_frame_ids stays 0 from the zero initialiser */
+
+	nla_memcpy(key.key, attrs[NL802154_KEY_ATTR_BYTES], NL802154_KEY_SIZE);
+
+	return rdev_add_llsec_key(rdev, wpan_dev, &id, &key);
+}
+
+/* NL802154_CMD_DEL_SEC_KEY handler: parse the key id nested in
+ * NL802154_ATTR_SEC_KEY and delete the matching key via
+ * rdev_del_llsec_key().
+ *
+ * The container attribute is optional on the wire, so it is checked for
+ * presence before nla_parse_nested() is allowed to look at it.
+ *
+ * Returns the result of rdev_del_llsec_key(), -EINVAL if the container is
+ * missing or malformed, or -ENOBUFS if the key id cannot be parsed.
+ */
+static int nl802154_del_llsec_key(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct nlattr *attrs[NL802154_KEY_ATTR_MAX + 1];
+	struct ieee802154_llsec_key_id id;
+
+	if (!info->attrs[NL802154_ATTR_SEC_KEY] ||
+	    nla_parse_nested(attrs, NL802154_KEY_ATTR_MAX,
+			     info->attrs[NL802154_ATTR_SEC_KEY],
+			     nl802154_key_policy))
+		return -EINVAL;
+
+	if (ieee802154_llsec_parse_key_id(attrs[NL802154_KEY_ATTR_ID], &id) < 0)
+		return -ENOBUFS;
+
+	return rdev_del_llsec_key(rdev, wpan_dev, &id);
+}
+
+/* Build one netlink message describing the llsec device @dev_desc.
+ *
+ * The message carries the ifindex and a nested NL802154_ATTR_SEC_DEVICE
+ * with frame counter, PAN id, short and extended address, the
+ * seclevel-exempt flag and the key mode.
+ *
+ * Returns 0 on success, -1 if the message header cannot be added, or
+ * -EMSGSIZE (after cancelling the message) if an attribute does not fit.
+ */
+static int nl802154_send_device(struct sk_buff *msg, u32 cmd, u32 portid,
+				u32 seq, int flags,
+				struct cfg802154_registered_device *rdev,
+				struct net_device *dev,
+				const struct ieee802154_llsec_device *dev_desc)
+{
+	struct nlattr *nest;
+	void *hdr;
+
+	hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+	if (!hdr)
+		return -1;
+
+	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(msg, NL802154_ATTR_SEC_DEVICE);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL802154_DEV_ATTR_FRAME_COUNTER,
+			dev_desc->frame_counter))
+		goto nla_put_failure;
+
+	if (nla_put_le16(msg, NL802154_DEV_ATTR_PAN_ID, dev_desc->pan_id))
+		goto nla_put_failure;
+
+	if (nla_put_le16(msg, NL802154_DEV_ATTR_SHORT_ADDR,
+			 dev_desc->short_addr))
+		goto nla_put_failure;
+
+	if (nla_put_le64(msg, NL802154_DEV_ATTR_EXTENDED_ADDR,
+			 dev_desc->hwaddr))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL802154_DEV_ATTR_SECLEVEL_EXEMPT,
+		       dev_desc->seclevel_exempt))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL802154_DEV_ATTR_KEY_MODE, dev_desc->key_mode))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, nest);
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/* Dump callback for NL802154_CMD_GET_SEC_DEV.
+ *
+ * Emits one message per entry of the llsec device table while holding the
+ * table lock.  cb->args[2] marks the dump as finished so that a later
+ * invocation emits nothing.
+ *
+ * Returns skb->len on success, -EINVAL if the wpan_dev has no netdev, -EIO
+ * if an entry could not be written, or the error from
+ * nl802154_prepare_wpan_dev_dump().
+ */
+static int
+nl802154_dump_llsec_dev(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct cfg802154_registered_device *rdev = NULL;
+	struct ieee802154_llsec_table *table;
+	struct ieee802154_llsec_device *entry;
+	struct wpan_dev *wpan_dev;
+	int err;
+
+	err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
+	if (err)
+		return err;
+
+	if (!wpan_dev->netdev) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	rdev_lock_llsec_table(rdev, wpan_dev);
+	rdev_get_llsec_table(rdev, wpan_dev, &table);
+
+	/* TODO make it like station dump */
+	if (!cb->args[2]) {
+		list_for_each_entry(entry, &table->devices, list) {
+			/* NOTE(review): device entries are tagged with
+			 * NL802154_CMD_NEW_SEC_LEVEL; NL802154_CMD_NEW_SEC_DEV
+			 * looks intended - confirm against userspace before
+			 * changing.
+			 */
+			int rc = nl802154_send_device(skb,
+						      NL802154_CMD_NEW_SEC_LEVEL,
+						      NETLINK_CB(cb->skb).portid,
+						      cb->nlh->nlmsg_seq,
+						      NLM_F_MULTI, rdev,
+						      wpan_dev->netdev, entry);
+
+			if (rc < 0) {
+				/* TODO */
+				err = -EIO;
+				rdev_unlock_llsec_table(rdev, wpan_dev);
+				goto out_err;
+			}
+		}
+
+		cb->args[2] = 1;
+	}
+
+	rdev_unlock_llsec_table(rdev, wpan_dev);
+	err = skb->len;
+out_err:
+	nl802154_finish_wpan_dev_dump(rdev);
+
+	return err;
+}
+
+/* Attribute policy for the contents of NL802154_ATTR_SEC_DEVICE. */
+static const struct nla_policy nl802154_dev_policy[NL802154_DEV_ATTR_MAX + 1] = {
+	[NL802154_DEV_ATTR_FRAME_COUNTER] = { .type = NLA_U32 },
+	[NL802154_DEV_ATTR_PAN_ID] = { .type = NLA_U16 },
+	[NL802154_DEV_ATTR_SHORT_ADDR] = { .type = NLA_U16 },
+	[NL802154_DEV_ATTR_EXTENDED_ADDR] = { .type = NLA_U64 },
+	[NL802154_DEV_ATTR_SECLEVEL_EXEMPT] = { .type = NLA_U8 },
+	[NL802154_DEV_ATTR_KEY_MODE] = { .type = NLA_U32 },
+};
+
+/* Parse a nested llsec device description into @dev.
+ *
+ * All six device attributes are mandatory.  @dev is zeroed once the nested
+ * attributes have been parsed, before any field is filled in.
+ *
+ * Returns 0 on success, -EINVAL if @nla is NULL, fails to parse, lacks an
+ * attribute, or carries an out-of-range key mode or exempt flag.
+ */
+static int
+ieee802154_llsec_parse_device(struct nlattr *nla,
+			      struct ieee802154_llsec_device *dev)
+{
+	struct nlattr *tb[NL802154_DEV_ATTR_MAX + 1];
+
+	if (!nla)
+		return -EINVAL;
+
+	if (nla_parse_nested(tb, NL802154_DEV_ATTR_MAX, nla,
+			     nl802154_dev_policy))
+		return -EINVAL;
+
+	memset(dev, 0, sizeof(*dev));
+
+	if (!tb[NL802154_DEV_ATTR_FRAME_COUNTER] ||
+	    !tb[NL802154_DEV_ATTR_PAN_ID] ||
+	    !tb[NL802154_DEV_ATTR_SHORT_ADDR] ||
+	    !tb[NL802154_DEV_ATTR_EXTENDED_ADDR] ||
+	    !tb[NL802154_DEV_ATTR_SECLEVEL_EXEMPT] ||
+	    !tb[NL802154_DEV_ATTR_KEY_MODE])
+		return -EINVAL;
+
+	/* TODO be32 */
+	dev->frame_counter = nla_get_u32(tb[NL802154_DEV_ATTR_FRAME_COUNTER]);
+	dev->pan_id = nla_get_le16(tb[NL802154_DEV_ATTR_PAN_ID]);
+	dev->short_addr = nla_get_le16(tb[NL802154_DEV_ATTR_SHORT_ADDR]);
+	/* TODO rename hwaddr to extended_addr */
+	dev->hwaddr = nla_get_le64(tb[NL802154_DEV_ATTR_EXTENDED_ADDR]);
+	dev->seclevel_exempt = nla_get_u8(tb[NL802154_DEV_ATTR_SECLEVEL_EXEMPT]);
+	dev->key_mode = nla_get_u32(tb[NL802154_DEV_ATTR_KEY_MODE]);
+
+	if (dev->key_mode > NL802154_DEVKEY_MAX)
+		return -EINVAL;
+
+	/* boolean on the wire: only 0 and 1 are accepted */
+	if (dev->seclevel_exempt != 0 && dev->seclevel_exempt != 1)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* NL802154_CMD_NEW_SEC_DEV handler: parse NL802154_ATTR_SEC_DEVICE and
+ * add the described device via rdev_add_device().
+ *
+ * Returns the result of rdev_add_device(), or -EINVAL if the device
+ * description cannot be parsed.
+ */
+static int nl802154_add_llsec_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct ieee802154_llsec_device dev_desc;
+	int rc;
+
+	rc = ieee802154_llsec_parse_device(info->attrs[NL802154_ATTR_SEC_DEVICE],
+					   &dev_desc);
+	if (rc < 0)
+		return -EINVAL;
+
+	return rdev_add_device(rdev, wpan_dev, &dev_desc);
+}
+
+/* NL802154_CMD_DEL_SEC_DEV handler: delete the llsec device identified by
+ * the extended address nested in NL802154_ATTR_SEC_DEVICE.
+ *
+ * The container attribute is optional on the wire, so it is checked for
+ * presence before nla_parse_nested() is allowed to look at it.
+ *
+ * Returns the result of rdev_del_device(), or -EINVAL if the container or
+ * the extended address attribute is missing or malformed.
+ */
+static int nl802154_del_llsec_dev(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct nlattr *attrs[NL802154_DEV_ATTR_MAX + 1];
+	__le64 extended_addr;
+
+	if (!info->attrs[NL802154_ATTR_SEC_DEVICE] ||
+	    nla_parse_nested(attrs, NL802154_DEV_ATTR_MAX,
+			     info->attrs[NL802154_ATTR_SEC_DEVICE],
+			     nl802154_dev_policy))
+		return -EINVAL;
+
+	if (!attrs[NL802154_DEV_ATTR_EXTENDED_ADDR])
+		return -EINVAL;
+
+	extended_addr = nla_get_le64(attrs[NL802154_DEV_ATTR_EXTENDED_ADDR]);
+	return rdev_del_device(rdev, wpan_dev, extended_addr);
+}
+
+/* Build one netlink message describing the device key @devkey of the
+ * device with extended address @extended_addr.
+ *
+ * The message carries the ifindex and a nested NL802154_ATTR_SEC_DEVKEY
+ * with the extended address, the frame counter and the nested key id.
+ *
+ * Returns 0 on success, -1 if the message header cannot be added, or
+ * -EMSGSIZE (after cancelling the message) if an attribute does not fit.
+ */
+static int nl802154_send_devkey(struct sk_buff *msg, u32 cmd, u32 portid,
+				u32 seq, int flags,
+				struct cfg802154_registered_device *rdev,
+				struct net_device *dev, __le64 extended_addr,
+				const struct ieee802154_llsec_device_key *devkey)
+{
+	struct nlattr *devkey_nest, *id_nest;
+	void *hdr;
+
+	hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+	if (!hdr)
+		return -1;
+
+	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+		goto nla_put_failure;
+
+	devkey_nest = nla_nest_start(msg, NL802154_ATTR_SEC_DEVKEY);
+	if (!devkey_nest)
+		goto nla_put_failure;
+
+	if (nla_put_le64(msg, NL802154_DEVKEY_ATTR_EXTENDED_ADDR,
+			 extended_addr))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL802154_DEVKEY_ATTR_FRAME_COUNTER,
+			devkey->frame_counter))
+		goto nla_put_failure;
+
+	id_nest = nla_nest_start(msg, NL802154_DEVKEY_ATTR_ID);
+	if (!id_nest)
+		goto nla_put_failure;
+
+	if (ieee802154_llsec_send_key_id(msg, &devkey->key_id) < 0)
+		goto nla_put_failure;
+
+	nla_nest_end(msg, id_nest);
+	nla_nest_end(msg, devkey_nest);
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/* Dump callback for NL802154_CMD_GET_SEC_DEVKEY.
+ *
+ * Walks every device of the llsec table and, for each, every key on its
+ * key list, emitting one message per (device, key) pair while holding the
+ * table lock.  cb->args[2] marks the dump as finished so that a later
+ * invocation emits nothing.
+ *
+ * Returns skb->len on success, -EINVAL if the wpan_dev has no netdev, -EIO
+ * if an entry could not be written, or the error from
+ * nl802154_prepare_wpan_dev_dump().
+ */
+static int
+nl802154_dump_llsec_devkey(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct cfg802154_registered_device *rdev = NULL;
+	struct ieee802154_llsec_table *table;
+	struct ieee802154_llsec_device *dpos;
+	struct ieee802154_llsec_device_key *kpos;
+	struct wpan_dev *wpan_dev;
+	int err;
+
+	err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
+	if (err)
+		return err;
+
+	if (!wpan_dev->netdev) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	rdev_lock_llsec_table(rdev, wpan_dev);
+	rdev_get_llsec_table(rdev, wpan_dev, &table);
+
+	/* TODO make it like station dump */
+	if (!cb->args[2]) {
+		/* TODO look if remove devkey and do some nested attribute */
+		list_for_each_entry(dpos, &table->devices, list) {
+			list_for_each_entry(kpos, &dpos->keys, list) {
+				/* NOTE(review): device keys are tagged with
+				 * NL802154_CMD_NEW_SEC_LEVEL;
+				 * NL802154_CMD_NEW_SEC_DEVKEY looks intended -
+				 * confirm against userspace before changing.
+				 */
+				int rc = nl802154_send_devkey(skb,
+							      NL802154_CMD_NEW_SEC_LEVEL,
+							      NETLINK_CB(cb->skb).portid,
+							      cb->nlh->nlmsg_seq,
+							      NLM_F_MULTI, rdev,
+							      wpan_dev->netdev,
+							      dpos->hwaddr,
+							      kpos);
+
+				if (rc < 0) {
+					/* TODO */
+					err = -EIO;
+					rdev_unlock_llsec_table(rdev, wpan_dev);
+					goto out_err;
+				}
+			}
+		}
+
+		cb->args[2] = 1;
+	}
+
+	rdev_unlock_llsec_table(rdev, wpan_dev);
+	err = skb->len;
+out_err:
+	nl802154_finish_wpan_dev_dump(rdev);
+
+	return err;
+}
+
+/* Attribute policy for the contents of NL802154_ATTR_SEC_DEVKEY. */
+static const struct nla_policy nl802154_devkey_policy[NL802154_DEVKEY_ATTR_MAX + 1] = {
+	[NL802154_DEVKEY_ATTR_FRAME_COUNTER] = { .type = NLA_U32 },
+	[NL802154_DEVKEY_ATTR_EXTENDED_ADDR] = { .type = NLA_U64 },
+	[NL802154_DEVKEY_ATTR_ID] = { .type = NLA_NESTED },
+};
+
+/* NL802154_CMD_NEW_SEC_DEVKEY handler: parse a device key from the nested
+ * NL802154_ATTR_SEC_DEVKEY attribute and add it, for the device with the
+ * given extended address, via rdev_add_devkey().
+ *
+ * Returns the result of rdev_add_devkey(), -EINVAL on a missing or
+ * malformed attribute, or -ENOBUFS if the key id cannot be parsed.
+ */
+static int nl802154_add_llsec_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct nlattr *container = info->attrs[NL802154_ATTR_SEC_DEVKEY];
+	struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1];
+	struct ieee802154_llsec_device_key key;
+	__le64 extended_addr;
+
+	if (!container)
+		return -EINVAL;
+
+	if (nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX, container,
+			     nl802154_devkey_policy) < 0)
+		return -EINVAL;
+
+	if (!attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER])
+		return -EINVAL;
+
+	if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR])
+		return -EINVAL;
+
+	/* TODO change key.id ? */
+	if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID],
+					  &key.key_id) < 0)
+		return -ENOBUFS;
+
+	/* TODO be32 */
+	key.frame_counter = nla_get_u32(attrs[NL802154_DEVKEY_ATTR_FRAME_COUNTER]);
+	/* TODO change naming hwaddr -> extended_addr
+	 * check unique identifier short+pan OR extended_addr
+	 */
+	extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]);
+
+	return rdev_add_devkey(rdev, wpan_dev, extended_addr, &key);
+}
+
+/* NL802154_CMD_DEL_SEC_DEVKEY handler: parse the extended address and key
+ * id nested in NL802154_ATTR_SEC_DEVKEY and delete the matching device key
+ * via rdev_del_devkey().
+ *
+ * The container attribute is optional on the wire, so it is checked for
+ * presence before nla_parse_nested() is allowed to look at it.
+ *
+ * Returns the result of rdev_del_devkey(), -EINVAL if the container or the
+ * extended address is missing or malformed, or -ENOBUFS if the key id
+ * cannot be parsed.
+ */
+static int nl802154_del_llsec_devkey(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct nlattr *attrs[NL802154_DEVKEY_ATTR_MAX + 1];
+	struct ieee802154_llsec_device_key key;
+	__le64 extended_addr;
+
+	if (!info->attrs[NL802154_ATTR_SEC_DEVKEY] ||
+	    nla_parse_nested(attrs, NL802154_DEVKEY_ATTR_MAX,
+			     info->attrs[NL802154_ATTR_SEC_DEVKEY],
+			     nl802154_devkey_policy))
+		return -EINVAL;
+
+	if (!attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR])
+		return -EINVAL;
+
+	/* TODO change key.id ? */
+	if (ieee802154_llsec_parse_key_id(attrs[NL802154_DEVKEY_ATTR_ID],
+					  &key.key_id) < 0)
+		return -ENOBUFS;
+
+	/* TODO change naming hwaddr -> extended_addr
+	 * check unique identifier short+pan OR extended_addr
+	 */
+	extended_addr = nla_get_le64(attrs[NL802154_DEVKEY_ATTR_EXTENDED_ADDR]);
+	return rdev_del_devkey(rdev, wpan_dev, extended_addr, &key);
+}
+
+/* Build one netlink message describing the security level entry @sl.
+ *
+ * The message carries the ifindex and a nested NL802154_ATTR_SEC_LEVEL
+ * with frame type, allowed levels and the device-override flag; the
+ * command frame id is added only for command frames.
+ *
+ * Returns 0 on success, -1 if the message header cannot be added, or
+ * -EMSGSIZE (after cancelling the message) if an attribute does not fit.
+ */
+static int nl802154_send_seclevel(struct sk_buff *msg, u32 cmd, u32 portid,
+				  u32 seq, int flags,
+				  struct cfg802154_registered_device *rdev,
+				  struct net_device *dev,
+				  const struct ieee802154_llsec_seclevel *sl)
+{
+	struct nlattr *nest;
+	void *hdr;
+
+	hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+	if (!hdr)
+		return -1;
+
+	if (nla_put_u32(msg, NL802154_ATTR_IFINDEX, dev->ifindex))
+		goto nla_put_failure;
+
+	nest = nla_nest_start(msg, NL802154_ATTR_SEC_LEVEL);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_FRAME, sl->frame_type))
+		goto nla_put_failure;
+
+	/* NOTE(review): LEVELS is emitted as u32 here, while
+	 * nl802154_seclevel_policy and llsec_parse_seclevel() treat it as
+	 * u8 - confirm which width userspace expects.
+	 */
+	if (nla_put_u32(msg, NL802154_SECLEVEL_ATTR_LEVELS, sl->sec_levels))
+		goto nla_put_failure;
+
+	if (nla_put_u8(msg, NL802154_SECLEVEL_ATTR_DEV_OVERRIDE,
+		       sl->device_override))
+		goto nla_put_failure;
+
+	if (sl->frame_type == NL802154_FRAME_CMD &&
+	    nla_put_u32(msg, NL802154_SECLEVEL_ATTR_CMD_FRAME,
+			sl->cmd_frame_id))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, nest);
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+/* Dump callback for NL802154_CMD_GET_SEC_LEVEL.
+ *
+ * Emits one NL802154_CMD_NEW_SEC_LEVEL message per entry of the llsec
+ * security level table while holding the table lock.  cb->args[2] marks
+ * the dump as finished so that a later invocation emits nothing.
+ *
+ * Returns skb->len on success, -EINVAL if the wpan_dev has no netdev, -EIO
+ * if an entry could not be written, or the error from
+ * nl802154_prepare_wpan_dev_dump().
+ */
+static int
+nl802154_dump_llsec_seclevel(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct cfg802154_registered_device *rdev = NULL;
+	struct ieee802154_llsec_table *table;
+	struct ieee802154_llsec_seclevel *entry;
+	struct wpan_dev *wpan_dev;
+	int err;
+
+	err = nl802154_prepare_wpan_dev_dump(skb, cb, &rdev, &wpan_dev);
+	if (err)
+		return err;
+
+	if (!wpan_dev->netdev) {
+		err = -EINVAL;
+		goto out_err;
+	}
+
+	rdev_lock_llsec_table(rdev, wpan_dev);
+	rdev_get_llsec_table(rdev, wpan_dev, &table);
+
+	/* TODO make it like station dump */
+	if (!cb->args[2]) {
+		list_for_each_entry(entry, &table->security_levels, list) {
+			int rc = nl802154_send_seclevel(skb,
+							NL802154_CMD_NEW_SEC_LEVEL,
+							NETLINK_CB(cb->skb).portid,
+							cb->nlh->nlmsg_seq,
+							NLM_F_MULTI, rdev,
+							wpan_dev->netdev,
+							entry);
+
+			if (rc < 0) {
+				/* TODO */
+				err = -EIO;
+				rdev_unlock_llsec_table(rdev, wpan_dev);
+				goto out_err;
+			}
+		}
+
+		cb->args[2] = 1;
+	}
+
+	rdev_unlock_llsec_table(rdev, wpan_dev);
+	err = skb->len;
+out_err:
+	nl802154_finish_wpan_dev_dump(rdev);
+
+	return err;
+}
+
+/* Attribute policy for a nested security level; used by
+ * llsec_parse_seclevel() when parsing the nested attributes.
+ */
+static const struct nla_policy nl802154_seclevel_policy[NL802154_SECLEVEL_ATTR_MAX + 1] = {
+ [NL802154_SECLEVEL_ATTR_LEVELS] = { .type = NLA_U8 },
+ [NL802154_SECLEVEL_ATTR_FRAME] = { .type = NLA_U32 },
+ [NL802154_SECLEVEL_ATTR_CMD_FRAME] = { .type = NLA_U32 },
+ [NL802154_SECLEVEL_ATTR_DEV_OVERRIDE] = { .type = NLA_U8 },
+};
+
+/* Parse a nested security level description into @sl.
+ *
+ * Levels, frame type and the device-override flag are mandatory; the
+ * command frame id is mandatory only when the frame type is
+ * NL802154_FRAME_CMD.  @sl is zeroed once the nested attributes have been
+ * parsed, before any field is filled in.
+ *
+ * Returns 0 on success, -EINVAL if @nla is NULL, fails to parse, lacks a
+ * required attribute or carries an out-of-range value.
+ */
+static int
+llsec_parse_seclevel(struct nlattr *nla, struct ieee802154_llsec_seclevel *sl)
+{
+	struct nlattr *tb[NL802154_SECLEVEL_ATTR_MAX + 1];
+
+	if (!nla)
+		return -EINVAL;
+
+	if (nla_parse_nested(tb, NL802154_SECLEVEL_ATTR_MAX, nla,
+			     nl802154_seclevel_policy))
+		return -EINVAL;
+
+	memset(sl, 0, sizeof(*sl));
+
+	if (!tb[NL802154_SECLEVEL_ATTR_LEVELS] ||
+	    !tb[NL802154_SECLEVEL_ATTR_FRAME] ||
+	    !tb[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE])
+		return -EINVAL;
+
+	sl->sec_levels = nla_get_u8(tb[NL802154_SECLEVEL_ATTR_LEVELS]);
+	sl->frame_type = nla_get_u32(tb[NL802154_SECLEVEL_ATTR_FRAME]);
+	sl->device_override = nla_get_u8(tb[NL802154_SECLEVEL_ATTR_DEV_OVERRIDE]);
+
+	if (sl->frame_type > NL802154_FRAME_MAX)
+		return -EINVAL;
+
+	/* boolean on the wire: only 0 and 1 are accepted */
+	if (sl->device_override != 0 && sl->device_override != 1)
+		return -EINVAL;
+
+	if (sl->frame_type != NL802154_FRAME_CMD)
+		return 0;
+
+	/* command frames are further qualified by the command id */
+	if (!tb[NL802154_SECLEVEL_ATTR_CMD_FRAME])
+		return -EINVAL;
+
+	sl->cmd_frame_id = nla_get_u32(tb[NL802154_SECLEVEL_ATTR_CMD_FRAME]);
+	if (sl->cmd_frame_id > NL802154_CMD_FRAME_MAX)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* NL802154_CMD_NEW_SEC_LEVEL handler: parse NL802154_ATTR_SEC_LEVEL and
+ * add the described entry via rdev_add_seclevel().
+ *
+ * Returns the result of rdev_add_seclevel(), or -EINVAL if the security
+ * level description cannot be parsed.
+ */
+static int nl802154_add_llsec_seclevel(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct ieee802154_llsec_seclevel sl;
+	int rc;
+
+	rc = llsec_parse_seclevel(info->attrs[NL802154_ATTR_SEC_LEVEL], &sl);
+	if (rc < 0)
+		return -EINVAL;
+
+	return rdev_add_seclevel(rdev, wpan_dev, &sl);
+}
+
+/* NL802154_CMD_DEL_SEC_LEVEL handler: parse NL802154_ATTR_SEC_LEVEL and
+ * delete the described entry via rdev_del_seclevel().
+ *
+ * Returns the result of rdev_del_seclevel(), or -EINVAL if the attribute
+ * is missing or cannot be parsed.
+ */
+static int nl802154_del_llsec_seclevel(struct sk_buff *skb,
+				       struct genl_info *info)
+{
+	struct cfg802154_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+	struct nlattr *level_attr = info->attrs[NL802154_ATTR_SEC_LEVEL];
+	struct ieee802154_llsec_seclevel sl;
+
+	if (!level_attr)
+		return -EINVAL;
+
+	if (llsec_parse_seclevel(level_attr, &sl) < 0)
+		return -EINVAL;
+
+	return rdev_del_seclevel(rdev, wpan_dev, &sl);
+}
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
+
#define NL802154_FLAG_NEED_WPAN_PHY 0x01
#define NL802154_FLAG_NEED_NETDEV 0x02
#define NL802154_FLAG_NEED_RTNL 0x04
@@ -1289,6 +2303,119 @@ static const struct genl_ops nl802154_ops[] = {
.internal_flags = NL802154_FLAG_NEED_NETDEV |
NL802154_FLAG_NEED_RTNL,
},
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+ {
+ .cmd = NL802154_CMD_SET_SEC_PARAMS,
+ .doit = nl802154_set_llsec_params,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_GET_SEC_KEY,
+ /* TODO .doit by matching key id? */
+ .dumpit = nl802154_dump_llsec_key,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_NEW_SEC_KEY,
+ .doit = nl802154_add_llsec_key,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_DEL_SEC_KEY,
+ .doit = nl802154_del_llsec_key,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ /* TODO unique identifier must short+pan OR extended_addr */
+ {
+ .cmd = NL802154_CMD_GET_SEC_DEV,
+ /* TODO .doit by matching extended_addr? */
+ .dumpit = nl802154_dump_llsec_dev,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_NEW_SEC_DEV,
+ .doit = nl802154_add_llsec_dev,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_DEL_SEC_DEV,
+ .doit = nl802154_del_llsec_dev,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ /* TODO remove complete devkey, put it as nested? */
+ {
+ .cmd = NL802154_CMD_GET_SEC_DEVKEY,
+ /* TODO doit by matching ??? */
+ .dumpit = nl802154_dump_llsec_devkey,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_NEW_SEC_DEVKEY,
+ .doit = nl802154_add_llsec_devkey,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_DEL_SEC_DEVKEY,
+ .doit = nl802154_del_llsec_devkey,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_GET_SEC_LEVEL,
+ /* TODO .doit by matching frame_type? */
+ .dumpit = nl802154_dump_llsec_seclevel,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_NEW_SEC_LEVEL,
+ .doit = nl802154_add_llsec_seclevel,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_DEL_SEC_LEVEL,
+ /* TODO match frame_type only? */
+ .doit = nl802154_del_llsec_seclevel,
+ .policy = nl802154_policy,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_NEED_RTNL,
+ },
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
};
/* initialisation/exit functions */
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 03b357501cc5..4441c63b3ea6 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -208,4 +208,113 @@ rdev_set_ackreq_default(struct cfg802154_registered_device *rdev,
return ret;
}
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+/* TODO this is already a nl802154, so move into ieee802154 */
+
+/* Thin wrappers that forward llsec requests to the driver callbacks in
+ * rdev->ops, passing the wpan_phy embedded in @rdev.
+ */
+
+/* Fetch the llsec table of @wpan_dev into *@table. */
+static inline void
+rdev_get_llsec_table(struct cfg802154_registered_device *rdev,
+		     struct wpan_dev *wpan_dev,
+		     struct ieee802154_llsec_table **table)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	rdev->ops->get_llsec_table(phy, wpan_dev, table);
+}
+
+/* Invoke the driver's lock_llsec_table callback for @wpan_dev. */
+static inline void
+rdev_lock_llsec_table(struct cfg802154_registered_device *rdev,
+		      struct wpan_dev *wpan_dev)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	rdev->ops->lock_llsec_table(phy, wpan_dev);
+}
+
+/* Invoke the driver's unlock_llsec_table callback for @wpan_dev. */
+static inline void
+rdev_unlock_llsec_table(struct cfg802154_registered_device *rdev,
+			struct wpan_dev *wpan_dev)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	rdev->ops->unlock_llsec_table(phy, wpan_dev);
+}
+
+/* Read the llsec parameters of @wpan_dev into @params. */
+static inline int
+rdev_get_llsec_params(struct cfg802154_registered_device *rdev,
+		      struct wpan_dev *wpan_dev,
+		      struct ieee802154_llsec_params *params)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->get_llsec_params(phy, wpan_dev, params);
+}
+
+/* Pass @params and the @changed bitmask to the driver's set_llsec_params. */
+static inline int
+rdev_set_llsec_params(struct cfg802154_registered_device *rdev,
+		      struct wpan_dev *wpan_dev,
+		      const struct ieee802154_llsec_params *params,
+		      u32 changed)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->set_llsec_params(phy, wpan_dev, params, changed);
+}
+
+/* Add the llsec key @key under key id @id. */
+static inline int
+rdev_add_llsec_key(struct cfg802154_registered_device *rdev,
+		   struct wpan_dev *wpan_dev,
+		   const struct ieee802154_llsec_key_id *id,
+		   const struct ieee802154_llsec_key *key)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->add_llsec_key(phy, wpan_dev, id, key);
+}
+
+/* Delete the llsec key identified by @id. */
+static inline int
+rdev_del_llsec_key(struct cfg802154_registered_device *rdev,
+		   struct wpan_dev *wpan_dev,
+		   const struct ieee802154_llsec_key_id *id)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->del_llsec_key(phy, wpan_dev, id);
+}
+
+/* Add the security level entry @sl. */
+static inline int
+rdev_add_seclevel(struct cfg802154_registered_device *rdev,
+		  struct wpan_dev *wpan_dev,
+		  const struct ieee802154_llsec_seclevel *sl)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->add_seclevel(phy, wpan_dev, sl);
+}
+
+/* Delete the security level entry described by @sl. */
+static inline int
+rdev_del_seclevel(struct cfg802154_registered_device *rdev,
+		  struct wpan_dev *wpan_dev,
+		  const struct ieee802154_llsec_seclevel *sl)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->del_seclevel(phy, wpan_dev, sl);
+}
+
+/* Add the llsec device described by @dev_desc. */
+static inline int
+rdev_add_device(struct cfg802154_registered_device *rdev,
+		struct wpan_dev *wpan_dev,
+		const struct ieee802154_llsec_device *dev_desc)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->add_device(phy, wpan_dev, dev_desc);
+}
+
+/* Delete the llsec device with extended address @extended_addr. */
+static inline int
+rdev_del_device(struct cfg802154_registered_device *rdev,
+		struct wpan_dev *wpan_dev, __le64 extended_addr)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->del_device(phy, wpan_dev, extended_addr);
+}
+
+/* Add device key @devkey for the device with address @extended_addr. */
+static inline int
+rdev_add_devkey(struct cfg802154_registered_device *rdev,
+		struct wpan_dev *wpan_dev, __le64 extended_addr,
+		const struct ieee802154_llsec_device_key *devkey)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->add_devkey(phy, wpan_dev, extended_addr, devkey);
+}
+
+/* Delete device key @devkey of the device with address @extended_addr. */
+static inline int
+rdev_del_devkey(struct cfg802154_registered_device *rdev,
+		struct wpan_dev *wpan_dev, __le64 extended_addr,
+		const struct ieee802154_llsec_device_key *devkey)
+{
+	struct wpan_phy *phy = &rdev->wpan_phy;
+
+	return rdev->ops->del_devkey(phy, wpan_dev, extended_addr, devkey);
+}
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
+
#endif /* __CFG802154_RDEV_OPS */
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index b6eacf30ee7a..a548be247e15 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -273,7 +273,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
goto out;
}
- mtu = dev->mtu;
+ mtu = IEEE802154_MTU;
pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
if (size > mtu) {
@@ -637,7 +637,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
err = -ENXIO;
goto out;
}
- mtu = dev->mtu;
+ mtu = IEEE802154_MTU;
pr_debug("name = %s, mtu = %u\n", dev->name, mtu);
if (size > mtu) {
@@ -676,8 +676,8 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
cb->seclevel = ro->seclevel;
cb->seclevel_override = ro->seclevel_override;
- err = dev_hard_header(skb, dev, ETH_P_IEEE802154, &dst_addr,
- ro->bound ? &ro->src_addr : NULL, size);
+ err = wpan_dev_hard_header(skb, dev, &dst_addr,
+ ro->bound ? &ro->src_addr : NULL, size);
if (err < 0)
goto out_skb;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 1d0c3adb6f34..11c4ca13ec3b 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -119,7 +119,7 @@
#ifdef CONFIG_IP_MROUTE
#include <linux/mroute.h>
#endif
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/* The inetsw table contains everything that inet_create needs to
@@ -219,17 +219,13 @@ int inet_listen(struct socket *sock, int backlog)
* shutdown() (rather than close()).
*/
if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 &&
- !inet_csk(sk)->icsk_accept_queue.fastopenq) {
+ !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0)
- err = fastopen_init_queue(sk, backlog);
+ fastopen_queue_tune(sk, backlog);
else if ((sysctl_tcp_fastopen &
TFO_SERVER_WO_SOCKOPT2) != 0)
- err = fastopen_init_queue(sk,
+ fastopen_queue_tune(sk,
((uint)sysctl_tcp_fastopen) >> 16);
- else
- err = 0;
- if (err)
- goto out;
tcp_fastopen_init_key_once(true);
}
@@ -450,7 +446,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
}
- tb_id = vrf_dev_table_ifindex(net, sk->sk_bound_dev_if) ? : tb_id;
+ tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
/* Not specified by any standard per-se, however it breaks too
@@ -1043,22 +1039,16 @@ void inet_register_protosw(struct inet_protosw *p)
goto out_illegal;
/* If we are trying to override a permanent protocol, bail. */
- answer = NULL;
last_perm = &inetsw[p->type];
list_for_each(lh, &inetsw[p->type]) {
answer = list_entry(lh, struct inet_protosw, list);
-
/* Check only the non-wild match. */
- if (INET_PROTOSW_PERMANENT & answer->flags) {
- if (protocol == answer->protocol)
- break;
- last_perm = lh;
- }
-
- answer = NULL;
+ if ((INET_PROTOSW_PERMANENT & answer->flags) == 0)
+ break;
+ if (protocol == answer->protocol)
+ goto out_permanent;
+ last_perm = lh;
}
- if (answer)
- goto out_permanent;
/* Add the new entry after the last permanent entry if any, so that
* the new entry does not override a permanent entry when matched with
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 0c9c3482e419..59b3e0e8fd51 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -624,14 +624,20 @@ out:
}
EXPORT_SYMBOL(arp_create);
+static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ return dev_queue_xmit(skb);
+}
+
/*
* Send an arp packet.
*/
void arp_xmit(struct sk_buff *skb)
{
/* Send it off, maybe filter it using firewalling first. */
- NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb,
- NULL, skb->dev, dev_queue_xmit_sk);
+ NF_HOOK(NFPROTO_ARP, NF_ARP_OUT,
+ dev_net(skb->dev), NULL, skb, NULL, skb->dev,
+ arp_xmit_finish);
}
EXPORT_SYMBOL(arp_xmit);
@@ -639,7 +645,7 @@ EXPORT_SYMBOL(arp_xmit);
* Process an arp request.
*/
-static int arp_process(struct sock *sk, struct sk_buff *skb)
+static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -651,7 +657,6 @@ static int arp_process(struct sock *sk, struct sk_buff *skb)
u16 dev_type = dev->type;
int addr_type;
struct neighbour *n;
- struct net *net = dev_net(dev);
struct dst_entry *reply_dst = NULL;
bool is_garp = false;
@@ -872,7 +877,7 @@ out_free_dst:
static void parp_redo(struct sk_buff *skb)
{
- arp_process(NULL, skb);
+ arp_process(dev_net(skb->dev), NULL, skb);
}
@@ -905,8 +910,9 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev,
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
- return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb,
- dev, NULL, arp_process);
+ return NF_HOOK(NFPROTO_ARP, NF_ARP_IN,
+ dev_net(dev), NULL, skb, dev, NULL,
+ arp_process);
consumeskb:
consume_skb(skb);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 2d9cb1748f81..735008472844 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1654,7 +1654,8 @@ static size_t inet_get_link_af_size(const struct net_device *dev)
return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
}
-static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
+ u32 ext_filter_mask)
{
struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
struct nlattr *nla;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 690bcbc59f26..d7c2bb0c4f65 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -45,7 +45,7 @@
#include <net/ip_fib.h>
#include <net/rtnetlink.h>
#include <net/xfrm.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#include <trace/events/fib.h>
#ifndef CONFIG_IP_MULTIPLE_TABLES
@@ -255,7 +255,7 @@ EXPORT_SYMBOL(inet_addr_type);
unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev,
__be32 addr)
{
- u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, dev, addr, rt_table);
}
@@ -268,7 +268,7 @@ unsigned int inet_addr_type_dev_table(struct net *net,
const struct net_device *dev,
__be32 addr)
{
- u32 rt_table = vrf_dev_table(dev) ? : RT_TABLE_LOCAL;
+ u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
return __inet_dev_addr_type(net, NULL, addr, rt_table);
}
@@ -332,7 +332,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
bool dev_match;
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = vrf_master_ifindex_rcu(dev);
+ fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev);
if (!fl4.flowi4_iif)
fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX;
fl4.daddr = src;
@@ -367,7 +367,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
if (nh->nh_dev == dev) {
dev_match = true;
break;
- } else if (vrf_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
+ } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) {
dev_match = true;
break;
}
@@ -804,7 +804,7 @@ out:
static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
- u32 tb_id = vrf_dev_table_rtnl(ifa->ifa_dev->dev);
+ u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
struct fib_table *tb;
struct fib_config cfg = {
.fc_protocol = RTPROT_KERNEL,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 064bd3caaa4f..42778d9d71e5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -57,8 +57,7 @@ static unsigned int fib_info_cnt;
static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
-static DEFINE_SPINLOCK(fib_multipath_lock);
+u32 fib_multipath_secret __read_mostly;
#define for_nexthops(fi) { \
int nhsel; const struct fib_nh *nh; \
@@ -532,7 +531,67 @@ errout:
return ret;
}
-#endif
+static void fib_rebalance(struct fib_info *fi)
+{
+ int total;
+ int w;
+ struct in_device *in_dev;
+
+ if (fi->fib_nhs < 2)
+ return;
+
+ total = 0;
+ for_nexthops(fi) {
+ if (nh->nh_flags & RTNH_F_DEAD)
+ continue;
+
+ in_dev = __in_dev_get_rtnl(nh->nh_dev);
+
+ if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nh->nh_flags & RTNH_F_LINKDOWN)
+ continue;
+
+ total += nh->nh_weight;
+ } endfor_nexthops(fi);
+
+ w = 0;
+ change_nexthops(fi) {
+ int upper_bound;
+
+ in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev);
+
+ if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
+ upper_bound = -1;
+ } else if (in_dev &&
+ IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ nexthop_nh->nh_flags & RTNH_F_LINKDOWN) {
+ upper_bound = -1;
+ } else {
+ w += nexthop_nh->nh_weight;
+ upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31,
+ total) - 1;
+ }
+
+ atomic_set(&nexthop_nh->nh_upper_bound, upper_bound);
+ } endfor_nexthops(fi);
+
+ net_get_random_once(&fib_multipath_secret,
+ sizeof(fib_multipath_secret));
+}
+
+static inline void fib_add_weight(struct fib_info *fi,
+ const struct fib_nh *nh)
+{
+ fi->fib_weight += nh->nh_weight;
+}
+
+#else /* CONFIG_IP_ROUTE_MULTIPATH */
+
+#define fib_rebalance(fi) do { } while (0)
+#define fib_add_weight(fi, nh) do { } while (0)
+
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
static int fib_encap_match(struct net *net, u16 encap_type,
struct nlattr *encap,
@@ -1094,8 +1153,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
change_nexthops(fi) {
fib_info_update_nh_saddr(net, nexthop_nh);
+ fib_add_weight(fi, nexthop_nh);
} endfor_nexthops(fi)
+ fib_rebalance(fi);
+
link_it:
ofi = fib_find_info(fi);
if (ofi) {
@@ -1317,12 +1379,6 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
break;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- spin_lock_bh(&fib_multipath_lock);
- fi->fib_power -= nexthop_nh->nh_power;
- nexthop_nh->nh_power = 0;
- spin_unlock_bh(&fib_multipath_lock);
-#endif
dead++;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
@@ -1345,6 +1401,8 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
}
ret++;
}
+
+ fib_rebalance(fi);
}
return ret;
@@ -1467,20 +1525,15 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
!__in_dev_get_rtnl(dev))
continue;
alive++;
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- spin_lock_bh(&fib_multipath_lock);
- nexthop_nh->nh_power = 0;
nexthop_nh->nh_flags &= ~nh_flags;
- spin_unlock_bh(&fib_multipath_lock);
-#else
- nexthop_nh->nh_flags &= ~nh_flags;
-#endif
} endfor_nexthops(fi)
if (alive > 0) {
fi->fib_flags &= ~nh_flags;
ret++;
}
+
+ fib_rebalance(fi);
}
return ret;
@@ -1488,62 +1541,40 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
#ifdef CONFIG_IP_ROUTE_MULTIPATH
-/*
- * The algorithm is suboptimal, but it provides really
- * fair weighted route distribution.
- */
-void fib_select_multipath(struct fib_result *res)
+void fib_select_multipath(struct fib_result *res, int hash)
{
struct fib_info *fi = res->fi;
- struct in_device *in_dev;
- int w;
-
- spin_lock_bh(&fib_multipath_lock);
- if (fi->fib_power <= 0) {
- int power = 0;
- change_nexthops(fi) {
- in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
- if (nexthop_nh->nh_flags & RTNH_F_DEAD)
- continue;
- if (in_dev &&
- IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
- nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
- continue;
- power += nexthop_nh->nh_weight;
- nexthop_nh->nh_power = nexthop_nh->nh_weight;
- } endfor_nexthops(fi);
- fi->fib_power = power;
- if (power <= 0) {
- spin_unlock_bh(&fib_multipath_lock);
- /* Race condition: route has just become dead. */
- res->nh_sel = 0;
- return;
- }
- }
-
- /* w should be random number [0..fi->fib_power-1],
- * it is pretty bad approximation.
- */
-
- w = jiffies % fi->fib_power;
+ for_nexthops(fi) {
+ if (hash > atomic_read(&nh->nh_upper_bound))
+ continue;
- change_nexthops(fi) {
- if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
- nexthop_nh->nh_power) {
- w -= nexthop_nh->nh_power;
- if (w <= 0) {
- nexthop_nh->nh_power--;
- fi->fib_power--;
- res->nh_sel = nhsel;
- spin_unlock_bh(&fib_multipath_lock);
- return;
- }
- }
+ res->nh_sel = nhsel;
+ return;
} endfor_nexthops(fi);
/* Race condition: route has just become dead. */
res->nh_sel = 0;
- spin_unlock_bh(&fib_multipath_lock);
}
#endif
+
+void fib_select_path(struct net *net, struct fib_result *res,
+ struct flowi4 *fl4, int mp_hash)
+{
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ if (res->fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
+ if (mp_hash < 0)
+ mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr);
+ fib_select_multipath(res, mp_hash);
+ }
+ else
+#endif
+ if (!res->prefixlen &&
+ res->table->tb_num_default > 1 &&
+ res->type == RTN_UNICAST && !fl4->flowi4_oif)
+ fib_select_default(fl4, res);
+
+ if (!fl4->saddr)
+ fl4->saddr = FIB_RES_PREFSRC(net, *res);
+}
+EXPORT_SYMBOL_GPL(fib_select_path);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e5eb8ac4089d..36e26977c908 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -96,7 +96,7 @@
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/ip_fib.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/*
* Build xmit assembly blocks
@@ -309,7 +309,7 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
rc = false;
if (icmp_global_allow()) {
- int vif = vrf_master_ifindex(dst->dev);
+ int vif = l3mdev_master_ifindex(dst->dev);
struct inet_peer *peer;
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
@@ -427,7 +427,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
fl4.flowi4_mark = mark;
fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
fl4.flowi4_proto = IPPROTO_ICMP;
- fl4.flowi4_oif = vrf_master_ifindex(skb->dev);
+ fl4.flowi4_oif = l3mdev_master_ifindex(skb->dev);
security_skb_classify_flow(skb, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt))
@@ -440,6 +440,22 @@ out_unlock:
icmp_xmit_unlock(sk);
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+/* Source and destination is swapped. See ip_multipath_icmp_hash */
+static int icmp_multipath_hash_skb(const struct sk_buff *skb)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+
+ return fib_multipath_hash(iph->daddr, iph->saddr);
+}
+
+#else
+
+#define icmp_multipath_hash_skb(skb) (-1)
+
+#endif
+
static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
@@ -461,10 +477,11 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_proto = IPPROTO_ICMP;
fl4->fl4_icmp_type = type;
fl4->fl4_icmp_code = code;
- fl4->flowi4_oif = vrf_master_ifindex(skb_in->dev);
+ fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);
security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
- rt = __ip_route_output_key(net, fl4);
+ rt = __ip_route_output_key_hash(net, fl4,
+ icmp_multipath_hash_skb(skb_in));
if (IS_ERR(rt))
return rt;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d38b8b61eaee..64aaf3522a59 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -397,7 +397,7 @@ static int igmpv3_sendpack(struct sk_buff *skb)
pig->csum = ip_compute_csum(igmp_hdr(skb), igmplen);
- return ip_local_out(skb);
+ return ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
}
static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
@@ -739,7 +739,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
ih->group = group;
ih->csum = ip_compute_csum((void *)ih, sizeof(struct igmphdr));
- return ip_local_out(skb);
+ return ip_local_out(net, skb->sk, skb);
}
static void igmp_gq_timer_expire(unsigned long data)
@@ -2569,7 +2569,7 @@ void ip_mc_drop_socket(struct sock *sk)
}
/* called with rcu_read_lock() */
-int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u16 proto)
+int ip_check_mc_rcu(struct in_device *in_dev, __be32 mc_addr, __be32 src_addr, u8 proto)
{
struct ip_mc_list *im;
struct ip_mc_list __rcu **mc_hash;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 61b45a17fc73..8430bc8ccd58 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -330,14 +330,12 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
if (error)
goto out_err;
}
- req = reqsk_queue_remove(queue);
+ req = reqsk_queue_remove(queue, sk);
newsk = req->sk;
- sk_acceptq_removed(sk);
if (sk->sk_protocol == IPPROTO_TCP &&
- tcp_rsk(req)->tfo_listener &&
- queue->fastopenq) {
- spin_lock_bh(&queue->fastopenq->lock);
+ tcp_rsk(req)->tfo_listener) {
+ spin_lock_bh(&queue->fastopenq.lock);
if (tcp_rsk(req)->tfo_listener) {
/* We are still waiting for the final ACK from 3WHS
* so can't free req now. Instead, we set req->sk to
@@ -348,7 +346,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
req->sk = NULL;
req = NULL;
}
- spin_unlock_bh(&queue->fastopenq->lock);
+ spin_unlock_bh(&queue->fastopenq.lock);
}
out:
release_sock(sk);
@@ -408,7 +406,7 @@ void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long len)
}
EXPORT_SYMBOL(inet_csk_reset_keepalive_timer);
-struct dst_entry *inet_csk_route_req(struct sock *sk,
+struct dst_entry *inet_csk_route_req(const struct sock *sk,
struct flowi4 *fl4,
const struct request_sock *req)
{
@@ -439,7 +437,7 @@ no_route:
}
EXPORT_SYMBOL_GPL(inet_csk_route_req);
-struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
+struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct sock *newsk,
const struct request_sock *req)
{
@@ -478,65 +476,12 @@ no_route:
}
EXPORT_SYMBOL_GPL(inet_csk_route_child_sock);
-static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
- const u32 rnd, const u32 synq_hsize)
-{
- return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
-}
-
#if IS_ENABLED(CONFIG_IPV6)
#define AF_INET_FAMILY(fam) ((fam) == AF_INET)
#else
#define AF_INET_FAMILY(fam) true
#endif
-/* Note: this is temporary :
- * req sock will no longer be in listener hash table
-*/
-struct request_sock *inet_csk_search_req(struct sock *sk,
- const __be16 rport,
- const __be32 raddr,
- const __be32 laddr)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- struct request_sock *req;
- u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd,
- lopt->nr_table_entries);
-
- spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
- for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
- const struct inet_request_sock *ireq = inet_rsk(req);
-
- if (ireq->ir_rmt_port == rport &&
- ireq->ir_rmt_addr == raddr &&
- ireq->ir_loc_addr == laddr &&
- AF_INET_FAMILY(req->rsk_ops->family)) {
- atomic_inc(&req->rsk_refcnt);
- WARN_ON(req->sk);
- break;
- }
- }
- spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
-
- return req;
-}
-EXPORT_SYMBOL_GPL(inet_csk_search_req);
-
-void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
- unsigned long timeout)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = inet_synq_hash(inet_rsk(req)->ir_rmt_addr,
- inet_rsk(req)->ir_rmt_port,
- lopt->hash_rnd, lopt->nr_table_entries);
-
- reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
- inet_csk_reqsk_queue_added(sk, timeout);
-}
-EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
-
/* Only thing we need from tcp.h */
extern int sysctl_tcp_synack_retries;
@@ -563,7 +508,7 @@ static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
req->num_timeout >= rskq_defer_accept - 1;
}
-int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
+int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
int err = req->rsk_ops->rtx_syn_ack(parent, req);
@@ -573,27 +518,20 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req)
}
EXPORT_SYMBOL(inet_rtx_syn_ack);
-/* return true if req was found in the syn_table[] */
+/* return true if req was found in the ehash table */
static bool reqsk_queue_unlink(struct request_sock_queue *queue,
struct request_sock *req)
{
- struct request_sock **prev;
- struct listen_sock *lopt;
- bool found = false;
-
- spin_lock(&queue->syn_wait_lock);
- lopt = queue->listen_opt;
- if (lopt) {
- for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL;
- prev = &(*prev)->dl_next) {
- if (*prev == req) {
- *prev = req->dl_next;
- found = true;
- break;
- }
- }
- }
- spin_unlock(&queue->syn_wait_lock);
+ struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo;
+ spinlock_t *lock;
+ bool found;
+
+ lock = inet_ehash_lockp(hashinfo, req->rsk_hash);
+
+ spin_lock(lock);
+ found = __sk_nulls_del_node_init_rcu(req_to_sk(req));
+ spin_unlock(lock);
+
if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer))
reqsk_put(req);
return found;
@@ -608,21 +546,25 @@ void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
}
EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
+void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req)
+{
+ inet_csk_reqsk_queue_drop(sk, req);
+ reqsk_put(req);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
+
static void reqsk_timer_handler(unsigned long data)
{
struct request_sock *req = (struct request_sock *)data;
struct sock *sk_listener = req->rsk_listener;
struct inet_connection_sock *icsk = inet_csk(sk_listener);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
- struct listen_sock *lopt = queue->listen_opt;
int qlen, expire = 0, resend = 0;
int max_retries, thresh;
u8 defer_accept;
- if (sk_listener->sk_state != TCP_LISTEN || !lopt) {
- reqsk_put(req);
- return;
- }
+ if (sk_listener->sk_state != TCP_LISTEN)
+ goto drop;
max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
thresh = max_retries;
@@ -643,9 +585,9 @@ static void reqsk_timer_handler(unsigned long data)
* embrions; and abort old ones without pity, if old
* ones are about to clog our table.
*/
- qlen = listen_sock_qlen(lopt);
- if (qlen >> (lopt->max_qlen_log - 1)) {
- int young = listen_sock_young(lopt) << 1;
+ qlen = reqsk_queue_len(queue);
+ if ((qlen << 1) > max(8U, sk_listener->sk_max_ack_backlog)) {
+ int young = reqsk_queue_len_young(queue) << 1;
while (thresh > 2) {
if (qlen < young)
@@ -667,41 +609,40 @@ static void reqsk_timer_handler(unsigned long data)
unsigned long timeo;
if (req->num_timeout++ == 0)
- atomic_inc(&lopt->young_dec);
+ atomic_dec(&queue->young);
timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
mod_timer_pinned(&req->rsk_timer, jiffies + timeo);
return;
}
- inet_csk_reqsk_queue_drop(sk_listener, req);
- reqsk_put(req);
+drop:
+ inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
}
-void reqsk_queue_hash_req(struct request_sock_queue *queue,
- u32 hash, struct request_sock *req,
- unsigned long timeout)
+static void reqsk_queue_hash_req(struct request_sock *req,
+ unsigned long timeout)
{
- struct listen_sock *lopt = queue->listen_opt;
-
req->num_retrans = 0;
req->num_timeout = 0;
req->sk = NULL;
setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
- req->rsk_hash = hash;
+ inet_ehash_insert(req_to_sk(req), NULL);
/* before letting lookups find us, make sure all req fields
* are committed to memory and refcnt initialized.
*/
smp_wmb();
- atomic_set(&req->rsk_refcnt, 2);
+ atomic_set(&req->rsk_refcnt, 2 + 1);
+}
- spin_lock(&queue->syn_wait_lock);
- req->dl_next = lopt->syn_table[hash];
- lopt->syn_table[hash] = req;
- spin_unlock(&queue->syn_wait_lock);
+void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
+ unsigned long timeout)
+{
+ reqsk_queue_hash_req(req, timeout);
+ inet_csk_reqsk_queue_added(sk);
}
-EXPORT_SYMBOL(reqsk_queue_hash_req);
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
/**
* inet_csk_clone_lock - clone an inet socket, and lock its clone
@@ -792,16 +733,14 @@ void inet_csk_prepare_forced_close(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);
-int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+int inet_csk_listen_start(struct sock *sk, int backlog)
{
- struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+ struct inet_sock *inet = inet_sk(sk);
- if (rc != 0)
- return rc;
+ reqsk_queue_alloc(&icsk->icsk_accept_queue);
- sk->sk_max_ack_backlog = 0;
+ sk->sk_max_ack_backlog = backlog;
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
@@ -821,11 +760,57 @@ int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
}
sk->sk_state = TCP_CLOSE;
- __reqsk_queue_destroy(&icsk->icsk_accept_queue);
return -EADDRINUSE;
}
EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+static void inet_child_forget(struct sock *sk, struct request_sock *req,
+ struct sock *child)
+{
+ sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+ sock_orphan(child);
+
+ percpu_counter_inc(sk->sk_prot->orphan_count);
+
+ if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
+ BUG_ON(tcp_sk(child)->fastopen_rsk != req);
+ BUG_ON(sk != req->rsk_listener);
+
+ /* Paranoid, to prevent race condition if
+ * an inbound pkt destined for child is
+ * blocked by sock lock in tcp_v4_rcv().
+ * Also to satisfy an assertion in
+ * tcp_v4_destroy_sock().
+ */
+ tcp_sk(child)->fastopen_rsk = NULL;
+ }
+ inet_csk_destroy_sock(child);
+ reqsk_put(req);
+}
+
+void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req,
+ struct sock *child)
+{
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+
+ spin_lock(&queue->rskq_lock);
+ if (unlikely(sk->sk_state != TCP_LISTEN)) {
+ inet_child_forget(sk, req, child);
+ } else {
+ req->sk = child;
+ req->dl_next = NULL;
+ if (queue->rskq_accept_head == NULL)
+ queue->rskq_accept_head = req;
+ else
+ queue->rskq_accept_tail->dl_next = req;
+ queue->rskq_accept_tail = req;
+ sk_acceptq_added(sk);
+ }
+ spin_unlock(&queue->rskq_lock);
+}
+EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
+
/*
* This routine closes sockets which have been at least partially
* opened, but not yet accepted.
@@ -834,11 +819,7 @@ void inet_csk_listen_stop(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
- struct request_sock *acc_req;
- struct request_sock *req;
-
- /* make all the listen_opt local to us */
- acc_req = reqsk_queue_yank_acceptq(queue);
+ struct request_sock *next, *req;
/* Following specs, it would be better either to send FIN
* (and enter FIN-WAIT-1, it is normal close)
@@ -848,57 +829,34 @@ void inet_csk_listen_stop(struct sock *sk)
* To be honest, we are not able to make either
* of the variants now. --ANK
*/
- reqsk_queue_destroy(queue);
-
- while ((req = acc_req) != NULL) {
+ while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
struct sock *child = req->sk;
- acc_req = req->dl_next;
-
local_bh_disable();
bh_lock_sock(child);
WARN_ON(sock_owned_by_user(child));
sock_hold(child);
- sk->sk_prot->disconnect(child, O_NONBLOCK);
-
- sock_orphan(child);
-
- percpu_counter_inc(sk->sk_prot->orphan_count);
-
- if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(req)->tfo_listener) {
- BUG_ON(tcp_sk(child)->fastopen_rsk != req);
- BUG_ON(sk != req->rsk_listener);
-
- /* Paranoid, to prevent race condition if
- * an inbound pkt destined for child is
- * blocked by sock lock in tcp_v4_rcv().
- * Also to satisfy an assertion in
- * tcp_v4_destroy_sock().
- */
- tcp_sk(child)->fastopen_rsk = NULL;
- }
- inet_csk_destroy_sock(child);
-
+ inet_child_forget(sk, req, child);
bh_unlock_sock(child);
local_bh_enable();
sock_put(child);
- sk_acceptq_removed(sk);
- reqsk_put(req);
+ cond_resched();
}
- if (queue->fastopenq) {
+ if (queue->fastopenq.rskq_rst_head) {
/* Free all the reqs queued in rskq_rst_head. */
- spin_lock_bh(&queue->fastopenq->lock);
- acc_req = queue->fastopenq->rskq_rst_head;
- queue->fastopenq->rskq_rst_head = NULL;
- spin_unlock_bh(&queue->fastopenq->lock);
- while ((req = acc_req) != NULL) {
- acc_req = req->dl_next;
+ spin_lock_bh(&queue->fastopenq.lock);
+ req = queue->fastopenq.rskq_rst_head;
+ queue->fastopenq.rskq_rst_head = NULL;
+ spin_unlock_bh(&queue->fastopenq.lock);
+ while (req != NULL) {
+ next = req->dl_next;
reqsk_put(req);
+ req = next;
}
}
- WARN_ON(sk->sk_ack_backlog);
+ WARN_ON_ONCE(sk->sk_ack_backlog);
}
EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index c3b1f3a0f4cf..ab9f8a66615d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -730,91 +730,21 @@ static void twsk_build_assert(void)
#endif
}
-static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r,
- const struct nlattr *bc)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_sock *inet = inet_sk(sk);
- struct inet_diag_entry entry;
- int j, s_j, reqnum, s_reqnum;
- struct listen_sock *lopt;
- int err = 0;
-
- s_j = cb->args[3];
- s_reqnum = cb->args[4];
-
- if (s_j > 0)
- s_j--;
-
- entry.family = sk->sk_family;
-
- spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
-
- lopt = icsk->icsk_accept_queue.listen_opt;
- if (!lopt || !listen_sock_qlen(lopt))
- goto out;
-
- if (bc) {
- entry.sport = inet->inet_num;
- entry.userlocks = sk->sk_userlocks;
- }
-
- for (j = s_j; j < lopt->nr_table_entries; j++) {
- struct request_sock *req, *head = lopt->syn_table[j];
-
- reqnum = 0;
- for (req = head; req; reqnum++, req = req->dl_next) {
- struct inet_request_sock *ireq = inet_rsk(req);
-
- if (reqnum < s_reqnum)
- continue;
- if (r->id.idiag_dport != ireq->ir_rmt_port &&
- r->id.idiag_dport)
- continue;
-
- if (bc) {
- /* Note: entry.sport and entry.userlocks are already set */
- entry_fill_addrs(&entry, req_to_sk(req));
- entry.dport = ntohs(ireq->ir_rmt_port);
-
- if (!inet_diag_bc_run(bc, &entry))
- continue;
- }
-
- err = inet_req_diag_fill(req_to_sk(req), skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->nlh);
- if (err < 0) {
- cb->args[3] = j + 1;
- cb->args[4] = reqnum;
- goto out;
- }
- }
-
- s_reqnum = 0;
- }
-
-out:
- spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
-
- return err;
-}
-
void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r, struct nlattr *bc)
{
struct net *net = sock_net(skb->sk);
int i, num, s_i, s_num;
+ u32 idiag_states = r->idiag_states;
+ if (idiag_states & TCPF_SYN_RECV)
+ idiag_states |= TCPF_NEW_SYN_RECV;
s_i = cb->args[1];
s_num = num = cb->args[2];
if (cb->args[0] == 0) {
- if (!(r->idiag_states & (TCPF_LISTEN | TCPF_SYN_RECV)))
+ if (!(idiag_states & TCPF_LISTEN))
goto skip_listen_ht;
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
@@ -844,21 +774,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
r->id.idiag_sport)
goto next_listen;
- if (!(r->idiag_states & TCPF_LISTEN) ||
- r->id.idiag_dport ||
+ if (r->id.idiag_dport ||
cb->args[3] > 0)
- goto syn_recv;
-
- if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
- spin_unlock_bh(&ilb->lock);
- goto done;
- }
-
-syn_recv:
- if (!(r->idiag_states & TCPF_SYN_RECV))
goto next_listen;
- if (inet_diag_dump_reqs(skb, sk, cb, r, bc) < 0) {
+ if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) {
spin_unlock_bh(&ilb->lock);
goto done;
}
@@ -879,7 +799,7 @@ skip_listen_ht:
s_i = num = s_num = 0;
}
- if (!(r->idiag_states & ~(TCPF_LISTEN | TCPF_SYN_RECV)))
+ if (!(idiag_states & ~TCPF_LISTEN))
goto out;
for (i = s_i; i <= hashinfo->ehash_mask; i++) {
@@ -906,7 +826,7 @@ skip_listen_ht:
goto next_normal;
state = (sk->sk_state == TCP_TIME_WAIT) ?
inet_twsk(sk)->tw_substate : sk->sk_state;
- if (!(r->idiag_states & (1 << state)))
+ if (!(idiag_states & (1 << state)))
goto next_normal;
if (r->sdiag_family != AF_UNSPEC &&
sk->sk_family != r->sdiag_family)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 89120196a949..958728a22001 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -126,7 +126,7 @@ void inet_put_port(struct sock *sk)
}
EXPORT_SYMBOL(inet_put_port);
-int __inet_inherit_port(struct sock *sk, struct sock *child)
+int __inet_inherit_port(const struct sock *sk, struct sock *child)
{
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
@@ -137,6 +137,10 @@ int __inet_inherit_port(struct sock *sk, struct sock *child)
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
+ if (unlikely(!tb)) {
+ spin_unlock(&head->lock);
+ return -ENOENT;
+ }
if (tb->port != port) {
/* NOTE: using tproxy and redirecting skbs to a proxy
* on a different listener port breaks the assumption
@@ -185,6 +189,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
}
return score;
}
@@ -398,14 +404,18 @@ static u32 inet_sk_port_offset(const struct sock *sk)
inet->inet_dport);
}
-void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+/* insert a socket into ehash, and eventually remove another one
+ * (The another one can be a SYN_RECV or TIMEWAIT
+ */
+int inet_ehash_insert(struct sock *sk, struct sock *osk)
{
struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
struct hlist_nulls_head *list;
struct inet_ehash_bucket *head;
spinlock_t *lock;
+ int ret = 0;
- WARN_ON(!sk_unhashed(sk));
+ WARN_ON_ONCE(!sk_unhashed(sk));
sk->sk_hash = sk_ehashfn(sk);
head = inet_ehash_bucket(hashinfo, sk->sk_hash);
@@ -419,6 +429,12 @@ void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
sk_nulls_del_node_init_rcu(osk);
}
spin_unlock(lock);
+ return ret;
+}
+
+void __inet_hash_nolisten(struct sock *sk, struct sock *osk)
+{
+ inet_ehash_insert(sk, osk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
}
EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 2d3aa408fbdc..da0d7ce85844 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -61,18 +61,18 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
}
-static int ip_forward_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
skb_sender_cpu_clear(skb);
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
int ip_forward(struct sk_buff *skb)
@@ -81,6 +81,7 @@ int ip_forward(struct sk_buff *skb)
struct iphdr *iph; /* Our header */
struct rtable *rt; /* Route we use */
struct ip_options *opt = &(IPCB(skb)->opt);
+ struct net *net;
/* that should never happen */
if (skb->pkt_type != PACKET_HOST)
@@ -99,6 +100,7 @@ int ip_forward(struct sk_buff *skb)
return NET_RX_SUCCESS;
skb_forward_csum(skb);
+ net = dev_net(skb->dev);
/*
* According to the RFC, we must first decrease the TTL field. If
@@ -119,7 +121,7 @@ int ip_forward(struct sk_buff *skb)
IPCB(skb)->flags |= IPSKB_FORWARDED;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
if (ip_exceeds_mtu(skb, mtu)) {
- IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
goto drop;
@@ -143,8 +145,9 @@ int ip_forward(struct sk_buff *skb)
skb->priority = rt_tos2priority(iph->tos);
- return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb,
- skb->dev, rt->dst.dev, ip_forward_finish);
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
+ net, NULL, skb, skb->dev, rt->dst.dev,
+ ip_forward_finish);
sr_failed:
/*
@@ -155,7 +158,7 @@ sr_failed:
too_many_hops:
/* Tell the sender its packet died... */
- IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
icmp_send(skb, ICMP_TIME_EXCEEDED, ICMP_EXC_TTL, 0);
drop:
kfree_skb(skb);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index fa7f15305f9a..5482745d5d68 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -48,7 +48,7 @@
#include <linux/inet.h>
#include <linux/netfilter_ipv4.h>
#include <net/inet_ecn.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
/* NOTE. Logic of IP defragmentation is parallel to corresponding IPv6
* code now. If you change something here, _PLEASE_ update ipv6/reassembly.c
@@ -78,7 +78,7 @@ struct ipq {
u8 ecn; /* RFC3168 support */
u16 max_df_size; /* largest frag with DF set seen */
int iif;
- int vif; /* VRF device index */
+ int vif; /* L3 master device index */
unsigned int rid;
struct inet_peer *peer;
};
@@ -654,11 +654,10 @@ out_fail:
}
/* Process an incoming IP datagram fragment. */
-int ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
struct net_device *dev = skb->dev ? : skb_dst(skb)->dev;
- int vif = vrf_master_ifindex_rcu(dev);
- struct net *net = dev_net(dev);
+ int vif = l3mdev_master_ifindex_rcu(dev);
struct ipq *qp;
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS);
@@ -683,7 +682,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
}
EXPORT_SYMBOL(ip_defrag);
-struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
+struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
struct iphdr iph;
int netoff;
@@ -712,7 +711,7 @@ struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
if (pskb_trim_rcsum(skb, netoff + len))
return skb;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- if (ip_defrag(skb, user))
+ if (ip_defrag(net, skb, user))
return NULL;
skb_clear_hash(skb);
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index f4fc8a77aaa7..b1209b63381f 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -157,6 +157,7 @@ bool ip_call_ra_chain(struct sk_buff *skb)
u8 protocol = ip_hdr(skb)->protocol;
struct sock *last = NULL;
struct net_device *dev = skb->dev;
+ struct net *net = dev_net(dev);
for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) {
struct sock *sk = ra->sk;
@@ -167,9 +168,9 @@ bool ip_call_ra_chain(struct sk_buff *skb)
if (sk && inet_sk(sk)->inet_num == protocol &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == dev->ifindex) &&
- net_eq(sock_net(sk), dev_net(dev))) {
+ net_eq(sock_net(sk), net)) {
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN))
+ if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN))
return true;
}
if (last) {
@@ -188,10 +189,8 @@ bool ip_call_ra_chain(struct sk_buff *skb)
return false;
}
-static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net *net = dev_net(skb->dev);
-
__skb_pull(skb, skb_network_header_len(skb));
rcu_read_lock();
@@ -248,14 +247,15 @@ int ip_local_deliver(struct sk_buff *skb)
/*
* Reassemble IP fragments.
*/
+ struct net *net = dev_net(skb->dev);
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
+ if (ip_defrag(net, skb, IP_DEFRAG_LOCAL_DELIVER))
return 0;
}
- return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb,
- skb->dev, NULL,
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN,
+ net, NULL, skb, skb->dev, NULL,
ip_local_deliver_finish);
}
@@ -311,7 +311,7 @@ drop:
int sysctl_ip_early_demux __read_mostly = 1;
EXPORT_SYMBOL(sysctl_ip_early_demux);
-static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
@@ -337,8 +337,7 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
iph->tos, skb->dev);
if (unlikely(err)) {
if (err == -EXDEV)
- NET_INC_STATS_BH(dev_net(skb->dev),
- LINUX_MIB_IPRPFILTER);
+ NET_INC_STATS_BH(net, LINUX_MIB_IPRPFILTER);
goto drop;
}
}
@@ -359,11 +358,9 @@ static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
rt = skb_rtable(skb);
if (rt->rt_type == RTN_MULTICAST) {
- IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST,
- skb->len);
+ IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INMCAST, skb->len);
} else if (rt->rt_type == RTN_BROADCAST)
- IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST,
- skb->len);
+ IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_INBCAST, skb->len);
return dst_input(skb);
@@ -378,6 +375,7 @@ drop:
int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
const struct iphdr *iph;
+ struct net *net;
u32 len;
/* When the interface is in promisc. mode, drop all the crap
@@ -387,11 +385,12 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
goto drop;
- IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
+ net = dev_net(dev);
+ IP_UPD_PO_STATS_BH(net, IPSTATS_MIB_IN, skb->len);
skb = skb_share_check(skb, GFP_ATOMIC);
if (!skb) {
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
goto out;
}
@@ -417,7 +416,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1);
BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0);
BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE);
- IP_ADD_STATS_BH(dev_net(dev),
+ IP_ADD_STATS_BH(net,
IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK),
max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
@@ -431,7 +430,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
len = ntohs(iph->tot_len);
if (skb->len < len) {
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
} else if (len < (iph->ihl*4))
goto inhdr_error;
@@ -441,7 +440,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
* Note this now means skb->len holds ntohs(iph->tot_len).
*/
if (pskb_trim_rcsum(skb, len)) {
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INDISCARDS);
goto drop;
}
@@ -453,14 +452,14 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
- dev, NULL,
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ net, NULL, skb, dev, NULL,
ip_rcv_finish);
csum_error:
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_CSUMERRORS);
inhdr_error:
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_INHDRERRORS);
drop:
kfree_skb(skb);
out:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0138fada0951..50e29737b584 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -83,9 +83,10 @@
int sysctl_ip_default_ttl __read_mostly = IPDEFTTL;
EXPORT_SYMBOL(sysctl_ip_default_ttl);
-static int ip_fragment(struct sock *sk, struct sk_buff *skb,
- unsigned int mtu,
- int (*output)(struct sock *, struct sk_buff *));
+static int
+ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ unsigned int mtu,
+ int (*output)(struct net *, struct sock *, struct sk_buff *));
/* Generate a checksum for an outgoing IP datagram. */
void ip_send_check(struct iphdr *iph)
@@ -95,32 +96,28 @@ void ip_send_check(struct iphdr *iph)
}
EXPORT_SYMBOL(ip_send_check);
-static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = htons(skb->len);
ip_send_check(iph);
- return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL,
- skb_dst(skb)->dev, dst_output_sk);
-}
-
-int __ip_local_out(struct sk_buff *skb)
-{
- return __ip_local_out_sk(skb->sk, skb);
+ return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ dst_output);
}
-int ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int err;
- err = __ip_local_out(skb);
+ err = __ip_local_out(net, sk, skb);
if (likely(err == 1))
- err = dst_output_sk(sk, skb);
+ err = dst_output(net, sk, skb);
return err;
}
-EXPORT_SYMBOL_GPL(ip_local_out_sk);
+EXPORT_SYMBOL_GPL(ip_local_out);
static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
{
@@ -135,11 +132,12 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst)
* Add an ip header to a skbuff and send it out.
*
*/
-int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
+int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
__be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
{
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = skb_rtable(skb);
+ struct net *net = sock_net(sk);
struct iphdr *iph;
/* Build the IP header. */
@@ -149,15 +147,17 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->version = 4;
iph->ihl = 5;
iph->tos = inet->tos;
- if (ip_dont_fragment(sk, &rt->dst))
- iph->frag_off = htons(IP_DF);
- else
- iph->frag_off = 0;
iph->ttl = ip_select_ttl(inet, &rt->dst);
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(sock_net(sk), skb, sk);
+ if (ip_dont_fragment(sk, &rt->dst)) {
+ iph->frag_off = htons(IP_DF);
+ iph->id = 0;
+ } else {
+ iph->frag_off = 0;
+ __ip_select_ident(net, iph, 1);
+ }
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -168,11 +168,11 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
skb->mark = sk->sk_mark;
/* Send it out. */
- return ip_local_out(skb);
+ return ip_local_out(net, skb->sk, skb);
}
EXPORT_SYMBOL_GPL(ip_build_and_send_pkt);
-static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct rtable *rt = (struct rtable *)dst;
@@ -182,9 +182,9 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
u32 nexthop;
if (rt->rt_type == RTN_MULTICAST) {
- IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTMCAST, skb->len);
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
} else if (rt->rt_type == RTN_BROADCAST)
- IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUTBCAST, skb->len);
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTBCAST, skb->len);
/* Be paranoid, rather than too clever. */
if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
@@ -220,8 +220,8 @@ static int ip_finish_output2(struct sock *sk, struct sk_buff *skb)
return -EINVAL;
}
-static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
- unsigned int mtu)
+static int ip_finish_output_gso(struct net *net, struct sock *sk,
+ struct sk_buff *skb, unsigned int mtu)
{
netdev_features_t features;
struct sk_buff *segs;
@@ -230,7 +230,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
/* common case: locally created skb or seglen is <= mtu */
if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) ||
skb_gso_network_seglen(skb) <= mtu)
- return ip_finish_output2(sk, skb);
+ return ip_finish_output2(net, sk, skb);
/* Slowpath - GSO segment length is exceeding the dst MTU.
*
@@ -253,7 +253,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
int err;
segs->next = NULL;
- err = ip_fragment(sk, segs, mtu, ip_finish_output2);
+ err = ip_fragment(net, sk, segs, mtu, ip_finish_output2);
if (err && ret == 0)
ret = err;
@@ -263,7 +263,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
return ret;
}
-static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned int mtu;
@@ -271,20 +271,20 @@ static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
/* Policy lookup after SNAT yielded a new policy */
if (skb_dst(skb)->xfrm) {
IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
#endif
mtu = ip_skb_dst_mtu(skb);
if (skb_is_gso(skb))
- return ip_finish_output_gso(sk, skb, mtu);
+ return ip_finish_output_gso(net, sk, skb, mtu);
if (skb->len > mtu || (IPCB(skb)->flags & IPSKB_FRAG_PMTU))
- return ip_fragment(sk, skb, mtu, ip_finish_output2);
+ return ip_fragment(net, sk, skb, mtu, ip_finish_output2);
- return ip_finish_output2(sk, skb);
+ return ip_finish_output2(net, sk, skb);
}
-int ip_mc_output(struct sock *sk, struct sk_buff *skb)
+int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct rtable *rt = skb_rtable(skb);
struct net_device *dev = rt->dst.dev;
@@ -292,7 +292,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
/*
* If the indicated interface is up and running, send the packet.
*/
- IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
@@ -320,7 +320,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
if (newskb)
NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- sk, newskb, NULL, newskb->dev,
+ net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
}
@@ -335,26 +335,28 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb)
if (rt->rt_flags&RTCF_BROADCAST) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
if (newskb)
- NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb,
- NULL, newskb->dev, dev_loopback_xmit);
+ NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, newskb, NULL, newskb->dev,
+ dev_loopback_xmit);
}
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL,
- skb->dev, ip_finish_output,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb->dev,
+ ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
-int ip_output(struct sock *sk, struct sk_buff *skb)
+int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
- IP_UPD_PO_STATS(dev_net(dev), IPSTATS_MIB_OUT, skb->len);
+ IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
skb->dev = dev;
skb->protocol = htons(ETH_P_IP);
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
- NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, dev,
ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
@@ -377,6 +379,7 @@ static void ip_copy_addrs(struct iphdr *iph, const struct flowi4 *fl4)
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
struct rtable *rt;
@@ -407,7 +410,7 @@ int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
* keep trying until route appears or the connection times
* itself out.
*/
- rt = ip_route_output_ports(sock_net(sk), fl4, sk,
+ rt = ip_route_output_ports(net, fl4, sk,
daddr, inet->inet_saddr,
inet->inet_dport,
inet->inet_sport,
@@ -444,20 +447,20 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_segs(sock_net(sk), skb, sk,
+ ip_select_ident_segs(net, skb, sk,
skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- res = ip_local_out(skb);
+ res = ip_local_out(net, sk, skb);
rcu_read_unlock();
return res;
no_route:
rcu_read_unlock();
- IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EHOSTUNREACH;
}
@@ -486,29 +489,26 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
-static int ip_fragment(struct sock *sk, struct sk_buff *skb,
+static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
unsigned int mtu,
- int (*output)(struct sock *, struct sk_buff *))
+ int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct iphdr *iph = ip_hdr(skb);
if ((iph->frag_off & htons(IP_DF)) == 0)
- return ip_do_fragment(sk, skb, output);
+ return ip_do_fragment(net, sk, skb, output);
if (unlikely(!skb->ignore_df ||
(IPCB(skb)->frag_max_size &&
IPCB(skb)->frag_max_size > mtu))) {
- struct rtable *rt = skb_rtable(skb);
- struct net_device *dev = rt->dst.dev;
-
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
kfree_skb(skb);
return -EMSGSIZE;
}
- return ip_do_fragment(sk, skb, output);
+ return ip_do_fragment(net, sk, skb, output);
}
/*
@@ -518,8 +518,8 @@ static int ip_fragment(struct sock *sk, struct sk_buff *skb,
* single device frame, and queue such a frame for sending.
*/
-int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *))
+int ip_do_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct iphdr *iph;
int ptr;
@@ -621,10 +621,10 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
ip_send_check(iph);
}
- err = output(sk, skb);
+ err = output(net, sk, skb);
if (!err)
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
if (err || !frag)
break;
@@ -634,7 +634,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
}
if (err == 0) {
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
return 0;
}
@@ -643,7 +643,7 @@ int ip_do_fragment(struct sock *sk, struct sk_buff *skb,
kfree_skb(frag);
frag = skb;
}
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
return err;
slow_path_clean:
@@ -761,19 +761,19 @@ slow_path:
ip_send_check(iph);
- err = output(sk, skb2);
+ err = output(net, sk, skb2);
if (err)
goto fail;
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGCREATES);
}
consume_skb(skb);
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGOKS);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGOKS);
return err;
fail:
kfree_skb(skb);
- IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
return err;
}
EXPORT_SYMBOL(ip_do_fragment);
@@ -1434,7 +1434,7 @@ int ip_send_skb(struct net *net, struct sk_buff *skb)
{
int err;
- err = ip_local_out(skb);
+ err = ip_local_out(net, skb->sk, skb);
if (err) {
if (err > 0)
err = net_xmit_errno(err);
@@ -1561,7 +1561,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
}
oif = arg->bound_dev_if;
- if (!oif && netif_index_is_vrf(net, skb->skb_iif))
+ if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
oif = skb->skb_iif;
flowi4_init_output(&fl4, oif,
@@ -1596,7 +1596,6 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
arg->csumoffset) = csum_fold(csum_add(nskb->csum,
arg->csum));
nskb->ip_summed = CHECKSUM_NONE;
- skb_set_queue_mapping(nskb, skb_get_queue_mapping(skb));
ip_push_pending_frames(sk, &fl4);
}
out:
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 84dce6a92f93..6cb9009c3d96 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -53,6 +53,7 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__u8 tos, __u8 ttl, __be16 df, bool xnet)
{
int pkt_len = skb->len - skb_inner_network_offset(skb);
+ struct net *net = dev_net(rt->dst.dev);
struct iphdr *iph;
int err;
@@ -76,10 +77,9 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->daddr = dst;
iph->saddr = src;
iph->ttl = ttl;
- __ip_select_ident(dev_net(rt->dst.dev), iph,
- skb_shinfo(skb)->gso_segs ?: 1);
+ __ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
- err = ip_local_out_sk(sk, skb);
+ err = ip_local_out(net, sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
return pkt_len;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 0c152087ca15..4d8f0b698777 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -197,7 +197,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
skb_dst_set(skb, dst);
skb->dev = skb_dst(skb)->dev;
- err = dst_output(skb);
+ err = dst_output(tunnel->net, skb->sk, skb);
if (net_xmit_eval(err) == 0)
err = skb->len;
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index ed4ef09c2136..0bc7412d9e14 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -146,6 +146,10 @@ u8 root_server_path[256] = { 0, }; /* Path to mount as root */
/* vendor class identifier */
static char vendor_class_identifier[253] __initdata;
+#if defined(CONFIG_IP_PNP_DHCP)
+static char dhcp_client_identifier[253] __initdata;
+#endif
+
/* Persistent data: */
static int ic_proto_used; /* Protocol used, if any */
@@ -728,6 +732,16 @@ ic_dhcp_init_options(u8 *options)
memcpy(e, vendor_class_identifier, len);
e += len;
}
+ len = strlen(dhcp_client_identifier + 1);
+ /* the minimum length of identifier is 2, include 1 byte type,
+ * and can not be larger than the length of options
+ */
+ if (len >= 1 && len < 312 - (e - options) - 1) {
+ *e++ = 61;
+ *e++ = len + 1;
+ memcpy(e, dhcp_client_identifier, len + 1);
+ e += len + 1;
+ }
}
*e++ = 255; /* End of the list */
@@ -1557,8 +1571,24 @@ static int __init ic_proto_name(char *name)
return 0;
}
#ifdef CONFIG_IP_PNP_DHCP
- else if (!strcmp(name, "dhcp")) {
+ else if (!strncmp(name, "dhcp", 4)) {
+ char *client_id;
+
ic_proto_enabled &= ~IC_RARP;
+ client_id = strstr(name, "dhcp,");
+ if (client_id) {
+ char *v;
+
+ client_id = client_id + 5;
+ v = strchr(client_id, ',');
+ if (!v)
+ return 1;
+ *v = 0;
+ if (kstrtou8(client_id, 0, dhcp_client_identifier))
+ DBG("DHCP: Invalid client identifier type\n");
+ strncpy(dhcp_client_identifier + 1, v + 1, 251);
+ *v = ',';
+ }
return 1;
}
#endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 866ee89f5254..fc42525d8694 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1678,17 +1678,18 @@ static void ip_encap(struct net *net, struct sk_buff *skb,
nf_reset(skb);
}
-static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
/*
@@ -1745,7 +1746,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
* to blackhole.
*/
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS_BH(net, IPSTATS_MIB_FRAGFAILS);
ip_rt_put(rt);
goto out_free;
}
@@ -1787,8 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
* not mrouter) cannot join to more than one interface - it will
* result in receiving multiple packets.
*/
- NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb,
- skb->dev, dev,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
+ net, NULL, skb, skb->dev, dev,
ipmr_forward_finish);
return;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 61eafc9b4545..c3776ff6749f 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,9 +17,8 @@
#include <net/netfilter/nf_queue.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
-int ip_route_me_harder(struct sk_buff *skb, unsigned int addr_type)
+int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_type)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
struct flowi4 fl4 = {};
@@ -104,7 +103,7 @@ static void nf_ip_saveroute(const struct sk_buff *skb,
}
}
-static int nf_ip_reroute(struct sk_buff *skb,
+static int nf_ip_reroute(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -116,7 +115,7 @@ static int nf_ip_reroute(struct sk_buff *skb,
skb->mark == rt_info->mark &&
iph->daddr == rt_info->daddr &&
iph->saddr == rt_info->saddr))
- return ip_route_me_harder(skb, RTN_UNSPEC);
+ return ip_route_me_harder(net, skb, RTN_UNSPEC);
}
return 0;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8f87fc38ccde..11dccba474b7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -186,7 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
indev, arpinfo->iniface,
- arpinfo->invflags&ARPT_INV_VIA_IN ?" (INV)":"");
+ arpinfo->invflags & ARPT_INV_VIA_IN ? " (INV)" : "");
return 0;
}
@@ -195,7 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
outdev, arpinfo->outiface,
- arpinfo->invflags&ARPT_INV_VIA_OUT ?" (INV)":"");
+ arpinfo->invflags & ARPT_INV_VIA_OUT ? " (INV)" : "");
return 0;
}
@@ -247,10 +247,10 @@ struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
}
unsigned int arpt_do_table(struct sk_buff *skb,
- unsigned int hook,
const struct nf_hook_state *state,
struct xt_table *table)
{
+ unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
unsigned int verdict = NF_DROP;
const struct arphdr *arp;
@@ -285,6 +285,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
*/
e = get_entry(table_base, private->hook_entry[hook]);
+ acpar.net = state->net;
acpar.in = state->in;
acpar.out = state->out;
acpar.hooknum = hook;
@@ -467,7 +468,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
- next:
+next:
duprintf("Finished chain %u\n", hook);
}
return 1;
@@ -631,7 +632,7 @@ static inline void cleanup_entry(struct arpt_entry *e)
* newinfo).
*/
static int translate_table(struct xt_table_info *newinfo, void *entry0,
- const struct arpt_replace *repl)
+ const struct arpt_replace *repl)
{
struct arpt_entry *iter;
unsigned int i;
@@ -891,7 +892,7 @@ static int compat_table_info(const struct xt_table_info *info,
#endif
static int get_info(struct net *net, void __user *user,
- const int *len, int compat)
+ const int *len, int compat)
{
char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -1068,7 +1069,7 @@ static int __do_replace(struct net *net, const char *name,
}
static int do_replace(struct net *net, const void __user *user,
- unsigned int len)
+ unsigned int len)
{
int ret;
struct arpt_replace tmp;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 93876d03120c..1897ee160920 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,10 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c */
static unsigned int
-arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+arptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net = dev_net(state->in ? state->in : state->out);
-
- return arpt_do_table(skb, ops->hooknum, state,
- net->ipv4.arptable_filter);
+ return arpt_do_table(skb, state, state->net->ipv4.arptable_filter);
}
static struct nf_hook_ops *arpfilter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b0a86e73451c..b99affad6ba1 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -102,7 +102,7 @@ ip_packet_match(const struct iphdr *ip,
if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
indev, ipinfo->iniface,
- ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
+ ipinfo->invflags & IPT_INV_VIA_IN ? " (INV)" : "");
return false;
}
@@ -111,7 +111,7 @@ ip_packet_match(const struct iphdr *ip,
if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
outdev, ipinfo->outiface,
- ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
+ ipinfo->invflags & IPT_INV_VIA_OUT ? " (INV)" : "");
return false;
}
@@ -120,7 +120,7 @@ ip_packet_match(const struct iphdr *ip,
FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
dprintf("Packet protocol %hi does not match %hi.%s\n",
ip->protocol, ipinfo->proto,
- ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
+ ipinfo->invflags & IPT_INV_PROTO ? " (INV)" : "");
return false;
}
@@ -246,7 +246,8 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
return 0;
}
-static void trace_packet(const struct sk_buff *skb,
+static void trace_packet(struct net *net,
+ const struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -258,7 +259,6 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ipt_entry *iter;
unsigned int rulenum = 0;
- struct net *net = dev_net(in ? in : out);
root = get_entry(private->entries, private->hook_entry[hook]);
@@ -285,10 +285,10 @@ struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff *skb,
- unsigned int hook,
const struct nf_hook_state *state,
struct xt_table *table)
{
+ unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip;
/* Initializing verdict to NF_DROP keeps gcc happy. */
@@ -315,6 +315,7 @@ ipt_do_table(struct sk_buff *skb,
acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
acpar.thoff = ip_hdrlen(skb);
acpar.hotdrop = false;
+ acpar.net = state->net;
acpar.in = state->in;
acpar.out = state->out;
acpar.family = NFPROTO_IPV4;
@@ -378,8 +379,8 @@ ipt_do_table(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
- trace_packet(skb, hook, state->in, state->out,
- table->name, private, e);
+ trace_packet(state->net, skb, hook, state->in,
+ state->out, table->name, private, e);
#endif
/* Standard target? */
if (!t->u.kernel.target->target) {
@@ -430,8 +431,8 @@ ipt_do_table(struct sk_buff *skb,
} while (!acpar.hotdrop);
pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
- xt_write_recseq_end(addend);
- local_bh_enable();
+ xt_write_recseq_end(addend);
+ local_bh_enable();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -483,7 +484,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
+ XT_STANDARD_TARGET) == 0) &&
t->verdict < -NF_MAX_VERDICT - 1) {
duprintf("mark_source_chains: bad "
"negative verdict (%i)\n",
@@ -548,7 +549,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
- next:
+next:
duprintf("Finished chain %u\n", hook);
}
return 1;
@@ -803,7 +804,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net)
newinfo) */
static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
- const struct ipt_replace *repl)
+ const struct ipt_replace *repl)
{
struct ipt_entry *iter;
unsigned int i;
@@ -1077,7 +1078,7 @@ static int compat_table_info(const struct xt_table_info *info,
#endif
static int get_info(struct net *net, void __user *user,
- const int *len, int compat)
+ const int *len, int compat)
{
char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -1303,7 +1304,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
static int
do_add_counters(struct net *net, const void __user *user,
- unsigned int len, int compat)
+ unsigned int len, int compat)
{
unsigned int i;
struct xt_counters_info tmp;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 45cb16a6a4a3..4a9e6db9df8d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -492,14 +492,14 @@ static void arp_print(struct arp_payload *payload)
{
#define HBUFFERLEN 30
char hbuffer[HBUFFERLEN];
- int j,k;
+ int j, k;
- for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) {
+ for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
- hbuffer[k++]=':';
+ hbuffer[k++] = ':';
}
- hbuffer[--k]='\0';
+ hbuffer[--k] = '\0';
pr_debug("src %pI4@%s, dst %pI4\n",
&payload->src_ip, hbuffer, &payload->dst_ip);
@@ -507,14 +507,14 @@ static void arp_print(struct arp_payload *payload)
#endif
static unsigned int
-arp_mangle(const struct nf_hook_ops *ops,
+arp_mangle(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
struct clusterip_config *c;
- struct net *net = dev_net(state->in ? state->in : state->out);
+ struct net *net = state->net;
/* we don't care about non-ethernet and non-ipv4 ARP */
if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 87907d4bd259..1d16c0f28df0 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -59,7 +59,7 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
nf_send_unreach(skb, ICMP_PKT_FILTERED, hook);
break;
case IPT_TCP_RESET:
- nf_send_reset(skb, hook);
+ nf_send_reset(par->net, skb, hook);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
index 95ea633e8356..5fdc556514ba 100644
--- a/net/ipv4/netfilter/ipt_SYNPROXY.c
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -39,11 +39,14 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr)
}
static void
-synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+synproxy_send_tcp(const struct synproxy_net *snet,
+ const struct sk_buff *skb, struct sk_buff *nskb,
struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
struct iphdr *niph, struct tcphdr *nth,
unsigned int tcp_hdr_size)
{
+ struct net *net = nf_ct_net(snet->tmpl);
+
nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
nskb->ip_summed = CHECKSUM_PARTIAL;
nskb->csum_start = (unsigned char *)nth - nskb->head;
@@ -51,7 +54,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
skb_dst_set_noref(nskb, skb_dst(skb));
nskb->protocol = htons(ETH_P_IP);
- if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
goto free_nskb;
if (nfct) {
@@ -60,7 +63,7 @@ synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
nf_conntrack_get(nfct);
}
- ip_local_out(nskb);
+ ip_local_out(net, nskb->sk, nskb);
return;
free_nskb:
@@ -68,7 +71,8 @@ free_nskb:
}
static void
-synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+synproxy_send_client_synack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
const struct synproxy_options *opts)
{
struct sk_buff *nskb;
@@ -104,7 +108,7 @@ synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
niph, nth, tcp_hdr_size);
}
@@ -148,7 +152,7 @@ synproxy_send_server_syn(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ synproxy_send_tcp(snet, skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
niph, nth, tcp_hdr_size);
}
@@ -188,7 +192,7 @@ synproxy_send_server_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(snet, skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
}
static void
@@ -226,8 +230,8 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
- synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
- niph, nth, tcp_hdr_size);
+ synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
}
static bool
@@ -258,7 +262,7 @@ static unsigned int
synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_synproxy_info *info = par->targinfo;
- struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_net *snet = synproxy_pernet(par->net);
struct synproxy_options opts = {};
struct tcphdr *th, _th;
@@ -287,7 +291,7 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
XT_SYNPROXY_OPT_SACK_PERM |
XT_SYNPROXY_OPT_ECN);
- synproxy_send_client_synack(skb, th, &opts);
+ synproxy_send_client_synack(snet, skb, th, &opts);
return NF_DROP;
} else if (th->ack && !(th->fin || th->rst || th->syn)) {
@@ -299,11 +303,11 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
+static unsigned int ipv4_synproxy_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *nhs)
{
- struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
+ struct synproxy_net *snet = synproxy_pernet(nhs->net);
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
struct nf_conn_synproxy *synproxy;
@@ -433,14 +437,12 @@ static struct xt_target synproxy_tg4_reg __read_mostly = {
static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
{
.hook = ipv4_synproxy_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
},
{
.hook = ipv4_synproxy_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 14a2aa8b8a14..a787d07f6cb7 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -25,7 +25,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
bool r;
pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
invert ? '!' : ' ', min, spi, max);
- r=(spi >= min && spi <= max) ^ invert;
+ r = (spi >= min && spi <= max) ^ invert;
pr_debug(" result %s\n", r ? "PASS" : "FAILED");
return r;
}
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 8618fd150c96..74dd6671b66d 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -32,12 +32,11 @@ static __be32 rpfilter_get_saddr(__be32 addr)
return addr;
}
-static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4,
const struct net_device *dev, u8 flags)
{
struct fib_result res;
bool dev_match;
- struct net *net = dev_net(dev);
int ret __maybe_unused;
if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
@@ -98,7 +97,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
- return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+ return rpfilter_lookup_reverse(par->net, &flow, par->in, info->flags) ^ invert;
}
static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index a0f3beca52d2..397ef2dd133e 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,19 +33,16 @@ static const struct xt_table packet_filter = {
};
static unsigned int
-iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+iptable_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net;
-
- if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ if (state->hook == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
- net = dev_net(state->in ? state->in : state->out);
- return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_filter);
+ return ipt_do_table(skb, state, state->net->ipv4.iptable_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 62cbb8c5f4a8..ba5d392a13c4 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -39,7 +39,6 @@ static const struct xt_table packet_mangler = {
static unsigned int
ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
{
- struct net_device *out = state->out;
unsigned int ret;
const struct iphdr *iph;
u_int8_t tos;
@@ -59,8 +58,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, state,
- dev_net(out)->ipv4.iptable_mangle);
+ ret = ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
/* Reroute for ANY change. */
if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
@@ -69,7 +67,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
iph->daddr != daddr ||
skb->mark != mark ||
iph->tos != tos) {
- err = ip_route_me_harder(skb, RTN_UNSPEC);
+ err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -80,18 +78,17 @@ ipt_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_mangle_hook(const struct nf_hook_ops *ops,
+iptable_mangle_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (ops->hooknum == NF_INET_LOCAL_OUT)
+ if (state->hook == NF_INET_LOCAL_OUT)
return ipt_mangle_out(skb, state);
- if (ops->hooknum == NF_INET_POST_ROUTING)
- return ipt_do_table(skb, ops->hooknum, state,
- dev_net(state->out)->ipv4.iptable_mangle);
+ if (state->hook == NF_INET_POST_ROUTING)
+ return ipt_do_table(skb, state,
+ state->net->ipv4.iptable_mangle);
/* PREROUTING/INPUT/FORWARD: */
- return ipt_do_table(skb, ops->hooknum, state,
- dev_net(state->in)->ipv4.iptable_mangle);
+ return ipt_do_table(skb, state, state->net->ipv4.iptable_mangle);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index 0d4d9cdf98a4..ae2cd2752046 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -28,49 +28,46 @@ static const struct xt_table nf_nat_ipv4_table = {
.af = NFPROTO_IPV4,
};
-static unsigned int iptable_nat_do_chain(const struct nf_hook_ops *ops,
+static unsigned int iptable_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
{
- struct net *net = nf_ct_net(ct);
-
- return ipt_do_table(skb, ops->hooknum, state, net->ipv4.nat_table);
+ return ipt_do_table(skb, state, state->net->ipv4.nat_table);
}
-static unsigned int iptable_nat_ipv4_fn(const struct nf_hook_ops *ops,
+static unsigned int iptable_nat_ipv4_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_fn(ops, skb, state, iptable_nat_do_chain);
+ return nf_nat_ipv4_fn(priv, skb, state, iptable_nat_do_chain);
}
-static unsigned int iptable_nat_ipv4_in(const struct nf_hook_ops *ops,
+static unsigned int iptable_nat_ipv4_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_in(ops, skb, state, iptable_nat_do_chain);
+ return nf_nat_ipv4_in(priv, skb, state, iptable_nat_do_chain);
}
-static unsigned int iptable_nat_ipv4_out(const struct nf_hook_ops *ops,
+static unsigned int iptable_nat_ipv4_out(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_out(ops, skb, state, iptable_nat_do_chain);
+ return nf_nat_ipv4_out(priv, skb, state, iptable_nat_do_chain);
}
-static unsigned int iptable_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+static unsigned int iptable_nat_ipv4_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_local_fn(ops, skb, state, iptable_nat_do_chain);
+ return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain);
}
static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
.hook = iptable_nat_ipv4_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_NAT_DST,
@@ -78,7 +75,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
/* After packet filtering, change source */
{
.hook = iptable_nat_ipv4_out,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_NAT_SRC,
@@ -86,7 +82,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
.hook = iptable_nat_ipv4_local_fn,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST,
@@ -94,7 +89,6 @@ static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
/* After packet filtering, change source */
{
.hook = iptable_nat_ipv4_fn,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC,
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 0356e6da4bb7..1ba02811acb0 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,19 +20,16 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+iptable_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net;
-
- if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ if (state->hook == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
- net = dev_net(state->in ? state->in : state->out);
- return ipt_do_table(skb, ops->hooknum, state, net->ipv4.iptable_raw);
+ return ipt_do_table(skb, state, state->net->ipv4.iptable_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index 4bce3980ccd9..c2e23d5e9cd4 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,20 +37,16 @@ static const struct xt_table security_table = {
};
static unsigned int
-iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+iptable_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net;
-
- if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ if (state->hook == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* Somebody is playing with raw sockets. */
return NF_ACCEPT;
- net = dev_net(state->in ? state->in : state->out);
- return ipt_do_table(skb, ops->hooknum, state,
- net->ipv4.iptable_security);
+ return ipt_do_table(skb, state, state->net->ipv4.iptable_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -83,7 +79,7 @@ static int __init iptable_security_init(void)
int ret;
ret = register_pernet_subsys(&iptable_security_net_ops);
- if (ret < 0)
+ if (ret < 0)
return ret;
sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8a2caaf3940b..461ca926fd39 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -92,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
+static unsigned int ipv4_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -119,7 +119,7 @@ static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
ct, ctinfo);
}
-static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
+static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -143,14 +143,14 @@ out:
return nf_conntrack_confirm(skb);
}
-static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
+static unsigned int ipv4_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_conntrack_in(dev_net(state->in), PF_INET, ops->hooknum, skb);
+ return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
-static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
+static unsigned int ipv4_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -158,7 +158,7 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- return nf_conntrack_in(dev_net(state->out), PF_INET, ops->hooknum, skb);
+ return nf_conntrack_in(state->net, PF_INET, state->hook, skb);
}
/* Connection tracking may drop packets, but never alters them, so
@@ -166,42 +166,36 @@ static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
{
.hook = ipv4_conntrack_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_conntrack_local,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_helper,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
.hook = ipv4_helper,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv4_confirm,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index cdde3ec496e9..c567e1b5d799 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -30,7 +30,7 @@ static inline struct nf_icmp_net *icmp_pernet(struct net *net)
}
static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct icmphdr *hp;
struct icmphdr _hdr;
@@ -144,7 +144,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
if (!nf_ct_get_tuplepr(skb,
skb_network_offset(skb) + ip_hdrlen(skb)
+ sizeof(struct icmphdr),
- PF_INET, &origtuple)) {
+ PF_INET, net, &origtuple)) {
pr_debug("icmp_error_message: failed to get tuple\n");
return -NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 9306ec4fab41..0e5591c2ee9f 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,14 +22,15 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
+ u_int32_t user)
{
int err;
skb_orphan(skb);
local_bh_disable();
- err = ip_defrag(skb, user);
+ err = ip_defrag(net, skb, user);
local_bh_enable();
if (!err) {
@@ -61,7 +62,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
return IP_DEFRAG_CONNTRACK_OUT + zone_id;
}
-static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
+static unsigned int ipv4_conntrack_defrag(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -83,9 +84,9 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
/* Gather fragments. */
if (ip_is_fragment(ip_hdr(skb))) {
enum ip_defrag_users user =
- nf_ct_defrag_user(ops->hooknum, skb);
+ nf_ct_defrag_user(state->hook, skb);
- if (nf_ct_ipv4_gather_frags(skb, user))
+ if (nf_ct_ipv4_gather_frags(state->net, skb, user))
return NF_STOLEN;
}
return NF_ACCEPT;
@@ -94,14 +95,12 @@ static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
static struct nf_hook_ops ipv4_defrag_ops[] = {
{
.hook = ipv4_conntrack_defrag,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
},
{
.hook = ipv4_conntrack_defrag,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 2d79e6e8d934..ceb187308120 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -23,25 +23,10 @@
#include <net/netfilter/nf_conntrack.h>
#endif
-static struct net *pick_net(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_NS
- const struct dst_entry *dst;
-
- if (skb->dev != NULL)
- return dev_net(skb->dev);
- dst = skb_dst(skb);
- if (dst != NULL && dst->dev != NULL)
- return dev_net(dst->dev);
-#endif
- return &init_net;
-}
-
-static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
- int oif)
+static bool nf_dup_ipv4_route(struct net *net, struct sk_buff *skb,
+ const struct in_addr *gw, int oif)
{
const struct iphdr *iph = ip_hdr(skb);
- struct net *net = pick_net(skb);
struct rtable *rt;
struct flowi4 fl4;
@@ -65,7 +50,7 @@ static bool nf_dup_ipv4_route(struct sk_buff *skb, const struct in_addr *gw,
return true;
}
-void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
+void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
const struct in_addr *gw, int oif)
{
struct iphdr *iph;
@@ -105,9 +90,9 @@ void nf_dup_ipv4(struct sk_buff *skb, unsigned int hooknum,
--iph->ttl;
ip_send_check(iph);
- if (nf_dup_ipv4_route(skb, gw, oif)) {
+ if (nf_dup_ipv4_route(net, skb, gw, oif)) {
__this_cpu_write(nf_skb_duplicated, true);
- ip_local_out(skb);
+ ip_local_out(net, skb->sk, skb);
__this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 22f4579b0c2a..5075b7ecd26d 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -255,9 +255,9 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
unsigned int
-nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -266,7 +266,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
/* maniptype == SRC for postrouting. */
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
/* We never see fragments: conntrack defrags on pre-routing
* and local-out, and nf_nat_out protects post-routing.
@@ -295,7 +295,7 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
case IP_CT_RELATED_REPLY:
if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- ops->hooknum))
+ state->hook))
return NF_DROP;
else
return NF_ACCEPT;
@@ -308,21 +308,21 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = do_chain(ops, skb, state, ct);
+ ret = do_chain(priv, skb, state, ct);
if (ret != NF_ACCEPT)
return ret;
- if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
+ if (nf_nat_initialized(ct, HOOK2MANIP(state->hook)))
break;
- ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ ret = nf_nat_alloc_null_binding(ct, state->hook);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat,
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat,
state->out))
goto oif_changed;
}
@@ -332,11 +332,11 @@ nf_nat_ipv4_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, state->hook, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -345,9 +345,9 @@ oif_changed:
EXPORT_SYMBOL_GPL(nf_nat_ipv4_fn);
unsigned int
-nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -355,7 +355,7 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
- ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
daddr != ip_hdr(skb)->daddr)
skb_dst_drop(skb);
@@ -365,9 +365,9 @@ nf_nat_ipv4_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_ipv4_in);
unsigned int
-nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -384,7 +384,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -396,7 +396,7 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all)) {
- err = nf_xfrm_me_harder(skb, AF_INET);
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -407,9 +407,9 @@ nf_nat_ipv4_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_ipv4_out);
unsigned int
-nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -424,14 +424,14 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv4_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
- err = ip_route_me_harder(skb, RTN_UNSPEC);
+ err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -440,7 +440,7 @@ nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all) {
- err = nf_xfrm_me_harder(skb, AF_INET);
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 7c676671329d..ddb894ac1458 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1156,7 +1156,7 @@ static int snmp_parse_mangle(unsigned char *msg,
}
if (obj->type == SNMP_IPADDR)
- mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
+ mangle_address(ctx.begin, ctx.pointer - 4, map, check);
kfree(obj->id);
kfree(obj);
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index 3262e41ff76f..c747b2d9eb77 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -99,7 +99,7 @@ void nf_reject_ip_tcphdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb,
EXPORT_SYMBOL_GPL(nf_reject_ip_tcphdr_put);
/* Send RST reply */
-void nf_send_reset(struct sk_buff *oldskb, int hook)
+void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
const struct iphdr *oiph;
@@ -129,7 +129,7 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
ip4_dst_hoplimit(skb_dst(nskb)));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
- if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ if (ip_route_me_harder(net, nskb, RTN_UNSPEC))
goto free_nskb;
/* "Never happens" */
@@ -157,7 +157,7 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
dev_queue_xmit(nskb);
} else
#endif
- ip_local_out(nskb);
+ ip_local_out(net, nskb->sk, nskb);
return;
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index 8412268bbad1..9d09d4f59545 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -15,15 +15,15 @@
#include <net/netfilter/nf_tables.h>
static unsigned int
-nft_do_chain_arp(const struct nf_hook_ops *ops,
+nft_do_chain_arp(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;
- nft_set_pktinfo(&pkt, ops, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
static struct nft_af_info nft_af_arp __read_mostly = {
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index aa180d3a69a5..ca9dc3c46c4f 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -18,18 +18,18 @@
#include <net/ip.h>
#include <net/netfilter/nf_tables_ipv4.h>
-static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
+static unsigned int nft_do_chain_ipv4(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb, state);
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+static unsigned int nft_ipv4_output(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -41,7 +41,7 @@ static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
return NF_ACCEPT;
}
- return nft_do_chain_ipv4(ops, skb, state);
+ return nft_do_chain_ipv4(priv, skb, state);
}
struct nft_af_info nft_af_ipv4 __read_mostly = {
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
index bf5c30ae14e4..f5c66a7a4bf2 100644
--- a/net/ipv4/netfilter/nft_chain_nat_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -26,44 +26,44 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/ip.h>
-static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb, state);
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_nat_ipv4_fn(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv4_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_fn(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv4_fn(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv4_in(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv4_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_in(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv4_in(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv4_out(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv4_out(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_out(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv4_out(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv4_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv4_local_fn(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv4_local_fn(priv, skb, state, nft_nat_do_chain);
}
static const struct nf_chain_type nft_chain_nat_ipv4 = {
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
index e335b0afdaf3..2375b0a8be46 100644
--- a/net/ipv4/netfilter/nft_chain_route_ipv4.c
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -21,7 +21,7 @@
#include <net/route.h>
#include <net/ip.h>
-static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+static unsigned int nf_route_table_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -37,7 +37,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- nft_set_pktinfo_ipv4(&pkt, ops, skb, state);
+ nft_set_pktinfo_ipv4(&pkt, skb, state);
mark = skb->mark;
iph = ip_hdr(skb);
@@ -45,7 +45,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
daddr = iph->daddr;
tos = iph->tos;
- ret = nft_do_chain(&pkt, ops);
+ ret = nft_do_chain(&pkt, priv);
if (ret != NF_DROP && ret != NF_QUEUE) {
iph = ip_hdr(skb);
@@ -53,7 +53,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
iph->daddr != daddr ||
skb->mark != mark ||
iph->tos != tos)
- if (ip_route_me_harder(skb, RTN_UNSPEC))
+ if (ip_route_me_harder(state->net, skb, RTN_UNSPEC))
ret = NF_DROP;
}
return ret;
diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c
index b45932d43b69..bf855e64fc45 100644
--- a/net/ipv4/netfilter/nft_dup_ipv4.c
+++ b/net/ipv4/netfilter/nft_dup_ipv4.c
@@ -30,7 +30,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
};
int oif = regs->data[priv->sreg_dev];
- nf_dup_ipv4(pkt->skb, pkt->ops->hooknum, &gw, oif);
+ nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
}
static int nft_dup_ipv4_init(const struct nft_ctx *ctx,
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 40e414c4ca56..b72ffc58e255 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -26,7 +26,7 @@ static void nft_masq_ipv4_eval(const struct nft_expr *expr,
memset(&range, 0, sizeof(range));
range.flags = priv->flags;
- regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum,
+ regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->hook,
&range, pkt->out);
}
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index d8d795df9c13..c09d4381427e 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -36,7 +36,7 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
mr.range[0].flags |= priv->flags;
regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr,
- pkt->ops->hooknum);
+ pkt->hook);
}
static struct nft_expr_type nft_redir_ipv4_type;
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
index b07e58b51158..c24f41c816b3 100644
--- a/net/ipv4/netfilter/nft_reject_ipv4.c
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -27,11 +27,10 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr,
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach(pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
+ nf_send_unreach(pkt->skb, priv->icmp_code, pkt->hook);
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ nf_send_reset(pkt->net, pkt->skb, pkt->hook);
break;
default:
break;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 561cd4b8fc6e..8c0d0bdc2a7c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -411,8 +411,9 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
icmp_out_count(net, ((struct icmphdr *)
skb_transport_header(skb))->type);
- err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb,
- NULL, rt->dst.dev, dst_output_sk);
+ err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, rt->dst.dev,
+ dst_output);
if (err > 0)
err = net_xmit_errno(err);
if (err)
@@ -483,6 +484,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
struct flowi4 fl4;
@@ -542,7 +544,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
- err = ip_cmsg_send(sock_net(sk), msg, &ipc, false);
+ err = ip_cmsg_send(net, msg, &ipc, false);
if (err)
goto out;
if (ipc.opt)
@@ -597,6 +599,9 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
(inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
+ if (!saddr && ipc.oif)
+ l3mdev_get_saddr(net, ipc.oif, &fl4);
+
if (!inet->hdrincl) {
rfv.msg = msg;
rfv.hlen = 0;
@@ -607,7 +612,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
}
security_sk_classify_flow(sk, flowi4_to_flowi(&fl4));
- rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
+ rt = ip_route_output_flow(net, &fl4, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c81deb85acb4..85f184e429c6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -112,7 +112,7 @@
#endif
#include <net/secure_seq.h>
#include <net/ip_tunnels.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#define RT_FL_TOS(oldflp4) \
((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
@@ -847,7 +847,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
return;
}
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
- vif = vrf_master_ifindex_rcu(rt->dst.dev);
+ vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
rcu_read_unlock();
net = dev_net(rt->dst.dev);
@@ -941,7 +941,7 @@ static int ip_error(struct sk_buff *skb)
}
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
- vrf_master_ifindex(skb->dev), 1);
+ l3mdev_master_ifindex(skb->dev), 1);
send = true;
if (peer) {
@@ -1152,7 +1152,7 @@ static void ipv4_link_failure(struct sk_buff *skb)
dst_set_expires(&rt->dst, 0);
}
-static int ip_rt_bug(struct sock *sk, struct sk_buff *skb)
+static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb)
{
pr_debug("%s: %pI4 -> %pI4, %s\n",
__func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
@@ -1438,12 +1438,34 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
}
static struct rtable *rt_dst_alloc(struct net_device *dev,
+ unsigned int flags, u16 type,
bool nopolicy, bool noxfrm, bool will_cache)
{
- return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
- (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
- (nopolicy ? DST_NOPOLICY : 0) |
- (noxfrm ? DST_NOXFRM : 0));
+ struct rtable *rt;
+
+ rt = dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
+ (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
+ (nopolicy ? DST_NOPOLICY : 0) |
+ (noxfrm ? DST_NOXFRM : 0));
+
+ if (rt) {
+ rt->rt_genid = rt_genid_ipv4(dev_net(dev));
+ rt->rt_flags = flags;
+ rt->rt_type = type;
+ rt->rt_is_input = 0;
+ rt->rt_iif = 0;
+ rt->rt_pmtu = 0;
+ rt->rt_gateway = 0;
+ rt->rt_uses_gateway = 0;
+ rt->rt_table_id = 0;
+ INIT_LIST_HEAD(&rt->rt_uncached);
+
+ rt->dst.output = ip_output;
+ if (flags & RTCF_LOCAL)
+ rt->dst.input = ip_local_deliver;
+ }
+
+ return rt;
}
/* called in rcu_read_lock() section */
@@ -1452,6 +1474,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
{
struct rtable *rth;
struct in_device *in_dev = __in_dev_get_rcu(dev);
+ unsigned int flags = RTCF_MULTICAST;
u32 itag = 0;
int err;
@@ -1464,9 +1487,8 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
skb->protocol != htons(ETH_P_IP))
goto e_inval;
- if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
- if (ipv4_is_loopback(saddr))
- goto e_inval;
+ if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
+ goto e_inval;
if (ipv4_is_zeronet(saddr)) {
if (!ipv4_is_local_multicast(daddr))
@@ -1477,7 +1499,10 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (err < 0)
goto e_err;
}
- rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
+ if (our)
+ flags |= RTCF_LOCAL;
+
+ rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
if (!rth)
goto e_nobufs;
@@ -1486,20 +1511,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rth->dst.tclassid = itag;
#endif
rth->dst.output = ip_rt_bug;
-
- rth->rt_genid = rt_genid_ipv4(dev_net(dev));
- rth->rt_flags = RTCF_MULTICAST;
- rth->rt_type = RTN_MULTICAST;
rth->rt_is_input= 1;
- rth->rt_iif = 0;
- rth->rt_pmtu = 0;
- rth->rt_gateway = 0;
- rth->rt_uses_gateway = 0;
- INIT_LIST_HEAD(&rth->rt_uncached);
- if (our) {
- rth->dst.input= ip_local_deliver;
- rth->rt_flags |= RTCF_LOCAL;
- }
#ifdef CONFIG_IP_MROUTE
if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
@@ -1608,7 +1620,7 @@ static int __mkroute_input(struct sk_buff *skb,
}
}
- rth = rt_dst_alloc(out_dev->dev,
+ rth = rt_dst_alloc(out_dev->dev, 0, res->type,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
if (!rth) {
@@ -1616,19 +1628,12 @@ static int __mkroute_input(struct sk_buff *skb,
goto cleanup;
}
- rth->rt_genid = rt_genid_ipv4(dev_net(rth->dst.dev));
- rth->rt_flags = 0;
- rth->rt_type = res->type;
rth->rt_is_input = 1;
- rth->rt_iif = 0;
- rth->rt_pmtu = 0;
- rth->rt_gateway = 0;
- rth->rt_uses_gateway = 0;
- INIT_LIST_HEAD(&rth->rt_uncached);
+ if (res->table)
+ rth->rt_table_id = res->table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
- rth->dst.output = ip_output;
rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag);
if (lwtunnel_output_redirect(rth->dst.lwtstate)) {
@@ -1646,6 +1651,48 @@ out:
return err;
}
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
+/* To make ICMP packets follow the right flow, the multipath hash is
+ * calculated from the inner IP addresses in reverse order.
+ */
+static int ip_multipath_icmp_hash(struct sk_buff *skb)
+{
+ const struct iphdr *outer_iph = ip_hdr(skb);
+ struct icmphdr _icmph;
+ const struct icmphdr *icmph;
+ struct iphdr _inner_iph;
+ const struct iphdr *inner_iph;
+
+ if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
+ goto standard_hash;
+
+ icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
+ &_icmph);
+ if (!icmph)
+ goto standard_hash;
+
+ if (icmph->type != ICMP_DEST_UNREACH &&
+ icmph->type != ICMP_REDIRECT &&
+ icmph->type != ICMP_TIME_EXCEEDED &&
+ icmph->type != ICMP_PARAMETERPROB) {
+ goto standard_hash;
+ }
+
+ inner_iph = skb_header_pointer(skb,
+ outer_iph->ihl * 4 + sizeof(_icmph),
+ sizeof(_inner_iph), &_inner_iph);
+ if (!inner_iph)
+ goto standard_hash;
+
+ return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);
+
+standard_hash:
+ return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
+}
+
+#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+
static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
const struct flowi4 *fl4,
@@ -1653,8 +1700,15 @@ static int ip_mkroute_input(struct sk_buff *skb,
__be32 daddr, __be32 saddr, u32 tos)
{
#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res->fi && res->fi->fib_nhs > 1)
- fib_select_multipath(res);
+ if (res->fi && res->fi->fib_nhs > 1) {
+ int h;
+
+ if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
+ h = ip_multipath_icmp_hash(skb);
+ else
+ h = fib_multipath_hash(saddr, daddr);
+ fib_select_multipath(res, h);
+ }
#endif
/* create a routing cache entry */
@@ -1706,6 +1760,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto martian_source;
res.fi = NULL;
+ res.table = NULL;
if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
goto brd_input;
@@ -1733,7 +1788,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
* Now we are ready to route packet.
*/
fl4.flowi4_oif = 0;
- fl4.flowi4_iif = vrf_master_ifindex_rcu(dev) ? : dev->ifindex;
+ fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_tos = tos;
fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@ -1754,7 +1809,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
err = fib_validate_source(skb, saddr, daddr, tos,
0, dev, in_dev, &itag);
if (err < 0)
- goto martian_source_keep_err;
+ goto martian_source;
goto local_input;
}
@@ -1776,7 +1831,7 @@ brd_input:
err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
in_dev, &itag);
if (err < 0)
- goto martian_source_keep_err;
+ goto martian_source;
}
flags |= RTCF_BROADCAST;
res.type = RTN_BROADCAST;
@@ -1796,26 +1851,18 @@ local_input:
}
}
- rth = rt_dst_alloc(net->loopback_dev,
+ rth = rt_dst_alloc(net->loopback_dev, flags | RTCF_LOCAL, res.type,
IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
if (!rth)
goto e_nobufs;
- rth->dst.input= ip_local_deliver;
rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
rth->dst.tclassid = itag;
#endif
-
- rth->rt_genid = rt_genid_ipv4(net);
- rth->rt_flags = flags|RTCF_LOCAL;
- rth->rt_type = res.type;
rth->rt_is_input = 1;
- rth->rt_iif = 0;
- rth->rt_pmtu = 0;
- rth->rt_gateway = 0;
- rth->rt_uses_gateway = 0;
- INIT_LIST_HEAD(&rth->rt_uncached);
+ if (res.table)
+ rth->rt_table_id = res.table->tb_id;
RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
@@ -1837,6 +1884,7 @@ no_route:
RT_CACHE_STAT_INC(in_no_route);
res.type = RTN_UNREACHABLE;
res.fi = NULL;
+ res.table = NULL;
goto local_input;
/*
@@ -1859,8 +1907,6 @@ e_nobufs:
goto out;
martian_source:
- err = -EINVAL;
-martian_source_keep_err:
ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
goto out;
}
@@ -1988,28 +2034,19 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
}
add:
- rth = rt_dst_alloc(dev_out,
+ rth = rt_dst_alloc(dev_out, flags, type,
IN_DEV_CONF_GET(in_dev, NOPOLICY),
IN_DEV_CONF_GET(in_dev, NOXFRM),
do_cache);
if (!rth)
return ERR_PTR(-ENOBUFS);
- rth->dst.output = ip_output;
-
- rth->rt_genid = rt_genid_ipv4(dev_net(dev_out));
- rth->rt_flags = flags;
- rth->rt_type = type;
- rth->rt_is_input = 0;
rth->rt_iif = orig_oif ? : 0;
- rth->rt_pmtu = 0;
- rth->rt_gateway = 0;
- rth->rt_uses_gateway = 0;
- INIT_LIST_HEAD(&rth->rt_uncached);
+ if (res->table)
+ rth->rt_table_id = res->table->tb_id;
+
RT_CACHE_STAT_INC(out_slow_tot);
- if (flags & RTCF_LOCAL)
- rth->dst.input = ip_local_deliver;
if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
if (flags & RTCF_LOCAL &&
!(dev_out->flags & IFF_LOOPBACK)) {
@@ -2038,7 +2075,8 @@ add:
* Major route resolver routine.
*/
-struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
+struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
+ int mp_hash)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
@@ -2137,11 +2175,10 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
fl4->saddr = inet_select_addr(dev_out, 0,
RT_SCOPE_HOST);
}
- if (netif_is_vrf(dev_out) &&
- !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
- rth = vrf_dev_get_rth(dev_out);
+
+ rth = l3mdev_get_rtable(dev_out, fl4);
+ if (rth)
goto out;
- }
}
if (!fl4->daddr) {
@@ -2159,7 +2196,8 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
if (err) {
res.fi = NULL;
res.table = NULL;
- if (fl4->flowi4_oif) {
+ if (fl4->flowi4_oif &&
+ !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
/* Apparently, routing tables are wrong. Assume,
that the destination is on link.
@@ -2201,18 +2239,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
goto make_route;
}
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
- if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
- fib_select_multipath(&res);
- else
-#endif
- if (!res.prefixlen &&
- res.table->tb_num_default > 1 &&
- res.type == RTN_UNICAST && !fl4->flowi4_oif)
- fib_select_default(fl4, &res);
-
- if (!fl4->saddr)
- fl4->saddr = FIB_RES_PREFSRC(net, res);
+ fib_select_path(net, &res, fl4, mp_hash);
dev_out = FIB_RES_DEV(res);
fl4->flowi4_oif = dev_out->ifindex;
@@ -2225,7 +2252,7 @@ out:
rcu_read_unlock();
return rth;
}
-EXPORT_SYMBOL_GPL(__ip_route_output_key);
+EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
@@ -2277,7 +2304,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard_sk;
+ new->output = dst_discard_out;
new->dev = ort->dst.dev;
if (new->dev)
@@ -2303,7 +2330,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
}
struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
- struct sock *sk)
+ const struct sock *sk)
{
struct rtable *rt = __ip_route_output_key(net, flp4);
@@ -2319,7 +2346,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
}
EXPORT_SYMBOL_GPL(ip_route_output_flow);
-static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
+static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
u32 seq, int event, int nowait, unsigned int flags)
{
@@ -2339,8 +2366,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
r->rtm_dst_len = 32;
r->rtm_src_len = 0;
r->rtm_tos = fl4->flowi4_tos;
- r->rtm_table = RT_TABLE_MAIN;
- if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
+ r->rtm_table = table_id;
+ if (nla_put_u32(skb, RTA_TABLE, table_id))
goto nla_put_failure;
r->rtm_type = rt->rt_type;
r->rtm_scope = RT_SCOPE_UNIVERSE;
@@ -2445,6 +2472,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
int err;
int mark;
struct sk_buff *skb;
+ u32 table_id = RT_TABLE_MAIN;
err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
if (err < 0)
@@ -2480,6 +2508,9 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
fl4.flowi4_mark = mark;
+ if (netif_index_is_l3_master(net, fl4.flowi4_oif))
+ fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
+
if (iif) {
struct net_device *dev;
@@ -2514,7 +2545,10 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
- err = rt_fill_info(net, dst, src, &fl4, skb,
+ if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
+ table_id = rt->rt_table_id;
+
+ err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
RTM_NEWROUTE, 0, 0);
if (err < 0)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index d70b1f603692..4c0892badb8b 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -192,15 +192,11 @@ u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th,
}
EXPORT_SYMBOL_GPL(__cookie_v4_init_sequence);
-__u32 cookie_v4_init_sequence(struct sock *sk, const struct sk_buff *skb,
- __u16 *mssp)
+__u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mssp)
{
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
- tcp_synq_overflow(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return __cookie_v4_init_sequence(iph, th, mssp);
}
@@ -229,6 +225,7 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb,
child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
if (child) {
atomic_set(&req->rsk_refcnt, 1);
+ sock_rps_save_rxhash(child, skb);
inet_csk_reqsk_queue_add(sk, req, child);
} else {
reqsk_free(req);
@@ -288,6 +285,10 @@ bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
}
EXPORT_SYMBOL(cookie_ecn_ok);
+/* On input, sk is a listener.
+ * Output is listener if incoming packet would not create a child
+ * NULL if memory could not be allocated.
+ */
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
{
struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
@@ -326,7 +327,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp_request_sock_ops, sk); /* for safety */
+ req = inet_reqsk_alloc(&tcp_request_sock_ops, sk, false); /* for safety */
if (!req)
goto out;
@@ -345,7 +346,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+ treq->snt_synack.v64 = 0;
treq->tfo_listener = false;
ireq->ir_iif = sk->sk_bound_dev_if;
@@ -381,10 +382,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
}
/* Try to redo what tcp_v4_send_synack did. */
- req->window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss,
- &req->rcv_wnd, &req->window_clamp,
+ &req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(&rt->dst, RTAX_INITRWND));
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b8b8fa184f75..ac1bdbb50352 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -900,7 +900,8 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
*/
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
!tcp_passive_fastopen(sk)) {
- if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
+ err = sk_stream_wait_connect(sk, &timeo);
+ if (err != 0)
goto out_err;
}
@@ -967,7 +968,8 @@ new_segment:
copied += copy;
offset += copy;
- if (!(size -= copy)) {
+ size -= copy;
+ if (!size) {
tcp_tx_timestamp(sk, skb);
goto out;
}
@@ -988,7 +990,8 @@ wait_for_memory:
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err != 0)
goto do_error;
mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1111,7 +1114,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
*/
if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) &&
!tcp_passive_fastopen(sk)) {
- if ((err = sk_stream_wait_connect(sk, &timeo)) != 0)
+ err = sk_stream_wait_connect(sk, &timeo);
+ if (err != 0)
goto do_error;
}
@@ -1267,7 +1271,8 @@ wait_for_memory:
tcp_push(sk, flags & ~MSG_MORE, mss_now,
TCP_NAGLE_PUSH, size_goal);
- if ((err = sk_stream_wait_memory(sk, &timeo)) != 0)
+ err = sk_stream_wait_memory(sk, &timeo);
+ if (err != 0)
goto do_error;
mss_now = tcp_send_mss(sk, &size_goal, flags);
@@ -1767,7 +1772,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
/* __ Restore normal policy in scheduler __ */
- if ((chunk = len - tp->ucopy.len) != 0) {
+ chunk = len - tp->ucopy.len;
+ if (chunk != 0) {
NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk);
len -= chunk;
copied += chunk;
@@ -1778,7 +1784,8 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
do_prequeue:
tcp_prequeue_process(sk);
- if ((chunk = len - tp->ucopy.len) != 0) {
+ chunk = len - tp->ucopy.len;
+ if (chunk != 0) {
NET_ADD_STATS_USER(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk);
len -= chunk;
copied += chunk;
@@ -2230,7 +2237,8 @@ int tcp_disconnect(struct sock *sk, int flags)
sk->sk_shutdown = 0;
sock_reset_flag(sk, SOCK_DONE);
tp->srtt_us = 0;
- if ((tp->write_seq += tp->max_window + 2) == 0)
+ tp->write_seq += tp->max_window + 2;
+ if (tp->write_seq == 0)
tp->write_seq = 1;
icsk->icsk_backoff = 0;
tp->snd_cwnd = 2;
@@ -2253,13 +2261,6 @@ int tcp_disconnect(struct sock *sk, int flags)
}
EXPORT_SYMBOL(tcp_disconnect);
-void tcp_sock_destruct(struct sock *sk)
-{
- inet_sock_destruct(sk);
-
- kfree(inet_csk(sk)->icsk_accept_queue.fastopenq);
-}
-
static inline bool tcp_can_repair_sock(const struct sock *sk)
{
return ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN) &&
@@ -2581,7 +2582,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
TCPF_LISTEN))) {
tcp_fastopen_init_key_once(true);
- err = fastopen_init_queue(sk, val);
+ fastopen_queue_tune(sk, val);
} else {
err = -EINVAL;
}
@@ -2849,10 +2850,7 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_FASTOPEN:
- if (icsk->icsk_accept_queue.fastopenq)
- val = icsk->icsk_accept_queue.fastopenq->max_qlen;
- else
- val = 0;
+ val = icsk->icsk_accept_queue.fastopenq.max_qlen;
break;
case TCP_TIMESTAMP:
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 93c4dc3ab23f..882caa4e72bc 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -173,6 +173,10 @@ out:
*/
if (ca->get_info)
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
+ if (ca->flags & TCP_CONG_NEEDS_ECN)
+ INET_ECN_xmit(sk);
+ else
+ INET_ECN_dontxmit(sk);
}
void tcp_init_congestion_control(struct sock *sk)
@@ -181,6 +185,10 @@ void tcp_init_congestion_control(struct sock *sk)
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
+ if (tcp_ca_needs_ecn(sk))
+ INET_ECN_xmit(sk);
+ else
+ INET_ECN_dontxmit(sk);
}
static void tcp_reinit_congestion_control(struct sock *sk,
@@ -192,8 +200,8 @@ static void tcp_reinit_congestion_control(struct sock *sk,
icsk->icsk_ca_ops = ca;
icsk->icsk_ca_setsockopt = 1;
- if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
- icsk->icsk_ca_ops->init(sk);
+ if (sk->sk_state != TCP_CLOSE)
+ tcp_init_congestion_control(sk);
}
/* Manage refcounts on socket close. */
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f9c0fb84e435..93396bf7b475 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -124,10 +124,10 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req,
return false;
}
-static bool tcp_fastopen_create_child(struct sock *sk,
- struct sk_buff *skb,
- struct dst_entry *dst,
- struct request_sock *req)
+static struct sock *tcp_fastopen_create_child(struct sock *sk,
+ struct sk_buff *skb,
+ struct dst_entry *dst,
+ struct request_sock *req)
{
struct tcp_sock *tp;
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
@@ -140,11 +140,11 @@ static bool tcp_fastopen_create_child(struct sock *sk,
child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
if (!child)
- return false;
+ return NULL;
- spin_lock(&queue->fastopenq->lock);
- queue->fastopenq->qlen++;
- spin_unlock(&queue->fastopenq->lock);
+ spin_lock(&queue->fastopenq.lock);
+ queue->fastopenq.qlen++;
+ spin_unlock(&queue->fastopenq.lock);
/* Initialize the child socket. Have to fix some values to take
* into account the child is a Fast Open socket and is created
@@ -161,15 +161,13 @@ static bool tcp_fastopen_create_child(struct sock *sk,
tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
/* Activate the retrans timer so that SYNACK can be retransmitted.
- * The request socket is not added to the SYN table of the parent
+ * The request socket is not added to the ehash
* because it's been added to the accept queue directly.
*/
inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
TCP_TIMEOUT_INIT, TCP_RTO_MAX);
- atomic_set(&req->rsk_refcnt, 1);
- /* Add the child socket directly into the accept queue */
- inet_csk_reqsk_queue_add(sk, req, child);
+ atomic_set(&req->rsk_refcnt, 2);
/* Now finish processing the fastopen child socket. */
inet_csk(child)->icsk_af_ops->rebuild_header(child);
@@ -178,12 +176,10 @@ static bool tcp_fastopen_create_child(struct sock *sk,
tcp_init_metrics(child);
tcp_init_buffer_space(child);
- /* Queue the data carried in the SYN packet. We need to first
- * bump skb's refcnt because the caller will attempt to free it.
- * Note that IPv6 might also have used skb_get() trick
- * in tcp_v6_conn_request() to keep this SYN around (treq->pktopts)
- * So we need to eventually get a clone of the packet,
- * before inserting it in sk_receive_queue.
+ /* Queue the data carried in the SYN packet.
+ * We used to play tricky games with skb_get().
+ * With lockless listener, it is a dead end.
+ * Do not think about it.
*
* XXX (TFO) - we honor a zero-payload TFO request for now,
* (any reason not to?) but no need to queue the skb since
@@ -191,12 +187,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
*/
end_seq = TCP_SKB_CB(skb)->end_seq;
if (end_seq != TCP_SKB_CB(skb)->seq + 1) {
- struct sk_buff *skb2;
-
- if (unlikely(skb_shared(skb)))
- skb2 = skb_clone(skb, GFP_ATOMIC);
- else
- skb2 = skb_get(skb);
+ struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (likely(skb2)) {
skb_dst_drop(skb2);
@@ -214,11 +205,10 @@ static bool tcp_fastopen_create_child(struct sock *sk,
}
}
tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = end_seq;
- sk->sk_data_ready(sk);
- bh_unlock_sock(child);
- sock_put(child);
- WARN_ON(!req->sk);
- return true;
+ /* tcp_conn_request() is sending the SYNACK,
+ * and queues the child into listener accept queue.
+ */
+ return child;
}
static bool tcp_fastopen_queue_check(struct sock *sk)
@@ -235,8 +225,8 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* between qlen overflow causing Fast Open to be disabled
* temporarily vs a server not supporting Fast Open at all.
*/
- fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
- if (!fastopenq || fastopenq->max_qlen == 0)
+ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+ if (fastopenq->max_qlen == 0)
return false;
if (fastopenq->qlen >= fastopenq->max_qlen) {
@@ -261,13 +251,14 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* may be updated and return the client in the SYN-ACK later. E.g., Fast Open
* cookie request (foc->len == 0).
*/
-bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct tcp_fastopen_cookie *foc,
- struct dst_entry *dst)
+struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct tcp_fastopen_cookie *foc,
+ struct dst_entry *dst)
{
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+ struct sock *child;
if (foc->len == 0) /* Client requests a cookie */
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD);
@@ -276,7 +267,7 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
(syn_data || foc->len >= 0) &&
tcp_fastopen_queue_check(sk))) {
foc->len = -1;
- return false;
+ return NULL;
}
if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
@@ -296,11 +287,12 @@ bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
* data in SYN_RECV state.
*/
fastopen:
- if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+ child = tcp_fastopen_create_child(sk, skb, dst, req);
+ if (child) {
foc->len = -1;
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPFASTOPENPASSIVE);
- return true;
+ return child;
}
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
} else if (foc->len > 0) /* Client presents an invalid cookie */
@@ -308,6 +300,5 @@ fastopen:
valid_foc.exp = foc->exp;
*foc = valid_foc;
- return false;
+ return NULL;
}
-EXPORT_SYMBOL(tcp_try_fastopen);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a8f515bb19c4..944eaca69115 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2953,21 +2953,21 @@ static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
}
/* Compute time elapsed between (last) SYNACK and the ACK completing 3WHS. */
-static void tcp_synack_rtt_meas(struct sock *sk, const u32 synack_stamp)
+void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req)
{
- struct tcp_sock *tp = tcp_sk(sk);
- long seq_rtt_us = -1L;
+ long rtt_us = -1L;
- if (synack_stamp && !tp->total_retrans)
- seq_rtt_us = jiffies_to_usecs(tcp_time_stamp - synack_stamp);
+ if (req && !req->num_retrans && tcp_rsk(req)->snt_synack.v64) {
+ struct skb_mstamp now;
- /* If the ACK acks both the SYNACK and the (Fast Open'd) data packets
- * sent in SYN_RECV, SYNACK RTT is the smooth RTT computed in tcp_ack()
- */
- if (!tp->srtt_us)
- tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, seq_rtt_us, -1L);
+ skb_mstamp_get(&now);
+ rtt_us = skb_mstamp_us_delta(&now, &tcp_rsk(req)->snt_synack);
+ }
+
+ tcp_ack_update_rtt(sk, FLAG_SYN_ACKED, rtt_us, -1L);
}
+
static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -5472,7 +5472,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
}
static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len)
+ const struct tcphdr *th)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -5698,15 +5698,14 @@ reset_and_undo:
* address independent.
*/
-int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th, unsigned int len)
+int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct tcphdr *th = tcp_hdr(skb);
struct request_sock *req;
int queued = 0;
bool acceptable;
- u32 synack_stamp;
tp->rx_opt.saw_tstamp = 0;
@@ -5750,7 +5749,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
goto discard;
case TCP_SYN_SENT:
- queued = tcp_rcv_synsent_state_process(sk, skb, th, len);
+ queued = tcp_rcv_synsent_state_process(sk, skb, th);
if (queued >= 0)
return queued;
@@ -5785,15 +5784,16 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (!acceptable)
return 1;
+ if (!tp->srtt_us)
+ tcp_synack_rtt_meas(sk, req);
+
/* Once we leave TCP_SYN_RECV, we no longer need req
* so release it.
*/
if (req) {
- synack_stamp = tcp_rsk(req)->snt_synack;
tp->total_retrans = req->num_retrans;
reqsk_fastopen_remove(sk, req, false);
} else {
- synack_stamp = tp->lsndtime;
/* Make sure socket is routed, for correct metrics. */
icsk->icsk_af_ops->rebuild_header(sk);
tcp_init_congestion_control(sk);
@@ -5816,7 +5816,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale;
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
- tcp_synack_rtt_meas(sk, synack_stamp);
if (tp->rx_opt.tstamp_ok)
tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
@@ -6023,11 +6022,11 @@ static void tcp_openreq_init(struct request_sock *req,
{
struct inet_request_sock *ireq = inet_rsk(req);
- req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */
+ req->rsk_rcv_wnd = 0; /* So that tcp_send_synack() knows! */
req->cookie_ts = 0;
tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq;
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
- tcp_rsk(req)->snt_synack = tcp_time_stamp;
+ skb_mstamp_get(&tcp_rsk(req)->snt_synack);
tcp_rsk(req)->last_oow_ack_time = 0;
req->mss = rx_opt->mss_clamp;
req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0;
@@ -6043,9 +6042,11 @@ static void tcp_openreq_init(struct request_sock *req,
}
struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
- struct sock *sk_listener)
+ struct sock *sk_listener,
+ bool attach_listener)
{
- struct request_sock *req = reqsk_alloc(ops, sk_listener);
+ struct request_sock *req = reqsk_alloc(ops, sk_listener,
+ attach_listener);
if (req) {
struct inet_request_sock *ireq = inet_rsk(req);
@@ -6065,13 +6066,13 @@ EXPORT_SYMBOL(inet_reqsk_alloc);
/*
* Return true if a syncookie should be sent
*/
-static bool tcp_syn_flood_action(struct sock *sk,
+static bool tcp_syn_flood_action(const struct sock *sk,
const struct sk_buff *skb,
const char *proto)
{
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
bool want_cookie = false;
- struct listen_sock *lopt;
#ifdef CONFIG_SYN_COOKIES
if (sysctl_tcp_syncookies) {
@@ -6082,12 +6083,12 @@ static bool tcp_syn_flood_action(struct sock *sk,
#endif
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- lopt = inet_csk(sk)->icsk_accept_queue.listen_opt;
- if (!lopt->synflood_warned && sysctl_tcp_syncookies != 2) {
- lopt->synflood_warned = 1;
+ if (!queue->synflood_warned &&
+ sysctl_tcp_syncookies != 2 &&
+ xchg(&queue->synflood_warned, 1) == 0)
pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, ntohs(tcp_hdr(skb)->dest), msg);
- }
+
return want_cookie;
}
@@ -6112,16 +6113,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
const struct tcp_request_sock_ops *af_ops,
struct sock *sk, struct sk_buff *skb)
{
+ struct tcp_fastopen_cookie foc = { .len = -1 };
+ __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
struct tcp_options_received tmp_opt;
- struct request_sock *req;
struct tcp_sock *tp = tcp_sk(sk);
+ struct sock *fastopen_sk = NULL;
struct dst_entry *dst = NULL;
- __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn;
- bool want_cookie = false, fastopen;
+ struct request_sock *req;
+ bool want_cookie = false;
struct flowi fl;
- struct tcp_fastopen_cookie foc = { .len = -1 };
- int err;
-
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
@@ -6145,7 +6145,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop;
}
- req = inet_reqsk_alloc(rsk_ops, sk);
+ req = inet_reqsk_alloc(rsk_ops, sk, !want_cookie);
if (!req)
goto drop;
@@ -6228,20 +6228,30 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
}
tcp_rsk(req)->snt_isn = isn;
+ tcp_rsk(req)->txhash = net_tx_rndhash();
tcp_openreq_init_rwin(req, sk, dst);
- fastopen = !want_cookie &&
- tcp_try_fastopen(sk, skb, req, &foc, dst);
- err = af_ops->send_synack(sk, dst, &fl, req,
- skb_get_queue_mapping(skb), &foc);
- if (!fastopen) {
- if (err || want_cookie)
- goto drop_and_free;
-
+ if (!want_cookie) {
+ tcp_reqsk_record_syn(sk, req, skb);
+ fastopen_sk = tcp_try_fastopen(sk, skb, req, &foc, dst);
+ }
+ if (fastopen_sk) {
+ af_ops->send_synack(fastopen_sk, dst, &fl, req,
+ &foc, false);
+ /* Add the child socket directly into the accept queue */
+ inet_csk_reqsk_queue_add(sk, req, fastopen_sk);
+ sk->sk_data_ready(sk);
+ bh_unlock_sock(fastopen_sk);
+ sock_put(fastopen_sk);
+ } else {
tcp_rsk(req)->tfo_listener = false;
- af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ if (!want_cookie)
+ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ af_ops->send_synack(sk, dst, &fl, req,
+ &foc, !want_cookie);
+ if (want_cookie)
+ goto drop_and_free;
}
- tcp_reqsk_record_syn(sk, req, skb);
-
+ reqsk_put(req);
return 0;
drop_and_release:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 93898e093d4e..30dd45c1f568 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -324,7 +324,6 @@ void tcp_req_err(struct sock *sk, u32 seq)
if (seq != tcp_rsk(req)->snt_isn) {
NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- reqsk_put(req);
} else {
/*
* Still in SYN_RECV, just remove it silently.
@@ -332,9 +331,10 @@ void tcp_req_err(struct sock *sk, u32 seq)
* created socket, and POSIX does not want network
* errors returned from accept().
*/
- NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+ NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
}
+ reqsk_put(req);
}
EXPORT_SYMBOL(tcp_req_err);
@@ -576,7 +576,7 @@ EXPORT_SYMBOL(tcp_v4_send_check);
* Exception: precedence violation. We do not implement it in any case.
*/
-static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
struct {
@@ -795,7 +795,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
inet_twsk_put(tw);
}
-static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
@@ -803,7 +803,7 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
tcp_v4_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp,
req->ts_recent,
0,
@@ -818,11 +818,11 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
* This still operates on a request_sock only, not on a big
* socket.
*/
-static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
+static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
- u16 queue_mapping,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc,
+ bool attach_req)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
@@ -833,12 +833,11 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
- skb = tcp_make_synack(sk, dst, req, foc);
+ skb = tcp_make_synack(sk, dst, req, foc, attach_req);
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- skb_set_queue_mapping(skb, queue_mapping);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
ireq->opt);
@@ -865,7 +864,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req)
*/
/* Find the Key structure for an address. */
-struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
+struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
const union tcp_md5_addr *addr,
int family)
{
@@ -877,7 +876,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
/* caller either holds rcu_read_lock() or socket lock */
md5sig = rcu_dereference_check(tp->md5sig_info,
sock_owned_by_user(sk) ||
- lockdep_is_held(&sk->sk_lock.slock));
+ lockdep_is_held((spinlock_t *)&sk->sk_lock.slock));
if (!md5sig)
return NULL;
#if IS_ENABLED(CONFIG_IPV6)
@@ -894,7 +893,7 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(struct sock *sk,
}
EXPORT_SYMBOL(tcp_md5_do_lookup);
-struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk,
+struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
const union tcp_md5_addr *addr;
@@ -1112,10 +1111,13 @@ clear_hash_noput:
}
EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
+#endif
+
/* Called with rcu_read_lock() */
-static bool tcp_v4_inbound_md5_hash(struct sock *sk,
+static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
const struct sk_buff *skb)
{
+#ifdef CONFIG_TCP_MD5SIG
/*
* This gets called for each TCP segment that arrives
* so we want to be efficient.
@@ -1165,10 +1167,12 @@ static bool tcp_v4_inbound_md5_hash(struct sock *sk,
return true;
}
return false;
-}
#endif
+ return false;
+}
-static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener,
+static void tcp_v4_init_req(struct request_sock *req,
+ const struct sock *sk_listener,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
@@ -1179,7 +1183,8 @@ static void tcp_v4_init_req(struct request_sock *req, struct sock *sk_listener,
ireq->opt = tcp_v4_save_options(skb);
}
-static struct dst_entry *tcp_v4_route_req(struct sock *sk, struct flowi *fl,
+static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
+ struct flowi *fl,
const struct request_sock *req,
bool *strict)
{
@@ -1218,7 +1223,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.route_req = tcp_v4_route_req,
.init_seq = tcp_v4_init_sequence,
.send_synack = tcp_v4_send_synack,
- .queue_hash_add = inet_csk_reqsk_queue_hash_add,
};
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -1241,7 +1245,7 @@ EXPORT_SYMBOL(tcp_v4_conn_request);
* The three way handshake has completed - we got a valid synack -
* now create the new socket.
*/
-struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
@@ -1277,7 +1281,6 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- sk_set_txhash(newsk);
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
newinet->inet_id = newtp->write_seq ^ jiffies;
@@ -1338,34 +1341,11 @@ put_and_exit:
}
EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
-static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
+static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
+#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
- const struct iphdr *iph = ip_hdr(skb);
- struct request_sock *req;
- struct sock *nsk;
-
- req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr);
- if (req) {
- nsk = tcp_check_req(sk, skb, req, false);
- if (!nsk || nsk == sk)
- reqsk_put(req);
- return nsk;
- }
-
- nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr,
- th->source, iph->daddr, th->dest, inet_iif(skb));
-
- if (nsk) {
- if (nsk->sk_state != TCP_TIME_WAIT) {
- bh_lock_sock(nsk);
- return nsk;
- }
- inet_twsk_put(inet_twsk(nsk));
- return NULL;
- }
-#ifdef CONFIG_SYN_COOKIES
if (!th->syn)
sk = cookie_v4_check(sk, skb);
#endif
@@ -1373,7 +1353,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
}
/* The socket must have it's spinlock held when we get
- * here.
+ * here, unless it is a TCP_LISTEN socket.
*
* We have a potential double-lock case here, so even when
* doing backlog processing we use the BH locking scheme.
@@ -1404,13 +1384,13 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
- struct sock *nsk = tcp_v4_hnd_req(sk, skb);
+ struct sock *nsk = tcp_v4_cookie_check(sk, skb);
+
if (!nsk)
goto discard;
-
if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(sk, skb);
+ sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb)) {
rsk = nsk;
goto reset;
@@ -1420,7 +1400,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) {
+ if (tcp_rcv_state_process(sk, skb)) {
rsk = sk;
goto reset;
}
@@ -1590,6 +1570,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
TCP_SKB_CB(skb)->sacked = 0;
+lookup:
sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
if (!sk)
goto no_tcp_socket;
@@ -1598,6 +1579,33 @@ process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *nsk = NULL;
+
+ sk = req->rsk_listener;
+ if (tcp_v4_inbound_md5_hash(sk, skb))
+ goto discard_and_relse;
+ if (likely(sk->sk_state == TCP_LISTEN)) {
+ nsk = tcp_check_req(sk, skb, req, false);
+ } else {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (nsk == sk) {
+ sock_hold(sk);
+ reqsk_put(req);
+ } else if (tcp_child_process(sk, nsk, skb)) {
+ tcp_v4_send_reset(nsk, skb);
+ goto discard_it;
+ } else {
+ return 0;
+ }
+ }
if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
@@ -1606,25 +1614,23 @@ process:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
-#ifdef CONFIG_TCP_MD5SIG
- /*
- * We really want to reject the packet as early as possible
- * if:
- * o We're expecting an MD5'd packet and this is no MD5 tcp option
- * o There is an MD5 option and we're not expecting one
- */
if (tcp_v4_inbound_md5_hash(sk, skb))
goto discard_and_relse;
-#endif
nf_reset(skb);
if (sk_filter(sk, skb))
goto discard_and_relse;
- sk_incoming_cpu_update(sk);
skb->dev = NULL;
+ if (sk->sk_state == TCP_LISTEN) {
+ ret = tcp_v4_do_rcv(sk, skb);
+ goto put_and_return;
+ }
+
+ sk_incoming_cpu_update(sk);
+
bh_lock_sock_nested(sk);
tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
@@ -1639,6 +1645,7 @@ process:
}
bh_unlock_sock(sk);
+put_and_return:
sock_put(sk);
return ret;
@@ -1833,35 +1840,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
++st->num;
++st->offset;
- if (st->state == TCP_SEQ_STATE_OPENREQ) {
- struct request_sock *req = cur;
-
- icsk = inet_csk(st->syn_wait_sk);
- req = req->dl_next;
- while (1) {
- while (req) {
- if (req->rsk_ops->family == st->family) {
- cur = req;
- goto out;
- }
- req = req->dl_next;
- }
- if (++st->sbucket >= icsk->icsk_accept_queue.listen_opt->nr_table_entries)
- break;
-get_req:
- req = icsk->icsk_accept_queue.listen_opt->syn_table[st->sbucket];
- }
- sk = sk_nulls_next(st->syn_wait_sk);
- st->state = TCP_SEQ_STATE_LISTENING;
- spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
- } else {
- icsk = inet_csk(sk);
- spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
- if (reqsk_queue_len(&icsk->icsk_accept_queue))
- goto start_req;
- spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
- sk = sk_nulls_next(sk);
- }
+ sk = sk_nulls_next(sk);
get_sk:
sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net))
@@ -1871,16 +1850,6 @@ get_sk:
goto out;
}
icsk = inet_csk(sk);
- spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
- if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
-start_req:
- st->uid = sock_i_uid(sk);
- st->syn_wait_sk = sk;
- st->state = TCP_SEQ_STATE_OPENREQ;
- st->sbucket = 0;
- goto get_req;
- }
- spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
}
spin_unlock_bh(&ilb->lock);
st->offset = 0;
@@ -2012,7 +1981,6 @@ static void *tcp_seek_last_pos(struct seq_file *seq)
void *rc = NULL;
switch (st->state) {
- case TCP_SEQ_STATE_OPENREQ:
case TCP_SEQ_STATE_LISTENING:
if (st->bucket >= INET_LHTABLE_SIZE)
break;
@@ -2071,7 +2039,6 @@ static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
}
switch (st->state) {
- case TCP_SEQ_STATE_OPENREQ:
case TCP_SEQ_STATE_LISTENING:
rc = listening_get_next(seq, v);
if (!rc) {
@@ -2096,11 +2063,6 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
struct tcp_iter_state *st = seq->private;
switch (st->state) {
- case TCP_SEQ_STATE_OPENREQ:
- if (v) {
- struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
- spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
- }
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
@@ -2154,7 +2116,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
EXPORT_SYMBOL(tcp_proc_unregister);
static void get_openreq4(const struct request_sock *req,
- struct seq_file *f, int i, kuid_t uid)
+ struct seq_file *f, int i)
{
const struct inet_request_sock *ireq = inet_rsk(req);
long delta = req->rsk_timer.expires - jiffies;
@@ -2171,7 +2133,8 @@ static void get_openreq4(const struct request_sock *req,
1, /* timers active (only the expire timer) */
jiffies_delta_to_clock_t(delta),
req->num_timeout,
- from_kuid_munged(seq_user_ns(f), uid),
+ from_kuid_munged(seq_user_ns(f),
+ sock_i_uid(req->rsk_listener)),
0, /* non standard timer */
0, /* open_requests have no inode */
0,
@@ -2185,7 +2148,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct inet_sock *inet = inet_sk(sk);
- struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
+ const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
__be32 dest = inet->inet_daddr;
__be32 src = inet->inet_rcv_saddr;
__u16 destp = ntohs(inet->inet_dport);
@@ -2272,18 +2235,12 @@ static int tcp4_seq_show(struct seq_file *seq, void *v)
}
st = seq->private;
- switch (st->state) {
- case TCP_SEQ_STATE_LISTENING:
- case TCP_SEQ_STATE_ESTABLISHED:
- if (sk->sk_state == TCP_TIME_WAIT)
- get_timewait4_sock(v, seq, st->num);
- else
- get_tcp4_sock(v, seq, st->num);
- break;
- case TCP_SEQ_STATE_OPENREQ:
- get_openreq4(v, seq, st->num, st->uid);
- break;
- }
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait4_sock(v, seq, st->num);
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ get_openreq4(v, seq, st->num);
+ else
+ get_tcp4_sock(v, seq, st->num);
out:
seq_pad(seq, '\n');
return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index def765911ff8..41828bdc5d32 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -361,30 +361,38 @@ void tcp_twsk_destructor(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
+/* Warning : This function is called without sk_listener being locked.
+ * Be sure to read socket fields once, as their value could change under us.
+ */
void tcp_openreq_init_rwin(struct request_sock *req,
- struct sock *sk, struct dst_entry *dst)
+ const struct sock *sk_listener,
+ const struct dst_entry *dst)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct tcp_sock *tp = tcp_sk(sk);
- __u8 rcv_wscale;
+ const struct tcp_sock *tp = tcp_sk(sk_listener);
+ u16 user_mss = READ_ONCE(tp->rx_opt.user_mss);
+ int full_space = tcp_full_space(sk_listener);
int mss = dst_metric_advmss(dst);
+ u32 window_clamp;
+ __u8 rcv_wscale;
- if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
- mss = tp->rx_opt.user_mss;
+ if (user_mss && user_mss < mss)
+ mss = user_mss;
+ window_clamp = READ_ONCE(tp->window_clamp);
/* Set this up on the first call only */
- req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = window_clamp ? : dst_metric(dst, RTAX_WINDOW);
/* limit the window selection if the user enforce a smaller rx buffer */
- if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
- (req->window_clamp > tcp_full_space(sk) || req->window_clamp == 0))
- req->window_clamp = tcp_full_space(sk);
+ if (sk_listener->sk_userlocks & SOCK_RCVBUF_LOCK &&
+ (req->rsk_window_clamp > full_space || req->rsk_window_clamp == 0))
+ req->rsk_window_clamp = full_space;
/* tcp_full_space because it is guaranteed to be the first packet */
- tcp_select_initial_window(tcp_full_space(sk),
+ tcp_select_initial_window(full_space,
mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
- &req->rcv_wnd,
- &req->window_clamp,
+ &req->rsk_rcv_wnd,
+ &req->rsk_window_clamp,
ireq->wscale_ok,
&rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
@@ -433,7 +441,9 @@ EXPORT_SYMBOL_GPL(tcp_ca_openreq_child);
* Actually, we could lots of memory writes here. tp of listening
* socket contains all necessary default parameters.
*/
-struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb)
+struct sock *tcp_create_openreq_child(const struct sock *sk,
+ struct request_sock *req,
+ struct sk_buff *skb)
{
struct sock *newsk = inet_csk_clone_lock(sk, req, GFP_ATOMIC);
@@ -469,7 +479,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tcp_enable_early_retrans(newtp);
newtp->tlp_high_seq = 0;
- newtp->lsndtime = treq->snt_synack;
+ newtp->lsndtime = treq->snt_synack.stamp_jiffies;
+ newsk->sk_txhash = treq->txhash;
newtp->last_oow_ack_time = 0;
newtp->total_retrans = req->num_retrans;
@@ -501,9 +512,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
if (sysctl_tcp_fack)
tcp_enable_fack(newtp);
}
- newtp->window_clamp = req->window_clamp;
- newtp->rcv_ssthresh = req->rcv_wnd;
- newtp->rcv_wnd = req->rcv_wnd;
+ newtp->window_clamp = req->rsk_window_clamp;
+ newtp->rcv_ssthresh = req->rsk_rcv_wnd;
+ newtp->rcv_wnd = req->rsk_rcv_wnd;
newtp->rx_opt.wscale_ok = ireq->wscale_ok;
if (newtp->rx_opt.wscale_ok) {
newtp->rx_opt.snd_wscale = ireq->snd_wscale;
@@ -567,8 +578,6 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
__be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
bool paws_reject = false;
- BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN));
-
tmp_opt.saw_tstamp = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(skb, &tmp_opt, 0, NULL);
@@ -698,7 +707,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* RFC793: "first check sequence number". */
if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq,
- tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) {
+ tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rsk_rcv_wnd)) {
/* Out of window: send ACK and drop. */
if (!(flg & TCP_FLAG_RST))
req->rsk_ops->send_ack(sk, skb, req);
@@ -759,6 +768,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!child)
goto listen_overflow;
+ sock_rps_save_rxhash(child, skb);
+ tcp_synack_rtt_meas(child, req);
inet_csk_reqsk_queue_drop(sk, req);
inet_csk_reqsk_queue_add(sk, req, child);
/* Warning: caller must not call reqsk_put(req);
@@ -811,8 +822,7 @@ int tcp_child_process(struct sock *parent, struct sock *child,
int state = child->sk_state;
if (!sock_owned_by_user(child)) {
- ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
- skb->len);
+ ret = tcp_rcv_state_process(child, skb);
/* Wakeup parent, send SIGIO */
if (state == TCP_SYN_RECV && child->sk_state != state)
parent->sk_data_ready(parent);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1100ffe4a722..19adedb8c5cc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -357,14 +357,10 @@ static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
}
static void
-tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th,
- struct sock *sk)
+tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
{
- if (inet_rsk(req)->ecn_ok) {
+ if (inet_rsk(req)->ecn_ok)
th->ece = 1;
- if (tcp_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
- }
}
/* Set up ECN state for a packet on a ESTABLISHED socket that is about to
@@ -612,12 +608,11 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
/* Set up TCP options for SYN-ACKs. */
-static unsigned int tcp_synack_options(struct sock *sk,
- struct request_sock *req,
- unsigned int mss, struct sk_buff *skb,
- struct tcp_out_options *opts,
- const struct tcp_md5sig_key *md5,
- struct tcp_fastopen_cookie *foc)
+static unsigned int tcp_synack_options(struct request_sock *req,
+ unsigned int mss, struct sk_buff *skb,
+ struct tcp_out_options *opts,
+ const struct tcp_md5sig_key *md5,
+ struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
@@ -1827,7 +1822,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb,
/* Ok, it looks like it is advisable to defer. */
- if (cong_win < send_win && cong_win < skb->len)
+ if (cong_win < send_win && cong_win <= skb->len)
*is_cwnd_limited = true;
return true;
@@ -2060,7 +2055,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
cwnd_quota = tcp_cwnd_test(tp, skb);
if (!cwnd_quota) {
- is_cwnd_limited = true;
if (push_one == 2)
/* Force out a loss probe pkt. */
cwnd_quota = 1;
@@ -2142,6 +2136,7 @@ repair:
/* Send one loss probe per tail loss episode. */
if (push_one != 2)
tcp_schedule_loss_probe(sk);
+ is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd);
tcp_cwnd_validate(sk, is_cwnd_limited);
return false;
}
@@ -2165,7 +2160,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
*/
- if (sk->sk_state == TCP_SYN_RECV)
+ if (tp->fastopen_rsk)
return false;
/* TLP is only scheduled when next timer event is RTO. */
@@ -2175,7 +2170,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if (sysctl_tcp_early_retrans < 3 || !tp->srtt_us || !tp->packets_out ||
+ if (sysctl_tcp_early_retrans < 3 || !tp->packets_out ||
!tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
return false;
@@ -2184,9 +2179,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
return false;
/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
- * for delayed ack when there's one outstanding packet.
+ * for delayed ack when there's one outstanding packet. If no RTT
+ * sample is available then probe after TCP_TIMEOUT_INIT.
*/
- timeout = rtt << 1;
+ timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
if (tp->packets_out == 1)
timeout = max_t(u32, timeout,
(rtt + (rtt >> 1) + TCP_DELACK_MAX));
@@ -2949,20 +2945,22 @@ int tcp_send_synack(struct sock *sk)
* Allocate one skb and build a SYNACK packet.
* @dst is consumed : Caller should not use it again.
*/
-struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
+struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc,
+ bool attach_req)
{
- struct tcp_out_options opts;
struct inet_request_sock *ireq = inet_rsk(req);
- struct tcp_sock *tp = tcp_sk(sk);
- struct tcphdr *th;
- struct sk_buff *skb;
+ const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *md5 = NULL;
+ struct tcp_out_options opts;
+ struct sk_buff *skb;
int tcp_header_size;
+ struct tcphdr *th;
+ u16 user_mss;
int mss;
- skb = sock_wmalloc(sk, MAX_TCP_HEADER, 1, GFP_ATOMIC);
+ skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
@@ -2970,11 +2968,23 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
/* Reserve space for headers. */
skb_reserve(skb, MAX_TCP_HEADER);
+ if (attach_req) {
+ skb->destructor = sock_edemux;
+ sock_hold(req_to_sk(req));
+ skb->sk = req_to_sk(req);
+ } else {
+ /* sk is a const pointer, because we want to express multiple
+ * cpu might call us concurrently.
+ * sk->sk_wmem_alloc in an atomic, we can promote to rw.
+ */
+ skb_set_owner_w(skb, (struct sock *)sk);
+ }
skb_dst_set(skb, dst);
mss = dst_metric_advmss(dst);
- if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
- mss = tp->rx_opt.user_mss;
+ user_mss = READ_ONCE(tp->rx_opt.user_mss);
+ if (user_mss && user_mss < mss)
+ mss = user_mss;
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
@@ -2988,8 +2998,9 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
rcu_read_lock();
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
- tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
- foc) + sizeof(*th);
+ skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+ tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
+ sizeof(*th);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
@@ -2998,7 +3009,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
- tcp_ecn_make_synack(req, th, sk);
+ tcp_ecn_make_synack(req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
@@ -3012,8 +3023,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
- th->window = htons(min(req->rcv_wnd, 65535U));
- tcp_options_write((__be32 *)(th + 1), tp, &opts);
+ th->window = htons(min(req->rsk_rcv_wnd, 65535U));
+ tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_OUTSEGS);
@@ -3500,13 +3511,14 @@ void tcp_send_probe0(struct sock *sk)
TCP_RTO_MAX);
}
-int tcp_rtx_synack(struct sock *sk, struct request_sock *req)
+int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
{
const struct tcp_request_sock_ops *af_ops = tcp_rsk(req)->af_specific;
struct flowi fl;
int res;
- res = af_ops->send_synack(sk, NULL, &fl, req, 0, NULL);
+ tcp_rsk(req)->txhash = net_tx_rndhash();
+ res = af_ops->send_synack(sk, NULL, &fl, req, NULL, true);
if (!res) {
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7149ebc820c7..c9c716a483e4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -83,7 +83,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset)
}
/* Calculate maximal number or retries on an orphaned socket. */
-static int tcp_orphan_retries(struct sock *sk, int alive)
+static int tcp_orphan_retries(struct sock *sk, bool alive)
{
int retries = sysctl_tcp_orphan_retries; /* May be zero. */
@@ -184,7 +184,7 @@ static int tcp_write_timeout(struct sock *sk)
retry_until = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
- const int alive = icsk->icsk_rto < TCP_RTO_MAX;
+ const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
retry_until = tcp_orphan_retries(sk, alive);
do_reset = alive ||
@@ -298,7 +298,7 @@ static void tcp_probe_timer(struct sock *sk)
max_probes = sysctl_tcp_retries2;
if (sock_flag(sk, SOCK_DEAD)) {
- const int alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
+ const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
max_probes = tcp_orphan_retries(sk, alive);
if (!alive && icsk->icsk_backoff >= max_probes)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f7d1d5e19e95..24ec14f9825c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -375,7 +375,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score += 4;
}
-
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
return score;
}
@@ -419,6 +420,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
score += 4;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
@@ -1017,30 +1021,14 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
fl4 = &fl4_stack;
- /* unconnected socket. If output device is enslaved to a VRF
- * device lookup source address from VRF table. This mimics
- * behavior of ip_route_connect{_init}.
- */
- if (netif_index_is_vrf(net, ipc.oif)) {
- flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
- RT_SCOPE_UNIVERSE, sk->sk_protocol,
- (flow_flags | FLOWI_FLAG_VRFSRC |
- FLOWI_FLAG_SKIP_NH_OIF),
- faddr, saddr, dport,
- inet->inet_sport);
-
- rt = ip_route_output_flow(net, fl4, sk);
- if (!IS_ERR(rt)) {
- saddr = fl4->saddr;
- ip_rt_put(rt);
- }
- }
-
flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE, sk->sk_protocol,
flow_flags,
faddr, saddr, dport, inet->inet_sport);
+ if (!saddr && ipc.oif)
+ l3mdev_get_saddr(net, ipc.oif, fl4);
+
security_sk_classify_flow(sk, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt)) {
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 60b032f58ccc..62e1e72db461 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -22,7 +22,8 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
return xfrm4_extract_header(skb);
}
-static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb)
+static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
if (!skb_dst(skb)) {
const struct iphdr *iph = ip_hdr(skb);
@@ -52,8 +53,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
iph->tot_len = htons(skb->len);
ip_send_check(iph);
- NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb,
- skb->dev, NULL,
+ NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
xfrm4_rcv_encap_finish);
return 0;
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 2878dbfffeb7..9f298d0dc9a1 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -80,24 +80,25 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
return xfrm_output(sk, skb);
}
-static int __xfrm4_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct xfrm_state *x = skb_dst(skb)->xfrm;
#ifdef CONFIG_NETFILTER
if (!x) {
IPCB(skb)->flags |= IPSKB_REROUTED;
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
#endif
return x->outer_mode->afinfo->output_finish(sk, skb);
}
-int xfrm4_output(struct sock *sk, struct sk_buff *skb)
+int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb,
- NULL, skb_dst(skb)->dev, __xfrm4_output,
+ return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ __xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index c10a9ee68433..f2606b9056bb 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -15,7 +15,7 @@
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/ip.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
@@ -97,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
xdst->u.rt.rt_gateway = rt->rt_gateway;
xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+ xdst->u.rt.rt_table_id = rt->rt_table_id;
INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
return 0;
@@ -110,10 +111,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
struct flowi4 *fl4 = &fl->u.ip4;
int oif = 0;
- if (skb_dst(skb)) {
- oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
- : skb_dst(skb)->dev->ifindex;
- }
+ if (skb_dst(skb))
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl4, 0, sizeof(struct flowi4));
fl4->flowi4_mark = skb->mark;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 36b85bd05ac8..d135350495e8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,6 +81,7 @@
#include <net/ip.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/l3mdev.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
@@ -2146,7 +2147,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
unsigned long expires, u32 flags)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_PREFIX,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
.fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_expires = expires,
@@ -2179,8 +2180,9 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
struct fib6_node *fn;
struct rt6_info *rt = NULL;
struct fib6_table *table;
+ u32 tb_id = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX;
- table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX);
+ table = fib6_get_table(dev_net(dev), tb_id);
if (!table)
return NULL;
@@ -2211,7 +2213,7 @@ out:
static void addrconf_add_mroute(struct net_device *dev)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_LOCAL,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_LOCAL,
.fc_metric = IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_dst_len = 8,
@@ -3029,6 +3031,10 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
{
struct in6_addr addr;
+ /* no link local addresses on L3 master devices */
+ if (netif_is_l3_master(idev->dev))
+ return;
+
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
@@ -3627,7 +3633,7 @@ static void addrconf_dad_work(struct work_struct *w)
/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
- ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL);
+ ndisc_send_ns(ifp->idev->dev, &ifp->addr, &mcaddr, &in6addr_any, NULL);
out:
in6_ifa_put(ifp);
rtnl_unlock();
@@ -4731,7 +4737,8 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
-static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
+static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
+ u32 ext_filter_mask)
{
struct nlattr *nla;
struct ifla_cacheinfo ci;
@@ -4751,6 +4758,9 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)
/* XXX - MC not implemented */
+ if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS)
+ return 0;
+
nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
if (!nla)
goto nla_put_failure;
@@ -4786,14 +4796,15 @@ static size_t inet6_get_link_af_size(const struct net_device *dev)
return inet6_ifla6_size();
}
-static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
+static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
+ u32 ext_filter_mask)
{
struct inet6_dev *idev = __in6_dev_get(dev);
if (!idev)
return -ENODATA;
- if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ if (inet6_fill_ifla6_attrs(skb, idev, ext_filter_mask) < 0)
return -EMSGSIZE;
return 0;
@@ -4948,7 +4959,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev,
if (!protoinfo)
goto nla_put_failure;
- if (inet6_fill_ifla6_attrs(skb, idev) < 0)
+ if (inet6_fill_ifla6_attrs(skb, idev, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, protoinfo);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9aadd57808a5..d70b0238f468 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -263,7 +263,7 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
{
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
struct ipv6hdr *iph;
struct sk_buff *skb;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6c2b2132c8d3..efb1c00f2270 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -68,6 +68,7 @@
#include <net/xfrm.h>
#include <net/inet_common.h>
#include <net/dsfield.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
@@ -496,6 +497,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
else if (!fl6.flowi6_oif)
fl6.flowi6_oif = np->ucast_oif;
+ if (!fl6.flowi6_oif)
+ fl6.flowi6_oif = l3mdev_master_ifindex(skb->dev);
+
dst = icmpv6_route_lookup(net, skb, sk, &fl6);
if (IS_ERR(dst))
goto out;
@@ -575,7 +579,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
fl6.daddr = ipv6_hdr(skb)->saddr;
if (saddr)
fl6.saddr = *saddr;
- fl6.flowi6_oif = skb->dev->ifindex;
+ fl6.flowi6_oif = l3mdev_fib_oif(skb->dev);
fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
fl6.flowi6_mark = mark;
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/ila.c b/net/ipv6/ila.c
index 678d2df4b8d9..1a6852e1ac69 100644
--- a/net/ipv6/ila.c
+++ b/net/ipv6/ila.c
@@ -91,7 +91,7 @@ static void update_ipv6_locator(struct sk_buff *skb, struct ila_params *p)
*(__be64 *)&ip6h->daddr = p->locator;
}
-static int ila_output(struct sock *sk, struct sk_buff *skb)
+static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -100,7 +100,7 @@ static int ila_output(struct sock *sk, struct sk_buff *skb)
update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate));
- return dst->lwtstate->orig_output(sk, skb);
+ return dst->lwtstate->orig_output(net, sk, skb);
drop:
kfree_skb(skb);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 6927f3fb5597..5d1c7cee2cb2 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -65,17 +65,18 @@ int inet6_csk_bind_conflict(const struct sock *sk,
}
EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
-struct dst_entry *inet6_csk_route_req(struct sock *sk,
+struct dst_entry *inet6_csk_route_req(const struct sock *sk,
struct flowi6 *fl6,
- const struct request_sock *req)
+ const struct request_sock *req,
+ u8 proto)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *final_p, final;
struct dst_entry *dst;
memset(fl6, 0, sizeof(*fl6));
- fl6->flowi6_proto = IPPROTO_TCP;
+ fl6->flowi6_proto = proto;
fl6->daddr = ireq->ir_v6_rmt_addr;
final_p = fl6_update_dst(fl6, np->opt, &final);
fl6->saddr = ireq->ir_v6_loc_addr;
@@ -91,73 +92,7 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
return dst;
}
-
-/*
- * request_sock (formerly open request) hash tables.
- */
-static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport,
- const u32 rnd, const u32 synq_hsize)
-{
- u32 c;
-
- c = jhash_3words((__force u32)raddr->s6_addr32[0],
- (__force u32)raddr->s6_addr32[1],
- (__force u32)raddr->s6_addr32[2],
- rnd);
-
- c = jhash_2words((__force u32)raddr->s6_addr32[3],
- (__force u32)rport,
- c);
-
- return c & (synq_hsize - 1);
-}
-
-struct request_sock *inet6_csk_search_req(struct sock *sk,
- const __be16 rport,
- const struct in6_addr *raddr,
- const struct in6_addr *laddr,
- const int iif)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- struct request_sock *req;
- u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd,
- lopt->nr_table_entries);
-
- spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
- for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
- const struct inet_request_sock *ireq = inet_rsk(req);
-
- if (ireq->ir_rmt_port == rport &&
- req->rsk_ops->family == AF_INET6 &&
- ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) &&
- ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) &&
- (!ireq->ir_iif || ireq->ir_iif == iif)) {
- atomic_inc(&req->rsk_refcnt);
- WARN_ON(req->sk != NULL);
- break;
- }
- }
- spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
-
- return req;
-}
-EXPORT_SYMBOL_GPL(inet6_csk_search_req);
-
-void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
- struct request_sock *req,
- const unsigned long timeout)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
- struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
- const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr,
- inet_rsk(req)->ir_rmt_port,
- lopt->hash_rnd, lopt->nr_table_entries);
-
- reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout);
- inet_csk_reqsk_queue_added(sk, timeout);
-}
-EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+EXPORT_SYMBOL(inet6_csk_route_req);
void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 6ac8dad0138a..21ace5a2bf7c 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -114,6 +114,8 @@ static inline int compute_score(struct sock *sk, struct net *net,
return -1;
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
}
return score;
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7d2e0023c72d..09fddf70cca4 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -264,6 +264,7 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
return NULL;
}
+EXPORT_SYMBOL_GPL(fib6_get_table);
static void __net_init fib6_tables_init(struct net *net)
{
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index adba03ac7ce9..9075acf081dd 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,7 +47,7 @@
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
@@ -109,7 +109,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
if (hdr->version != 6)
goto err;
- IP6_ADD_STATS_BH(dev_net(dev), idev,
+ IP6_ADD_STATS_BH(net, idev,
IPSTATS_MIB_NOECTPKTS +
(ipv6_get_dsfield(hdr) & INET_ECN_MASK),
max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs));
@@ -183,8 +183,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb,
- dev, NULL,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ net, NULL, skb, dev, NULL,
ip6_rcv_finish);
err:
IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
@@ -199,9 +199,8 @@ drop:
*/
-static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
+static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
const struct inet6_protocol *ipprot;
struct inet6_dev *idev;
unsigned int nhoff;
@@ -278,8 +277,8 @@ discard:
int ip6_input(struct sk_buff *skb)
{
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb,
- skb->dev, NULL,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_input_finish);
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 61d403ee1031..0c89671e0767 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -55,8 +55,9 @@
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
+#include <net/l3mdev.h>
-static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
@@ -71,7 +72,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(dev_net(dev), skb) &&
+ ((mroute6_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
@@ -82,19 +83,18 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
*/
if (newskb)
NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- sk, newskb, NULL, newskb->dev,
+ net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
if (ipv6_hdr(skb)->hop_limit == 0) {
- IP6_INC_STATS(dev_net(dev), idev,
+ IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
}
- IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
- skb->len);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
IPV6_ADDR_SCOPE_NODELOCAL &&
@@ -116,48 +116,49 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
}
rcu_read_unlock_bh();
- IP6_INC_STATS(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
-static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
- return ip6_fragment(sk, skb, ip6_finish_output2);
+ return ip6_fragment(net, sk, skb, ip6_finish_output2);
else
- return ip6_finish_output2(sk, skb);
+ return ip6_finish_output2(net, sk, skb);
}
-int ip6_output(struct sock *sk, struct sk_buff *skb)
+int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct net_device *dev = skb_dst(skb)->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
+
if (unlikely(idev->cnf.disable_ipv6)) {
- IP6_INC_STATS(dev_net(dev), idev,
- IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
return 0;
}
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
- NULL, dev,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
/*
- * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * xmit an sk_buff (used by TCP, SCTP and DCCP)
+ * Note : socket lock is not held for SYNACK packets, but might be modified
+ * by calls to skb_set_owner_w() and ipv6_local_error(),
+ * which are using proper atomic operations or spinlocks.
*/
-
-int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
+int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
struct ipv6_txoptions *opt, int tclass)
{
struct net *net = sock_net(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
struct ipv6hdr *hdr;
@@ -186,7 +187,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
}
consume_skb(skb);
skb = skb2;
- skb_set_owner_w(skb, sk);
+ /* skb_set_owner_w() changes sk->sk_wmem_alloc atomically,
+ * it is safe to call in our context (socket lock not held)
+ */
+ skb_set_owner_w(skb, (struct sock *)sk);
}
if (opt->opt_flen)
ipv6_push_frag_opts(skb, opt, &proto);
@@ -224,12 +228,20 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUT, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, dst->dev, dst_output_sk);
+ /* hooks should never assume socket lock is held.
+ * we promote our socket to non const
+ */
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, (struct sock *)sk, skb, NULL, dst->dev,
+ dst_output);
}
skb->dev = dst->dev;
- ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
+ /* ipv6_local_error() does not require socket lock,
+ * we promote our socket to non const
+ */
+ ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
+
IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
@@ -317,10 +329,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
return 0;
}
-static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6_forward_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
skb_sender_cpu_clear(skb);
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
@@ -515,8 +528,8 @@ int ip6_forward(struct sk_buff *skb)
IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
- skb->dev, dst->dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+ net, NULL, skb, skb->dev, dst->dev,
ip6_forward_finish);
error:
@@ -543,8 +556,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
skb_copy_secmark(to, from);
}
-int ip6_fragment(struct sock *sk, struct sk_buff *skb,
- int (*output)(struct sock *, struct sk_buff *))
+int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
+ int (*output)(struct net *, struct sock *, struct sk_buff *))
{
struct sk_buff *frag;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -557,7 +570,6 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
__be32 frag_id;
int ptr, offset = 0, err = 0;
u8 *prevhdr, nexthdr = 0;
- struct net *net = dev_net(skb_dst(skb)->dev);
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
@@ -677,7 +689,7 @@ int ip6_fragment(struct sock *sk, struct sk_buff *skb,
ip6_copy_metadata(frag, skb);
}
- err = output(sk, skb);
+ err = output(net, sk, skb);
if (!err)
IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
IPSTATS_MIB_FRAGCREATES);
@@ -805,7 +817,7 @@ slow_path:
/*
* Put this fragment into the sending queue.
*/
- err = output(sk, frag);
+ err = output(net, sk, frag);
if (err)
goto fail;
@@ -877,7 +889,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
#ifdef CONFIG_IPV6_SUBTREES
ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
+ (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
+ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
dst_release(dst);
dst = NULL;
}
@@ -886,7 +899,7 @@ out:
return dst;
}
-static int ip6_dst_lookup_tail(struct net *net, struct sock *sk,
+static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
@@ -1017,7 +1030,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);
* It returns a valid dst pointer on success, or a pointer encoded
* error code.
*/
-struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
+struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
const struct in6_addr *final_dst)
{
struct dst_entry *dst = NULL;
@@ -1029,7 +1042,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
if (final_dst)
fl6->daddr = *final_dst;
if (!fl6->flowi6_oif)
- fl6->flowi6_oif = dst->dev->ifindex;
+ fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);
return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
@@ -1683,7 +1696,7 @@ int ip6_send_skb(struct sk_buff *skb)
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
int err;
- err = ip6_local_out(skb);
+ err = ip6_local_out(net, skb->sk, skb);
if (err) {
if (err > 0)
err = net_xmit_errno(err);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 0224c032dca5..0a8610b33d79 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -482,7 +482,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
return -EMSGSIZE;
}
- err = dst_output(skb);
+ err = dst_output(t->net, skb->sk, skb);
if (net_xmit_eval(err) == 0) {
struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0e004cc42a22..ad19136086dd 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1985,13 +1985,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
}
#endif
-static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
+ IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_OUTOCTETS, skb->len);
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
/*
@@ -2063,8 +2063,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
IP6CB(skb)->flags |= IP6SKB_FORWARDED;
- return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb,
- skb->dev, dev,
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
+ net, NULL, skb, skb->dev, dev,
ip6mr_forward2_finish);
out_free:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 083b2927fc67..124338a39e29 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1645,8 +1645,8 @@ static void mld_sendpack(struct sk_buff *skb)
payload_len = skb->len;
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net->ipv6.igmp_sk, skb, NULL, skb->dev,
- dst_output_sk);
+ net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
+ dst_output);
out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
@@ -2008,8 +2008,9 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
}
skb_dst_set(skb, dst);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, skb->dev, dst_output_sk);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, skb->dev,
+ dst_output);
out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index b9779d441b12..60c79a08e14a 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -118,7 +118,7 @@ static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb)
struct mip6_report_rate_limiter {
spinlock_t lock;
- struct timeval stamp;
+ ktime_t stamp;
int iif;
struct in6_addr src;
struct in6_addr dst;
@@ -184,20 +184,18 @@ static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb)
return 0;
}
-static inline int mip6_report_rl_allow(struct timeval *stamp,
+static inline int mip6_report_rl_allow(ktime_t stamp,
const struct in6_addr *dst,
const struct in6_addr *src, int iif)
{
int allow = 0;
spin_lock_bh(&mip6_report_rl.lock);
- if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec ||
- mip6_report_rl.stamp.tv_usec != stamp->tv_usec ||
+ if (!ktime_equal(mip6_report_rl.stamp, stamp) ||
mip6_report_rl.iif != iif ||
!ipv6_addr_equal(&mip6_report_rl.src, src) ||
!ipv6_addr_equal(&mip6_report_rl.dst, dst)) {
- mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
- mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
+ mip6_report_rl.stamp = stamp;
mip6_report_rl.iif = iif;
mip6_report_rl.src = *src;
mip6_report_rl.dst = *dst;
@@ -216,7 +214,7 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
struct ipv6_destopt_hao *hao = NULL;
struct xfrm_selector sel;
int offset;
- struct timeval stamp;
+ ktime_t stamp;
int err = 0;
if (unlikely(fl6->flowi6_proto == IPPROTO_MH &&
@@ -230,9 +228,9 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb,
(skb_network_header(skb) + offset);
}
- skb_get_timestamp(skb, &stamp);
+ stamp = skb_get_ktime(skb);
- if (!mip6_report_rl_allow(&stamp, &ipv6_hdr(skb)->daddr,
+ if (!mip6_report_rl_allow(stamp, &ipv6_hdr(skb)->daddr,
hao ? &hao->addr : &ipv6_hdr(skb)->saddr,
opt->iif))
goto out;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 64a71354b069..3e0f855e1bea 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -67,6 +67,7 @@
#include <net/flow.h>
#include <net/ip6_checksum.h>
#include <net/inet_common.h>
+#include <net/l3mdev.h>
#include <linux/proc_fs.h>
#include <linux/netfilter.h>
@@ -147,6 +148,7 @@ struct neigh_table nd_tbl = {
.gc_thresh2 = 512,
.gc_thresh3 = 1024,
};
+EXPORT_SYMBOL_GPL(nd_tbl);
static void ndisc_fill_addr_option(struct sk_buff *skb, int type, void *data)
{
@@ -441,8 +443,11 @@ static void ndisc_send_skb(struct sk_buff *skb,
if (!dst) {
struct flowi6 fl6;
+ int oif = l3mdev_fib_oif(skb->dev);
- icmpv6_flow_init(sk, &fl6, type, saddr, daddr, skb->dev->ifindex);
+ icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif);
+ if (oif != skb->dev->ifindex)
+ fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = icmp6_dst_alloc(skb->dev, &fl6);
if (IS_ERR(dst)) {
kfree_skb(skb);
@@ -463,9 +468,9 @@ static void ndisc_send_skb(struct sk_buff *skb,
idev = __in6_dev_get(dst->dev);
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, dst->dev,
- dst_output_sk);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, dst->dev,
+ dst_output);
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
@@ -474,8 +479,7 @@ static void ndisc_send_skb(struct sk_buff *skb,
rcu_read_unlock();
}
-void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
- const struct in6_addr *daddr,
+void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
const struct in6_addr *solicited_addr,
bool router, bool solicited, bool override, bool inc_opt)
{
@@ -541,7 +545,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
- ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &ifa->addr,
+ ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
/*inc_opt=*/ true);
@@ -551,8 +555,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
in6_dev_put(idev);
}
-void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
- const struct in6_addr *solicit,
+void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit,
const struct in6_addr *daddr, const struct in6_addr *saddr,
struct sk_buff *oskb)
{
@@ -679,12 +682,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
"%s: trying to ucast probe in NUD_INVALID: %pI6\n",
__func__, target);
}
- ndisc_send_ns(dev, neigh, target, target, saddr, skb);
+ ndisc_send_ns(dev, target, target, saddr, skb);
} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
neigh_app_ns(neigh);
} else {
addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb);
+ ndisc_send_ns(dev, target, &mcaddr, saddr, skb);
}
}
@@ -768,7 +771,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
if (ifp) {
-
+have_ifp:
if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
if (dad) {
/*
@@ -794,6 +797,18 @@ static void ndisc_recv_ns(struct sk_buff *skb)
} else {
struct net *net = dev_net(dev);
+ /* perhaps an address on the master device */
+ if (netif_is_l3_slave(dev)) {
+ struct net_device *mdev;
+
+ mdev = netdev_master_upper_dev_get_rcu(dev);
+ if (mdev) {
+ ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1);
+ if (ifp)
+ goto have_ifp;
+ }
+ }
+
idev = in6_dev_get(dev);
if (!idev) {
/* XXX: count this drop? */
@@ -828,7 +843,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
is_router = idev->cnf.forwarding;
if (dad) {
- ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
+ ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target,
!!is_router, false, (ifp != NULL), true);
goto out;
}
@@ -849,8 +864,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
NEIGH_UPDATE_F_WEAK_OVERRIDE|
NEIGH_UPDATE_F_OVERRIDE);
if (neigh || !dev->header_ops) {
- ndisc_send_na(dev, neigh, saddr, &msg->target,
- !!is_router,
+ ndisc_send_na(dev, saddr, &msg->target, !!is_router,
true, (ifp != NULL && inc), inc);
if (neigh)
neigh_release(neigh);
@@ -1486,6 +1500,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
struct flowi6 fl6;
int rd_len;
u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
+ int oif = l3mdev_fib_oif(dev);
bool ret;
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
@@ -1502,7 +1517,10 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
}
icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT,
- &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
+ &saddr_buf, &ipv6_hdr(skb)->saddr, oif);
+
+ if (oif != skb->dev->ifindex)
+ fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC;
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index b4de08a83e0b..d11c46833d61 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -18,9 +18,8 @@
#include <net/ip6_checksum.h>
#include <net/netfilter/nf_queue.h>
-int ip6_route_me_harder(struct sk_buff *skb)
+int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
const struct ipv6hdr *iph = ipv6_hdr(skb);
unsigned int hh_len;
struct dst_entry *dst;
@@ -93,7 +92,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb,
}
}
-static int nf_ip6_reroute(struct sk_buff *skb,
+static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +102,7 @@ static int nf_ip6_reroute(struct sk_buff *skb,
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
- return ip6_route_me_harder(skb);
+ return ip6_route_me_harder(net, skb);
}
return 0;
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 0771991ed812..99425cf2819b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -117,7 +117,7 @@ ip6_packet_match(const struct sk_buff *skb,
if (FWINV(ret != 0, IP6T_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
indev, ip6info->iniface,
- ip6info->invflags&IP6T_INV_VIA_IN ?" (INV)":"");
+ ip6info->invflags & IP6T_INV_VIA_IN ? " (INV)" : "");
return false;
}
@@ -126,14 +126,14 @@ ip6_packet_match(const struct sk_buff *skb,
if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
outdev, ip6info->outiface,
- ip6info->invflags&IP6T_INV_VIA_OUT ?" (INV)":"");
+ ip6info->invflags & IP6T_INV_VIA_OUT ? " (INV)" : "");
return false;
}
/* ... might want to do something with class and flowlabel here ... */
/* look for the desired protocol header */
- if((ip6info->flags & IP6T_F_PROTO)) {
+ if (ip6info->flags & IP6T_F_PROTO) {
int protohdr;
unsigned short _frag_off;
@@ -151,9 +151,9 @@ ip6_packet_match(const struct sk_buff *skb,
ip6info->proto);
if (ip6info->proto == protohdr) {
- if(ip6info->invflags & IP6T_INV_PROTO) {
+ if (ip6info->invflags & IP6T_INV_PROTO)
return false;
- }
+
return true;
}
@@ -275,7 +275,8 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
return 0;
}
-static void trace_packet(const struct sk_buff *skb,
+static void trace_packet(struct net *net,
+ const struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
@@ -287,7 +288,6 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ip6t_entry *iter;
unsigned int rulenum = 0;
- struct net *net = dev_net(in ? in : out);
root = get_entry(private->entries, private->hook_entry[hook]);
@@ -314,10 +314,10 @@ ip6t_next_entry(const struct ip6t_entry *entry)
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ip6t_do_table(struct sk_buff *skb,
- unsigned int hook,
const struct nf_hook_state *state,
struct xt_table *table)
{
+ unsigned int hook = state->hook;
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
@@ -340,6 +340,7 @@ ip6t_do_table(struct sk_buff *skb,
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
acpar.hotdrop = false;
+ acpar.net = state->net;
acpar.in = state->in;
acpar.out = state->out;
acpar.family = NFPROTO_IPV6;
@@ -401,8 +402,8 @@ ip6t_do_table(struct sk_buff *skb,
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
- trace_packet(skb, hook, state->in, state->out,
- table->name, private, e);
+ trace_packet(state->net, skb, hook, state->in,
+ state->out, table->name, private, e);
#endif
/* Standard target? */
if (!t->u.kernel.target->target) {
@@ -442,8 +443,8 @@ ip6t_do_table(struct sk_buff *skb,
break;
} while (!acpar.hotdrop);
- xt_write_recseq_end(addend);
- local_bh_enable();
+ xt_write_recseq_end(addend);
+ local_bh_enable();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -560,7 +561,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
pos = newpos;
}
}
- next:
+next:
duprintf("Finished chain %u\n", hook);
}
return 1;
@@ -815,7 +816,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net)
newinfo) */
static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
- const struct ip6t_replace *repl)
+ const struct ip6t_replace *repl)
{
struct ip6t_entry *iter;
unsigned int i;
@@ -1089,7 +1090,7 @@ static int compat_table_info(const struct xt_table_info *info,
#endif
static int get_info(struct net *net, void __user *user,
- const int *len, int compat)
+ const int *len, int compat)
{
char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
@@ -1151,7 +1152,7 @@ static int get_info(struct net *net, void __user *user,
static int
get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
- const int *len)
+ const int *len)
{
int ret;
struct ip6t_get_entries get;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 0ed841a3fa33..db29bbf41b59 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -39,7 +39,7 @@ static unsigned int
reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ip6t_reject_info *reject = par->targinfo;
- struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
+ struct net *net = par->net;
switch (reject->with) {
case IP6T_ICMP6_NO_ROUTE:
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 1e4bf99ed16e..3deed5860a42 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -76,7 +76,7 @@ synproxy_send_tcp(const struct synproxy_net *snet,
nf_conntrack_get(nfct);
}
- ip6_local_out(nskb);
+ ip6_local_out(net, nskb->sk, nskb);
return;
free_nskb:
@@ -244,7 +244,7 @@ synproxy_send_client_ack(const struct synproxy_net *snet,
synproxy_build_options(nth, opts);
synproxy_send_tcp(snet, skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
- niph, nth, tcp_hdr_size);
+ niph, nth, tcp_hdr_size);
}
static bool
@@ -275,7 +275,7 @@ static unsigned int
synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_synproxy_info *info = par->targinfo;
- struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_net *snet = synproxy_pernet(par->net);
struct synproxy_options opts = {};
struct tcphdr *th, _th;
@@ -316,11 +316,11 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE;
}
-static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,
+static unsigned int ipv6_synproxy_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *nhs)
{
- struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out));
+ struct synproxy_net *snet = synproxy_pernet(nhs->net);
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
struct nf_conn_synproxy *synproxy;
@@ -458,14 +458,12 @@ static struct xt_target synproxy_tg6_reg __read_mostly = {
static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = {
{
.hook = ipv6_synproxy_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
},
{
.hook = ipv6_synproxy_hook,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 790e0c6b19e1..1ee1b25df096 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -26,7 +26,7 @@ static bool rpfilter_addr_unicast(const struct in6_addr *addr)
return addr_type & IPV6_ADDR_UNICAST;
}
-static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
+static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
const struct net_device *dev, u8 flags)
{
struct rt6_info *rt;
@@ -53,7 +53,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,
lookup_flags |= RT6_LOOKUP_F_IFACE;
}
- rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags);
+ rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
if (rt->dst.error)
goto out;
@@ -93,7 +93,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (unlikely(saddrtype == IPV6_ADDR_ANY))
return true ^ invert; /* not routable: forward path will drop it */
- return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert;
+ return rpfilter_lookup_reverse6(par->net, skb, par->in, info->flags) ^ invert;
}
static int rpfilter_check(const struct xt_mtchk_param *par)
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index 5c33d8abc077..8b277b983ca5 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -32,12 +32,10 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_filter_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net = dev_net(state->in ? state->in : state->out);
-
- return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index b551f5b79fe2..abe278b07932 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -57,8 +57,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u_int32_t *)ipv6_hdr(skb));
- ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state,
- dev_net(state->out)->ipv6.ip6table_mangle);
+ ret = ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
if (ret != NF_DROP && ret != NF_STOLEN &&
(!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) ||
@@ -66,7 +65,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) {
- err = ip6_route_me_harder(skb);
+ err = ip6_route_me_harder(state->net, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -76,17 +75,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_mangle_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- if (ops->hooknum == NF_INET_LOCAL_OUT)
+ if (state->hook == NF_INET_LOCAL_OUT)
return ip6t_mangle_out(skb, state);
- if (ops->hooknum == NF_INET_POST_ROUTING)
- return ip6t_do_table(skb, ops->hooknum, state,
- dev_net(state->out)->ipv6.ip6table_mangle);
+ if (state->hook == NF_INET_POST_ROUTING)
+ return ip6t_do_table(skb, state,
+ state->net->ipv6.ip6table_mangle);
/* INPUT/FORWARD */
- return ip6t_do_table(skb, ops->hooknum, state,
- dev_net(state->in)->ipv6.ip6table_mangle);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_mangle);
}
static struct nf_hook_ops *mangle_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index c3a7f7af0ed4..de2a10a565f5 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -30,49 +30,46 @@ static const struct xt_table nf_nat_ipv6_table = {
.af = NFPROTO_IPV6,
};
-static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
{
- struct net *net = nf_ct_net(ct);
-
- return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_nat);
}
-static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_fn(priv, skb, state, ip6table_nat_do_chain);
}
-static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_in(priv, skb, state, ip6table_nat_do_chain);
}
-static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_out(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_out(priv, skb, state, ip6table_nat_do_chain);
}
-static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops,
+static unsigned int ip6table_nat_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain);
+ return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain);
}
static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
.hook = ip6table_nat_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
@@ -80,7 +77,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* After packet filtering, change source */
{
.hook = ip6table_nat_out,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
@@ -88,7 +84,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* Before packet filtering, change destination */
{
.hook = ip6table_nat_local_fn,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
@@ -96,7 +91,6 @@ static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = {
/* After packet filtering, change source */
{
.hook = ip6table_nat_fn,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index 0b33caad2b69..9021963565c3 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -19,12 +19,10 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_raw_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net = dev_net(state->in ? state->in : state->out);
-
- return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index fcef83c25f7b..0d856fedfeb0 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -36,13 +36,10 @@ static const struct xt_table security_table = {
};
static unsigned int
-ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip6table_security_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- const struct net *net = dev_net(state->in ? state->in : state->out);
-
- return ip6t_do_table(skb, ops->hooknum, state,
- net->ipv6.ip6table_security);
+ return ip6t_do_table(skb, state, state->net->ipv6.ip6table_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 7302900c321a..1aa5848764a7 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
+static unsigned int ipv6_helper(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -131,7 +131,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops,
return helper->help(skb, protoff, ct, ctinfo);
}
-static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,
+static unsigned int ipv6_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -165,14 +165,14 @@ out:
return nf_conntrack_confirm(skb);
}
-static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,
+static unsigned int ipv6_conntrack_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb);
+ return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
-static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
+static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -181,48 +181,42 @@ static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb);
+ return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
{
.hook = ipv6_conntrack_in,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_conntrack_local,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK,
},
{
.hook = ipv6_helper,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_LAST,
},
{
.hook = ipv6_helper,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_CONNTRACK_HELPER,
},
{
.hook = ipv6_confirm,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_LAST-1,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 0e6fae103d33..660bc10c7a9c 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -36,6 +36,7 @@ static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
+ struct net *net,
struct nf_conntrack_tuple *tuple)
{
const struct icmp6hdr *hp;
@@ -56,12 +57,12 @@ static const u_int8_t invmap[] = {
[ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1,
[ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1,
[ICMPV6_NI_QUERY - 128] = ICMPV6_NI_REPLY + 1,
- [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY +1
+ [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_QUERY + 1
};
static const u_int8_t noct_valid_new[] = {
[ICMPV6_MGM_QUERY - 130] = 1,
- [ICMPV6_MGM_REPORT -130] = 1,
+ [ICMPV6_MGM_REPORT - 130] = 1,
[ICMPV6_MGM_REDUCTION - 130] = 1,
[NDISC_ROUTER_SOLICITATION - 130] = 1,
[NDISC_ROUTER_ADVERTISEMENT - 130] = 1,
@@ -159,7 +160,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
skb_network_offset(skb)
+ sizeof(struct ipv6hdr)
+ sizeof(struct icmp6hdr),
- PF_INET6, &origtuple)) {
+ PF_INET6, net, &origtuple)) {
pr_debug("icmpv6_error: Can't get tuple\n");
return -NF_ACCEPT;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 701cd2bae0a9..056f5d4a852a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -59,7 +59,7 @@ struct nf_ct_frag6_skb_cb
struct sk_buff *orig;
};
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb))
+#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
static struct inet_frags nf_frags;
@@ -445,7 +445,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
skb_reset_transport_header(head);
skb_push(head, head->data - skb_network_header(head));
- for (fp=head->next; fp; fp = fp->next) {
+ for (fp = head->next; fp; fp = fp->next) {
head->data_len += fp->len;
head->len += fp->len;
if (head->ip_summed != fp->ip_summed)
@@ -563,12 +563,10 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
return 0;
}
-struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
+struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
struct sk_buff *clone;
struct net_device *dev = skb->dev;
- struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev)
- : dev_net(skb->dev);
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 6d9c0b3d5b8c..4fdbed5ebfb6 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -51,7 +51,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
return IP6_DEFRAG_CONNTRACK_OUT + zone_id;
}
-static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
+static unsigned int ipv6_defrag(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -63,7 +63,8 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
return NF_ACCEPT;
#endif
- reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));
+ reasm = nf_ct_frag6_gather(state->net, skb,
+ nf_ct6_defrag_user(state->hook, skb));
/* queued */
if (reasm == NULL)
return NF_STOLEN;
@@ -74,7 +75,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
nf_ct_frag6_consume_orig(reasm);
- NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm,
+ NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm,
state->in, state->out,
state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
@@ -84,14 +85,12 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,
static struct nf_hook_ops ipv6_defrag_ops[] = {
{
.hook = ipv6_defrag,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
},
{
.hook = ipv6_defrag,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_CONNTRACK_DEFRAG,
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index c8ab626556a0..6989c70ae29f 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -19,25 +19,10 @@
#include <net/netfilter/nf_conntrack.h>
#endif
-static struct net *pick_net(struct sk_buff *skb)
-{
-#ifdef CONFIG_NET_NS
- const struct dst_entry *dst;
-
- if (skb->dev != NULL)
- return dev_net(skb->dev);
- dst = skb_dst(skb);
- if (dst != NULL && dst->dev != NULL)
- return dev_net(dst->dev);
-#endif
- return &init_net;
-}
-
-static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
- int oif)
+static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb,
+ const struct in6_addr *gw, int oif)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct net *net = pick_net(skb);
struct dst_entry *dst;
struct flowi6 fl6;
@@ -61,7 +46,7 @@ static bool nf_dup_ipv6_route(struct sk_buff *skb, const struct in6_addr *gw,
return true;
}
-void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
+void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
const struct in6_addr *gw, int oif)
{
if (this_cpu_read(nf_skb_duplicated))
@@ -81,9 +66,9 @@ void nf_dup_ipv6(struct sk_buff *skb, unsigned int hooknum,
struct ipv6hdr *iph = ipv6_hdr(skb);
--iph->hop_limit;
}
- if (nf_dup_ipv6_route(skb, gw, oif)) {
+ if (nf_dup_ipv6_route(net, skb, gw, oif)) {
__this_cpu_write(nf_skb_duplicated, true);
- ip6_local_out(skb);
+ ip6_local_out(net, skb->sk, skb);
__this_cpu_write(nf_skb_duplicated, false);
} else {
kfree_skb(skb);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 70fbaed49edb..238e70c3f7b7 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -262,9 +262,9 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
unsigned int
-nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -272,7 +272,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
- enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(state->hook);
__be16 frag_off;
int hdrlen;
u8 nexthdr;
@@ -303,7 +303,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- ops->hooknum,
+ state->hook,
hdrlen))
return NF_DROP;
else
@@ -317,21 +317,21 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = do_chain(ops, skb, state, ct);
+ ret = do_chain(priv, skb, state, ct);
if (ret != NF_ACCEPT)
return ret;
- if (nf_nat_initialized(ct, HOOK2MANIP(ops->hooknum)))
+ if (nf_nat_initialized(ct, HOOK2MANIP(state->hook)))
break;
- ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ ret = nf_nat_alloc_null_binding(ct, state->hook);
if (ret != NF_ACCEPT)
return ret;
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
break;
@@ -340,11 +340,11 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out))
+ if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out))
goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, state->hook, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
@@ -353,9 +353,9 @@ oif_changed:
EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn);
unsigned int
-nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -363,7 +363,7 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
unsigned int ret;
struct in6_addr daddr = ipv6_hdr(skb)->daddr;
- ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
skb_dst_drop(skb);
@@ -373,9 +373,9 @@ nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_ipv6_in);
unsigned int
-nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -391,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
@@ -403,7 +403,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.src.u.all !=
ct->tuplehash[!dir].tuple.dst.u.all)) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -414,9 +414,9 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_nat_ipv6_out);
unsigned int
-nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state,
- unsigned int (*do_chain)(const struct nf_hook_ops *ops,
+ unsigned int (*do_chain)(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct))
@@ -430,14 +430,14 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
if (skb->len < sizeof(struct ipv6hdr))
return NF_ACCEPT;
- ret = nf_nat_ipv6_fn(ops, skb, state, do_chain);
+ ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
&ct->tuplehash[!dir].tuple.src.u3)) {
- err = ip6_route_me_harder(skb);
+ err = ip6_route_me_harder(state->net, skb);
if (err < 0)
ret = NF_DROP_ERR(err);
}
@@ -446,7 +446,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb,
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
ct->tuplehash[dir].tuple.dst.u.all !=
ct->tuplehash[!dir].tuple.src.u.all) {
- err = nf_xfrm_me_harder(skb, AF_INET6);
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
if (err < 0)
ret = NF_DROP_ERR(err);
}
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 7745609665cd..31ba7ca19757 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -34,7 +34,7 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
ctinfo == IP_CT_RELATED_REPLY));
- if (ipv6_dev_get_saddr(dev_net(out), out,
+ if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
&ipv6_hdr(skb)->daddr, 0, &src) < 0)
return NF_DROP;
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 94b4c6dfb400..e0f922b777e3 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -26,7 +26,7 @@ const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
int tcphoff;
proto = oip6h->nexthdr;
- tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data),
+ tcphoff = ipv6_skip_exthdr(oldskb, ((u8 *)(oip6h + 1) - oldskb->data),
&proto, &frag_off);
if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
@@ -206,7 +206,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
dev_queue_xmit(nskb);
} else
#endif
- ip6_local_out(nskb);
+ ip6_local_out(net, nskb->sk, nskb);
}
EXPORT_SYMBOL_GPL(nf_send_reset6);
@@ -224,7 +224,7 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
return true;
proto = ip6h->nexthdr;
- thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+ thoff = ipv6_skip_exthdr(skb, ((u8 *)(ip6h + 1) - skb->data), &proto, &fo);
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index c8148ba76d1a..120ea9131be0 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -16,20 +16,20 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_ipv6.h>
-static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops,
+static unsigned int nft_do_chain_ipv6(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;
/* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0)
+ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
return NF_DROP;
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
+static unsigned int nft_ipv6_output(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -40,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops,
return NF_ACCEPT;
}
- return nft_do_chain_ipv6(ops, skb, state);
+ return nft_do_chain_ipv6(priv, skb, state);
}
struct nft_af_info nft_af_ipv6 __read_mostly = {
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 951bb458b7bd..443cd306c0b0 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -24,44 +24,44 @@
#include <net/netfilter/nf_nat_l3proto.h>
#include <net/ipv6.h>
-static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_do_chain(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state,
struct nf_conn *ct)
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, ops, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb, state);
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_fn(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_in(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_in(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_out(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_out(priv, skb, state, nft_nat_do_chain);
}
-static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops,
+static unsigned int nft_nat_ipv6_local_fn(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain);
+ return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
}
static const struct nf_chain_type nft_chain_nat_ipv6 = {
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 0dafdaac5e17..71d995ff3108 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -22,7 +22,7 @@
#include <net/netfilter/nf_tables_ipv6.h>
#include <net/route.h>
-static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+static unsigned int nf_route_table_hook(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -33,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
u32 mark, flowlabel;
/* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0)
+ if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
return NF_DROP;
/* save source/dest address, mark, hoplimit, flowlabel, priority */
@@ -45,14 +45,14 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
/* flowlabel and prio (includes version, which shouldn't change either */
flowlabel = *((u32 *)ipv6_hdr(skb));
- ret = nft_do_chain(&pkt, ops);
+ ret = nft_do_chain(&pkt, priv);
if (ret != NF_DROP && ret != NF_QUEUE &&
(memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) ||
memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) ||
skb->mark != mark ||
ipv6_hdr(skb)->hop_limit != hop_limit ||
flowlabel != *((u_int32_t *)ipv6_hdr(skb))))
- return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP;
+ return ip6_route_me_harder(state->net, skb) == 0 ? ret : NF_DROP;
return ret;
}
@@ -61,11 +61,11 @@ static const struct nf_chain_type nft_chain_route_ipv6 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
+ .owner = THIS_MODULE,
.hook_mask = (1 << NF_INET_LOCAL_OUT),
.hooks = {
- [NF_INET_LOCAL_OUT] = nf_route_table_hook,
- },
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
};
static int __init nft_chain_route_init(void)
diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c
index 0eaa4f65fdea..8bfd470cbe72 100644
--- a/net/ipv6/netfilter/nft_dup_ipv6.c
+++ b/net/ipv6/netfilter/nft_dup_ipv6.c
@@ -28,7 +28,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
int oif = regs->data[priv->sreg_dev];
- nf_dup_ipv6(pkt->skb, pkt->ops->hooknum, gw, oif);
+ nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
}
static int nft_dup_ipv6_init(const struct nft_ctx *ctx,
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index effd393bd517..aca44e89a881 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -35,8 +35,7 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
range.flags |= priv->flags;
- regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range,
- pkt->ops->hooknum);
+ regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->hook);
}
static struct nft_expr_type nft_redir_ipv6_type;
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index d0d1540ecf87..533cd5719c59 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -24,15 +24,14 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
- struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach6(net, pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
+ nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
+ pkt->hook);
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
break;
default:
break;
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 928a0fb0b744..462f2a76b5c2 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -138,7 +138,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst)
EXPORT_SYMBOL(ip6_dst_hoplimit);
#endif
-static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int len;
@@ -148,30 +148,20 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(len);
IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
- return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
-}
-
-int __ip6_local_out(struct sk_buff *skb)
-{
- return __ip6_local_out_sk(skb->sk, skb);
+ return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ dst_output);
}
EXPORT_SYMBOL_GPL(__ip6_local_out);
-int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
+int ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
int err;
- err = __ip6_local_out_sk(sk, skb);
+ err = __ip6_local_out(net, sk, skb);
if (likely(err == 1))
- err = dst_output_sk(sk, skb);
+ err = dst_output(net, sk, skb);
return err;
}
-EXPORT_SYMBOL_GPL(ip6_local_out_sk);
-
-int ip6_local_out(struct sk_buff *skb)
-{
- return ip6_local_out_sk(skb->sk, skb);
-}
EXPORT_SYMBOL_GPL(ip6_local_out);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fdbada1569a3..dc65ec198f7c 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -614,6 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
unsigned int flags)
{
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
struct ipv6hdr *iph;
struct sk_buff *skb;
int err;
@@ -652,9 +653,9 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
if (err)
goto error_fault;
- IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
- err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb,
- NULL, rt->dst.dev, dst_output_sk);
+ IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
+ err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+ NULL, rt->dst.dev, dst_output);
if (err > 0)
err = net_xmit_errno(err);
if (err)
@@ -666,7 +667,7 @@ error_fault:
err = -EFAULT;
kfree_skb(skb);
error:
- IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
if (err == -ENOBUFS && !np->recverr)
err = 0;
return err;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 968f31c01f89..d0619632723a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -61,6 +61,7 @@
#include <net/nexthop.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>
+#include <net/l3mdev.h>
#include <asm/uaccess.h>
@@ -86,9 +87,9 @@ static void ip6_dst_ifdown(struct dst_entry *,
static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -303,7 +304,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
.obsolete = DST_OBSOLETE_FORCE_CHK,
.error = -EINVAL,
.input = dst_discard,
- .output = dst_discard_sk,
+ .output = dst_discard_out,
},
.rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
.rt6i_protocol = RTPROT_KERNEL,
@@ -421,31 +422,7 @@ static bool rt6_check_expired(const struct rt6_info *rt)
static int rt6_info_hash_nhsfn(unsigned int candidate_count,
const struct flowi6 *fl6)
{
- unsigned int val = fl6->flowi6_proto;
-
- val ^= ipv6_addr_hash(&fl6->daddr);
- val ^= ipv6_addr_hash(&fl6->saddr);
-
- /* Work only if this not encapsulated */
- switch (fl6->flowi6_proto) {
- case IPPROTO_UDP:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- val ^= (__force u16)fl6->fl6_sport;
- val ^= (__force u16)fl6->fl6_dport;
- break;
-
- case IPPROTO_ICMPV6:
- val ^= (__force u16)fl6->fl6_icmp_type;
- val ^= (__force u16)fl6->fl6_icmp_code;
- break;
- }
- /* RFC6438 recommands to use flowlabel */
- val ^= (__force u32)fl6->flowlabel;
-
- /* Perhaps, we need to tune, this function? */
- val = val ^ (val >> 7) ^ (val >> 12);
- return val % candidate_count;
+ return get_hash_from_flowi6(fl6) % candidate_count;
}
static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
@@ -498,10 +475,10 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (dev->flags & IFF_LOOPBACK) {
if (!sprt->rt6i_idev ||
sprt->rt6i_idev->dev->ifindex != oif) {
- if (flags & RT6_LOOKUP_F_IFACE && oif)
+ if (flags & RT6_LOOKUP_F_IFACE)
continue;
- if (local && (!oif ||
- local->rt6i_idev->dev->ifindex == oif))
+ if (local &&
+ local->rt6i_idev->dev->ifindex == oif)
continue;
}
local = sprt;
@@ -538,7 +515,7 @@ static void rt6_probe_deferred(struct work_struct *w)
container_of(w, struct __rt6_probe_work, work);
addrconf_addr_solict_mult(&work->target, &mcaddr);
- ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
+ ndisc_send_ns(work->dev, &work->target, &mcaddr, NULL, NULL);
dev_put(work->dev);
kfree(work);
}
@@ -1068,6 +1045,9 @@ static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
saved_fn = fn;
+ if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF)
+ oif = 0;
+
redo_rt6_select:
rt = rt6_select(fn, oif, strict);
if (rt->rt6i_nsiblings)
@@ -1165,7 +1145,7 @@ void ip6_route_input(struct sk_buff *skb)
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip_tunnel_info *tun_info;
struct flowi6 fl6 = {
- .flowi6_iif = skb->dev->ifindex,
+ .flowi6_iif = l3mdev_fib_oif(skb->dev),
.daddr = iph->daddr,
.saddr = iph->saddr,
.flowlabel = ip6_flowinfo(iph),
@@ -1189,8 +1169,13 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table
struct dst_entry *ip6_route_output(struct net *net, const struct sock *sk,
struct flowi6 *fl6)
{
+ struct dst_entry *dst;
int flags = 0;
+ dst = l3mdev_rt6_dst_by_oif(net, fl6);
+ if (dst)
+ return dst;
+
fl6->flowi6_iif = LOOPBACK_IFINDEX;
if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr) ||
@@ -1218,7 +1203,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
new = &rt->dst;
new->__use = 1;
new->input = dst_discard;
- new->output = dst_discard_sk;
+ new->output = dst_discard_out;
dst_copy_metrics(new, &ort->dst);
rt->rt6i_idev = ort->rt6i_idev;
@@ -1744,21 +1729,21 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
return -EINVAL;
}
-int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
+static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg)
{
- int err;
struct net *net = cfg->fc_nlinfo.nl_net;
struct rt6_info *rt = NULL;
struct net_device *dev = NULL;
struct inet6_dev *idev = NULL;
struct fib6_table *table;
int addr_type;
+ int err = -EINVAL;
if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
- return -EINVAL;
+ goto out;
#ifndef CONFIG_IPV6_SUBTREES
if (cfg->fc_src_len)
- return -EINVAL;
+ goto out;
#endif
if (cfg->fc_ifindex) {
err = -ENODEV;
@@ -1873,7 +1858,7 @@ int ip6_route_info_create(struct fib6_config *cfg, struct rt6_info **rt_ret)
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
- rt->dst.output = dst_discard_sk;
+ rt->dst.output = dst_discard_out;
rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
@@ -1978,9 +1963,7 @@ install_route:
cfg->fc_nlinfo.nl_net = dev_net(dev);
- *rt_ret = rt;
-
- return 0;
+ return rt;
out:
if (dev)
dev_put(dev);
@@ -1989,20 +1972,21 @@ out:
if (rt)
dst_free(&rt->dst);
- *rt_ret = NULL;
-
- return err;
+ return ERR_PTR(err);
}
int ip6_route_add(struct fib6_config *cfg)
{
struct mx6_config mxc = { .mx = NULL, };
- struct rt6_info *rt = NULL;
+ struct rt6_info *rt;
int err;
- err = ip6_route_info_create(cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto out;
+ }
err = ip6_convert_metrics(&mxc, cfg);
if (err)
@@ -2284,7 +2268,6 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_INFO,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = ifindex,
.fc_dst_len = prefixlen,
@@ -2295,6 +2278,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_nlinfo.nl_net = net,
};
+ cfg.fc_table = l3mdev_fib_table_by_index(net, ifindex) ? : RT6_TABLE_INFO;
cfg.fc_dst = *prefix;
cfg.fc_gateway = *gwaddr;
@@ -2335,7 +2319,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
unsigned int pref)
{
struct fib6_config cfg = {
- .fc_table = RT6_TABLE_DFLT,
+ .fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_DFLT,
.fc_metric = IP6_RT_PRIO_USER,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
@@ -2382,7 +2366,8 @@ static void rtmsg_to_fib6_config(struct net *net,
{
memset(cfg, 0, sizeof(*cfg));
- cfg->fc_table = RT6_TABLE_MAIN;
+ cfg->fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
+ : RT6_TABLE_MAIN;
cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
cfg->fc_metric = rtmsg->rtmsg_metric;
cfg->fc_expires = rtmsg->rtmsg_info;
@@ -2466,7 +2451,7 @@ static int ip6_pkt_discard(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
@@ -2477,7 +2462,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
}
-static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)
+static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb->dev = skb_dst(skb)->dev;
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
@@ -2491,6 +2476,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
const struct in6_addr *addr,
bool anycast)
{
+ u32 tb_id;
struct net *net = dev_net(idev->dev);
struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
DST_NOCOUNT);
@@ -2513,7 +2499,8 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
- rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
+ tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
+ rt->rt6i_table = fib6_get_table(net, tb_id);
rt->dst.flags |= DST_NOCACHE;
atomic_set(&rt->dst.__refcnt, 1);
@@ -2892,9 +2879,12 @@ static int ip6_route_multipath_add(struct fib6_config *cfg)
r_cfg.fc_encap_type = nla_get_u16(nla);
}
- err = ip6_route_info_create(&r_cfg, &rt);
- if (err)
+ rt = ip6_route_info_create(&r_cfg);
+ if (IS_ERR(rt)) {
+ err = PTR_ERR(rt);
+ rt = NULL;
goto cleanup;
+ }
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
@@ -3273,6 +3263,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
} else {
fl6.flowi6_oif = oif;
+ if (netif_index_is_l3_master(net, oif)) {
+ fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
+ FLOWI_FLAG_SKIP_NH_OIF;
+ }
+
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 0909f4e0d53c..bb8f2fa1c7fb 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -114,14 +114,11 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,
}
EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence);
-__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp)
+__u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mssp)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
- tcp_synq_overflow(sk);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESSENT);
-
return __cookie_v6_init_sequence(iph, th, mssp);
}
@@ -173,7 +170,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out;
ret = NULL;
- req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk);
+ req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk, false);
if (!req)
goto out;
@@ -210,7 +207,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
ireq->wscale_ok = tcp_opt.wscale_ok;
ireq->tstamp_ok = tcp_opt.saw_tstamp;
req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0;
- treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0;
+ treq->snt_synack.v64 = 0;
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
@@ -238,9 +235,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
goto out_free;
}
- req->window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
tcp_select_initial_window(tcp_full_space(sk), req->mss,
- &req->rcv_wnd, &req->window_clamp,
+ &req->rsk_rcv_wnd, &req->rsk_window_clamp,
ireq->wscale_ok, &rcv_wscale,
dst_metric(dst, RTAX_INITRWND));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 97d9314ea361..f495d189f5e0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -70,8 +70,8 @@
#include <linux/crypto.h>
#include <linux/scatterlist.h>
-static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
-static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
+static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
@@ -82,7 +82,7 @@ static const struct inet_connection_sock_af_ops ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
const struct in6_addr *addr)
{
return NULL;
@@ -434,11 +434,11 @@ out:
}
-static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
+static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
- u16 queue_mapping,
- struct tcp_fastopen_cookie *foc)
+ struct tcp_fastopen_cookie *foc,
+ bool attach_req)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -447,10 +447,11 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
int err = -ENOMEM;
/* First, grab a route. */
- if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
+ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
+ IPPROTO_TCP)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, foc);
+ skb = tcp_make_synack(sk, dst, req, foc, attach_req);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -460,7 +461,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- skb_set_queue_mapping(skb, queue_mapping);
err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
err = net_xmit_eval(err);
}
@@ -476,13 +476,13 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
}
#ifdef CONFIG_TCP_MD5SIG
-static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
+static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
const struct in6_addr *addr)
{
return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}
-static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
+static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
@@ -621,8 +621,12 @@ clear_hash_noput:
return 1;
}
-static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
+#endif
+
+static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
+ const struct sk_buff *skb)
{
+#ifdef CONFIG_TCP_MD5SIG
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
@@ -659,26 +663,27 @@ static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb)
&ip6h->daddr, ntohs(th->dest));
return true;
}
+#endif
return false;
}
-#endif
-static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
+static void tcp_v6_init_req(struct request_sock *req,
+ const struct sock *sk_listener,
struct sk_buff *skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
- struct ipv6_pinfo *np = inet6_sk(sk);
+ const struct ipv6_pinfo *np = inet6_sk(sk_listener);
ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
/* So that link locals have meaning */
- if (!sk->sk_bound_dev_if &&
+ if (!sk_listener->sk_bound_dev_if &&
ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
ireq->ir_iif = tcp_v6_iif(skb);
if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
- (ipv6_opt_accepted(sk, skb, &TCP_SKB_CB(skb)->header.h6) ||
+ (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
np->rxopt.bits.rxinfo ||
np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
np->rxopt.bits.rxohlim || np->repflow)) {
@@ -687,13 +692,14 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk,
}
}
-static struct dst_entry *tcp_v6_route_req(struct sock *sk, struct flowi *fl,
+static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
+ struct flowi *fl,
const struct request_sock *req,
bool *strict)
{
if (strict)
*strict = true;
- return inet6_csk_route_req(sk, &fl->u.ip6, req);
+ return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
@@ -720,10 +726,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.route_req = tcp_v6_route_req,
.init_seq = tcp_v6_init_sequence,
.send_synack = tcp_v6_send_synack,
- .queue_hash_add = inet6_csk_reqsk_queue_hash_add,
};
-static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
+static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
u8 tclass, u32 label)
@@ -822,7 +827,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq,
kfree_skb(buff);
}
-static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
+static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
const struct tcphdr *th = tcp_hdr(skb);
u32 seq = 0, ack_seq = 0;
@@ -893,7 +898,7 @@ release_sk1:
#endif
}
-static void tcp_v6_send_ack(struct sock *sk, struct sk_buff *skb, u32 seq,
+static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
u32 label)
@@ -916,7 +921,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
inet_twsk_put(tw);
}
-static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
+static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
@@ -924,44 +929,18 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
*/
tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
- tcp_rsk(req)->rcv_nxt, req->rcv_wnd,
+ tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd,
tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
0, 0);
}
-static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
+static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
+#ifdef CONFIG_SYN_COOKIES
const struct tcphdr *th = tcp_hdr(skb);
- struct request_sock *req;
- struct sock *nsk;
-
- /* Find possible connection requests. */
- req = inet6_csk_search_req(sk, th->source,
- &ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb));
- if (req) {
- nsk = tcp_check_req(sk, skb, req, false);
- if (!nsk || nsk == sk)
- reqsk_put(req);
- return nsk;
- }
- nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
- &ipv6_hdr(skb)->saddr, th->source,
- &ipv6_hdr(skb)->daddr, ntohs(th->dest),
- tcp_v6_iif(skb));
-
- if (nsk) {
- if (nsk->sk_state != TCP_TIME_WAIT) {
- bh_lock_sock(nsk);
- return nsk;
- }
- inet_twsk_put(inet_twsk(nsk));
- return NULL;
- }
-#ifdef CONFIG_SYN_COOKIES
if (!th->syn)
sk = cookie_v6_check(sk, skb);
#endif
@@ -984,12 +963,13 @@ drop:
return 0; /* don't send reset */
}
-static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
+static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req,
struct dst_entry *dst)
{
struct inet_request_sock *ireq;
- struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct ipv6_pinfo *newnp;
+ const struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp6_sock *newtcp6sk;
struct inet_sock *newinet;
struct tcp_sock *newtp;
@@ -1057,7 +1037,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
goto out_overflow;
if (!dst) {
- dst = inet6_csk_route_req(sk, &fl6, req);
+ dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
if (!dst)
goto out;
}
@@ -1090,8 +1070,6 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
newsk->sk_bound_dev_if = ireq->ir_iif;
- sk_set_txhash(newsk);
-
/* Now IPv6 options...
First: no IPv4 options.
@@ -1181,7 +1159,7 @@ out:
}
/* The socket must have it's spinlock held when we get
- * here.
+ * here, unless it is a TCP_LISTEN socket.
*
* We have a potential double-lock case here, so even when
* doing backlog processing we use the BH locking scheme.
@@ -1252,18 +1230,14 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto csum_err;
if (sk->sk_state == TCP_LISTEN) {
- struct sock *nsk = tcp_v6_hnd_req(sk, skb);
+ struct sock *nsk = tcp_v6_cookie_check(sk, skb);
+
if (!nsk)
goto discard;
- /*
- * Queue it on the new socket if the new socket is active,
- * otherwise we just shortcircuit this and continue with
- * the new socket..
- */
if (nsk != sk) {
sock_rps_save_rxhash(nsk, skb);
- sk_mark_napi_id(sk, skb);
+ sk_mark_napi_id(nsk, skb);
if (tcp_child_process(sk, nsk, skb))
goto reset;
if (opt_skb)
@@ -1273,7 +1247,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
} else
sock_rps_save_rxhash(sk, skb);
- if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
+ if (tcp_rcv_state_process(sk, skb))
goto reset;
if (opt_skb)
goto ipv6_pktoptions;
@@ -1387,6 +1361,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)
th = tcp_hdr(skb);
hdr = ipv6_hdr(skb);
+lookup:
sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest,
inet6_iif(skb));
if (!sk)
@@ -1396,6 +1371,37 @@ process:
if (sk->sk_state == TCP_TIME_WAIT)
goto do_time_wait;
+ if (sk->sk_state == TCP_NEW_SYN_RECV) {
+ struct request_sock *req = inet_reqsk(sk);
+ struct sock *nsk = NULL;
+
+ sk = req->rsk_listener;
+ tcp_v6_fill_cb(skb, hdr, th);
+ if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (likely(sk->sk_state == TCP_LISTEN)) {
+ nsk = tcp_check_req(sk, skb, req, false);
+ } else {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ if (!nsk) {
+ reqsk_put(req);
+ goto discard_it;
+ }
+ if (nsk == sk) {
+ sock_hold(sk);
+ reqsk_put(req);
+ tcp_v6_restore_cb(skb);
+ } else if (tcp_child_process(sk, nsk, skb)) {
+ tcp_v6_send_reset(nsk, skb);
+ goto discard_it;
+ } else {
+ return 0;
+ }
+ }
if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
goto discard_and_relse;
@@ -1406,17 +1412,21 @@ process:
tcp_v6_fill_cb(skb, hdr, th);
-#ifdef CONFIG_TCP_MD5SIG
if (tcp_v6_inbound_md5_hash(sk, skb))
goto discard_and_relse;
-#endif
if (sk_filter(sk, skb))
goto discard_and_relse;
- sk_incoming_cpu_update(sk);
skb->dev = NULL;
+ if (sk->sk_state == TCP_LISTEN) {
+ ret = tcp_v6_do_rcv(sk, skb);
+ goto put_and_return;
+ }
+
+ sk_incoming_cpu_update(sk);
+
bh_lock_sock_nested(sk);
tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
ret = 0;
@@ -1431,6 +1441,7 @@ process:
}
bh_unlock_sock(sk);
+put_and_return:
sock_put(sk);
return ret ? -1 : 0;
@@ -1631,7 +1642,7 @@ static void tcp_v6_destroy_sock(struct sock *sk)
#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
- struct request_sock *req, int i, kuid_t uid)
+ const struct request_sock *req, int i)
{
long ttd = req->rsk_timer.expires - jiffies;
const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
@@ -1655,7 +1666,8 @@ static void get_openreq6(struct seq_file *seq,
1, /* timers active (only the expire timer) */
jiffies_to_clock_t(ttd),
req->num_timeout,
- from_kuid_munged(seq_user_ns(seq), uid),
+ from_kuid_munged(seq_user_ns(seq),
+ sock_i_uid(req->rsk_listener)),
0, /* non standard timer */
0, /* open_requests have no inode */
0, req);
@@ -1670,7 +1682,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
const struct inet_sock *inet = inet_sk(sp);
const struct tcp_sock *tp = tcp_sk(sp);
const struct inet_connection_sock *icsk = inet_csk(sp);
- struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
+ const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
dest = &sp->sk_v6_daddr;
src = &sp->sk_v6_rcv_saddr;
@@ -1714,7 +1726,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
(icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
tp->snd_cwnd,
sp->sk_state == TCP_LISTEN ?
- (fastopenq ? fastopenq->max_qlen : 0) :
+ fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
);
}
@@ -1760,18 +1772,12 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)
}
st = seq->private;
- switch (st->state) {
- case TCP_SEQ_STATE_LISTENING:
- case TCP_SEQ_STATE_ESTABLISHED:
- if (sk->sk_state == TCP_TIME_WAIT)
- get_timewait6_sock(seq, v, st->num);
- else
- get_tcp6_sock(seq, v, st->num);
- break;
- case TCP_SEQ_STATE_OPENREQ:
- get_openreq6(seq, v, st->num, st->uid);
- break;
- }
+ if (sk->sk_state == TCP_TIME_WAIT)
+ get_timewait6_sock(seq, v, st->num);
+ else if (sk->sk_state == TCP_NEW_SYN_RECV)
+ get_openreq6(seq, v, st->num);
+ else
+ get_tcp6_sock(seq, v, st->num);
out:
return 0;
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0aba654f5b91..01bcb49619ee 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -182,10 +182,12 @@ static inline int compute_score(struct sock *sk, struct net *net,
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
-#define SCORE2_MAX (1 + 1 + 1)
static inline int compute_score2(struct sock *sk, struct net *net,
const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr,
@@ -223,6 +225,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,
score++;
}
+ if (sk->sk_incoming_cpu == raw_smp_processor_id())
+ score++;
+
return score;
}
@@ -251,8 +256,7 @@ begin:
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
matches = 1;
- } else if (score == SCORE2_MAX)
- goto exact_match;
+ }
} else if (score == badness && reuseport) {
matches++;
if (reciprocal_scale(hash, matches) == 0)
@@ -269,7 +273,6 @@ begin:
goto begin;
if (result) {
-exact_match:
if (unlikely(!atomic_inc_not_zero_hint(&result->sk_refcnt, 2)))
result = NULL;
else if (unlikely(compute_score2(result, net, saddr, sport,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 74bd17882a2f..0eaab1fa6be5 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -42,8 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
ipv6_hdr(skb)->payload_len = htons(skb->len);
__skb_push(skb, skb->data - skb_network_header(skb));
- NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb,
- skb->dev, NULL,
+ NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
ip6_rcv_finish);
return -1;
}
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 09c76a7b474d..9db067a11b52 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -131,7 +131,14 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
return xfrm_output(sk, skb);
}
-static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct xfrm_state *x = skb_dst(skb)->xfrm;
+
+ return x->outer_mode->afinfo->output_finish(sk, skb);
+}
+
+static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct xfrm_state *x = dst->xfrm;
@@ -140,7 +147,7 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
#ifdef CONFIG_NETFILTER
if (!x) {
IP6CB(skb)->flags |= IP6SKB_REROUTED;
- return dst_output_sk(sk, skb);
+ return dst_output(net, sk, skb);
}
#endif
@@ -160,15 +167,16 @@ static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
if (x->props.mode == XFRM_MODE_TUNNEL &&
((skb->len > mtu && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)))) {
- return ip6_fragment(sk, skb,
- x->outer_mode->afinfo->output_finish);
+ return ip6_fragment(net, sk, skb,
+ __xfrm6_output_finish);
}
return x->outer_mode->afinfo->output_finish(sk, skb);
}
-int xfrm6_output(struct sock *sk, struct sk_buff *skb)
+int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb,
- NULL, skb_dst(skb)->dev, __xfrm6_output,
+ return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+ net, sk, skb, NULL, skb_dst(skb)->dev,
+ __xfrm6_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5cedfda4b241..08c9c93f3527 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -20,7 +20,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
-#include <net/vrf.h>
+#include <net/l3mdev.h>
#if IS_ENABLED(CONFIG_IPV6_MIP6)
#include <net/mip6.h>
#endif
@@ -133,10 +133,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
nexthdr = nh[nhoff];
- if (skb_dst(skb)) {
- oif = vrf_master_ifindex(skb_dst(skb)->dev) ?
- : skb_dst(skb)->dev->ifindex;
- }
+ if (skb_dst(skb))
+ oif = l3mdev_fib_oif(skb_dst(skb)->dev);
memset(fl6, 0, sizeof(struct flowi6));
fl6->flowi6_mark = skb->mark;
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 918151c11348..fcb2752419c6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -95,11 +95,10 @@ static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
/* Call Back functions */
static void iucv_callback_rx(struct iucv_path *, struct iucv_message *);
static void iucv_callback_txdone(struct iucv_path *, struct iucv_message *);
-static void iucv_callback_connack(struct iucv_path *, u8 ipuser[16]);
-static int iucv_callback_connreq(struct iucv_path *, u8 ipvmid[8],
- u8 ipuser[16]);
-static void iucv_callback_connrej(struct iucv_path *, u8 ipuser[16]);
-static void iucv_callback_shutdown(struct iucv_path *, u8 ipuser[16]);
+static void iucv_callback_connack(struct iucv_path *, u8 *);
+static int iucv_callback_connreq(struct iucv_path *, u8 *, u8 *);
+static void iucv_callback_connrej(struct iucv_path *, u8 *);
+static void iucv_callback_shutdown(struct iucv_path *, u8 *);
static struct iucv_sock_list iucv_sk_list = {
.lock = __RW_LOCK_UNLOCKED(iucv_sk_list.lock),
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 2a6a1fdd62c0..7eaa000c9258 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -713,7 +713,7 @@ static struct notifier_block __refdata iucv_cpu_notifier = {
*
* Sever an iucv path to free up the pathid. Used internally.
*/
-static int iucv_sever_pathid(u16 pathid, u8 userdata[16])
+static int iucv_sever_pathid(u16 pathid, u8 *userdata)
{
union iucv_param *parm;
@@ -876,7 +876,7 @@ static struct notifier_block iucv_reboot_notifier = {
* Returns the result of the CP IUCV call.
*/
int iucv_path_accept(struct iucv_path *path, struct iucv_handler *handler,
- u8 userdata[16], void *private)
+ u8 *userdata, void *private)
{
union iucv_param *parm;
int rc;
@@ -923,7 +923,7 @@ EXPORT_SYMBOL(iucv_path_accept);
* Returns the result of the CP IUCV call.
*/
int iucv_path_connect(struct iucv_path *path, struct iucv_handler *handler,
- u8 userid[8], u8 system[8], u8 userdata[16],
+ u8 *userid, u8 *system, u8 *userdata,
void *private)
{
union iucv_param *parm;
@@ -985,7 +985,7 @@ EXPORT_SYMBOL(iucv_path_connect);
*
* Returns the result from the CP IUCV call.
*/
-int iucv_path_quiesce(struct iucv_path *path, u8 userdata[16])
+int iucv_path_quiesce(struct iucv_path *path, u8 *userdata)
{
union iucv_param *parm;
int rc;
@@ -1017,7 +1017,7 @@ EXPORT_SYMBOL(iucv_path_quiesce);
*
* Returns the result from the CP IUCV call.
*/
-int iucv_path_resume(struct iucv_path *path, u8 userdata[16])
+int iucv_path_resume(struct iucv_path *path, u8 *userdata)
{
union iucv_param *parm;
int rc;
@@ -1047,7 +1047,7 @@ out:
*
* Returns the result from the CP IUCV call.
*/
-int iucv_path_sever(struct iucv_path *path, u8 userdata[16])
+int iucv_path_sever(struct iucv_path *path, u8 *userdata)
{
int rc;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 68aa9ffd4ae4..5871537af387 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -321,4 +321,7 @@ do { \
#define l2tp_dbg(ptr, type, fmt, ...) \
l2tp_printk(ptr, type, pr_debug, fmt, ##__VA_ARGS__)
+#define MODULE_ALIAS_L2TP_PWTYPE(type) \
+ MODULE_ALIAS("net-l2tp-type-" __stringify(type))
+
#endif /* _L2TP_CORE_H_ */
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 4b552873b556..e253c26f31ac 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -358,3 +358,4 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
MODULE_DESCRIPTION("L2TP ethernet pseudowire driver");
MODULE_VERSION("1.0");
+MODULE_ALIAS_L2TP_PWTYPE(5);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 79649937ec71..ec22078b0914 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -655,3 +655,4 @@ MODULE_VERSION("1.0");
* enums
*/
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 2, IPPROTO_L2TP);
+MODULE_ALIAS_NET_PF_PROTO(PF_INET, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d1ded3777815..aca38d8aed8e 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -801,3 +801,4 @@ MODULE_VERSION("1.0");
* enums
*/
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET6, 2, IPPROTO_L2TP);
+MODULE_ALIAS_NET_PF_PROTO(PF_INET6, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 9e13c2ff8789..f93c5be612a7 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -576,6 +576,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (info->attrs[L2TP_ATTR_MRU])
cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
+#ifdef CONFIG_MODULES
+ if (l2tp_nl_cmd_ops[cfg.pw_type] == NULL) {
+ genl_unlock();
+ request_module("net-l2tp-type-%u", cfg.pw_type);
+ genl_lock();
+ }
+#endif
if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
(l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
ret = -EPROTONOSUPPORT;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f56c9f69e9f2..1ad18c55064c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1863,3 +1863,4 @@ MODULE_DESCRIPTION("PPP over L2TP over UDP");
MODULE_LICENSE("GPL");
MODULE_VERSION(PPPOL2TP_DRV_VERSION);
MODULE_ALIAS("pppox-proto-" __stringify(PX_PROTO_OL2TP));
+MODULE_ALIAS_L2TP_PWTYPE(11);
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
new file mode 100644
index 000000000000..5d47325037bc
--- /dev/null
+++ b/net/l3mdev/Kconfig
@@ -0,0 +1,10 @@
+#
+# Configuration for L3 master device support
+#
+
+config NET_L3_MASTER_DEV
+ bool "L3 Master device support"
+ depends on INET || IPV6
+ ---help---
+ This module provides glue between core networking code and device
+ drivers to support L3 master devices like VRF.
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
new file mode 100644
index 000000000000..84a53a6f609a
--- /dev/null
+++ b/net/l3mdev/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the L3 device API
+#
+
+obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
new file mode 100644
index 000000000000..8e5ead366e7f
--- /dev/null
+++ b/net/l3mdev/l3mdev.c
@@ -0,0 +1,92 @@
+/*
+ * net/l3mdev/l3mdev.c - L3 master device implementation
+ * Copyright (c) 2015 Cumulus Networks
+ * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <net/l3mdev.h>
+
+/**
+ * l3mdev_master_ifindex_rcu - get index of L3 master device
+ * @dev: targeted interface
+ */
+
+int l3mdev_master_ifindex_rcu(struct net_device *dev)
+{
+ int ifindex = 0;
+
+ if (!dev)
+ return 0;
+
+ if (netif_is_l3_master(dev)) {
+ ifindex = dev->ifindex;
+ } else if (netif_is_l3_slave(dev)) {
+ struct net_device *master;
+
+ master = netdev_master_upper_dev_get_rcu(dev);
+ if (master)
+ ifindex = master->ifindex;
+ }
+
+ return ifindex;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
+
+/**
+ * l3mdev_fib_table_rcu - get FIB table id associated with an L3
+ * master interface
+ * @dev: targeted interface
+ */
+
+u32 l3mdev_fib_table_rcu(const struct net_device *dev)
+{
+ u32 tb_id = 0;
+
+ if (!dev)
+ return 0;
+
+ if (netif_is_l3_master(dev)) {
+ if (dev->l3mdev_ops->l3mdev_fib_table)
+ tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
+ } else if (netif_is_l3_slave(dev)) {
+ /* Users of netdev_master_upper_dev_get_rcu need non-const,
+ * but current inet_*type functions take a const
+ */
+ struct net_device *_dev = (struct net_device *) dev;
+ const struct net_device *master;
+
+ master = netdev_master_upper_dev_get_rcu(_dev);
+ if (master &&
+ master->l3mdev_ops->l3mdev_fib_table)
+ tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
+ }
+
+ return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu);
+
+u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
+{
+ struct net_device *dev;
+ u32 tb_id = 0;
+
+ if (!ifindex)
+ return 0;
+
+ rcu_read_lock();
+
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (dev)
+ tb_id = l3mdev_fib_table_rcu(dev);
+
+ rcu_read_unlock();
+
+ return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 5c564a68fb50..10ad4ac1fa0b 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -79,7 +79,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
(int)reason);
if (drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_STOP,
- &sta->sta, tid, NULL, 0))
+ &sta->sta, tid, NULL, 0, false))
sdata_info(sta->sdata,
"HW problem - can not stop rx aggregation for %pM tid %d\n",
sta->sta.addr, tid);
@@ -189,6 +189,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
struct ieee80211_local *local = sdata->local;
struct sk_buff *skb;
struct ieee80211_mgmt *mgmt;
+ bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU);
u16 capab;
skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom);
@@ -217,7 +218,8 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d
mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
mgmt->u.action.u.addba_resp.dialog_token = dialog_token;
- capab = (u16)(policy << 1); /* bit 1 aggregation policy */
+ capab = (u16)(amsdu << 0); /* bit 0 A-MSDU support */
+ capab |= (u16)(policy << 1); /* bit 1 aggregation policy */
capab |= (u16)(tid << 2); /* bit 5:2 TID number */
capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */
@@ -321,7 +323,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
__skb_queue_head_init(&tid_agg_rx->reorder_buf[i]);
ret = drv_ampdu_action(local, sta->sdata, IEEE80211_AMPDU_RX_START,
- &sta->sta, tid, &start_seq_num, 0);
+ &sta->sta, tid, &start_seq_num, 0, false);
ht_dbg(sta->sdata, "Rx A-MPDU request on %pM tid %d result %d\n",
sta->sta.addr, tid, ret);
if (ret) {
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index c8ba2e77737c..a758eb84e8f0 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -97,7 +97,8 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata,
mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ;
mgmt->u.action.u.addba_req.dialog_token = dialog_token;
- capab = (u16)(1 << 1); /* bit 1 aggregation policy */
+ capab = (u16)(1 << 0); /* bit 0 A-MSDU support */
+ capab |= (u16)(1 << 1); /* bit 1 aggregation policy */
capab |= (u16)(tid << 2); /* bit 5:2 TID number */
capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */
@@ -331,7 +332,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
return -EALREADY;
ret = drv_ampdu_action(local, sta->sdata,
IEEE80211_AMPDU_TX_STOP_FLUSH_CONT,
- &sta->sta, tid, NULL, 0);
+ &sta->sta, tid, NULL, 0, false);
WARN_ON_ONCE(ret);
return 0;
}
@@ -381,7 +382,7 @@ int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid,
tid_tx->tx_stop = reason == AGG_STOP_LOCAL_REQUEST;
ret = drv_ampdu_action(local, sta->sdata, action,
- &sta->sta, tid, NULL, 0);
+ &sta->sta, tid, NULL, 0, false);
/* HW shall not deny going back to legacy */
if (WARN_ON(ret)) {
@@ -469,7 +470,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
start_seq_num = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, IEEE80211_AMPDU_TX_START,
- &sta->sta, tid, &start_seq_num, 0);
+ &sta->sta, tid, &start_seq_num, 0, false);
if (ret) {
ht_dbg(sdata,
"BA request denied - HW unavailable for %pM tid %d\n",
@@ -693,7 +694,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
drv_ampdu_action(local, sta->sdata,
IEEE80211_AMPDU_TX_OPERATIONAL,
- &sta->sta, tid, NULL, tid_tx->buf_size);
+ &sta->sta, tid, NULL, tid_tx->buf_size,
+ tid_tx->amsdu);
/*
* synchronize with TX path, while splicing the TX path
@@ -918,8 +920,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
struct tid_ampdu_tx *tid_tx;
u16 capab, tid;
u8 buf_size;
+ bool amsdu;
capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab);
+ amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK;
tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2;
buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6;
@@ -968,6 +972,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
}
tid_tx->buf_size = buf_size;
+ tid_tx->amsdu = amsdu;
if (test_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state))
ieee80211_agg_tx_operational(local, sta, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7a77a1470f25..68e551e263c6 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -981,7 +981,7 @@ static int sta_apply_auth_flags(struct ieee80211_local *local,
* well. Some drivers require rate control initialized
* before drv_sta_state() is called.
*/
- if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ if (!test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
rate_control_rate_init(sta);
ret = sta_info_move_state(sta, IEEE80211_STA_ASSOC);
@@ -1120,8 +1120,11 @@ static int sta_apply_parameters(struct ieee80211_local *local,
local->hw.queues >= IEEE80211_NUM_ACS)
sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
- /* auth flags will be set later for TDLS stations */
- if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ /* auth flags will be set later for TDLS,
+ * and for unassociated stations that move to associated */
+ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) &&
+ (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) {
ret = sta_apply_auth_flags(local, sta, mask, set);
if (ret)
return ret;
@@ -1156,6 +1159,7 @@ static int sta_apply_parameters(struct ieee80211_local *local,
set_sta_flag(sta, WLAN_STA_TDLS_CHAN_SWITCH);
if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ !sdata->u.mgd.tdls_wider_bw_prohibited &&
ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
params->ext_capab_len >= 8 &&
params->ext_capab[7] & WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED)
@@ -1212,7 +1216,8 @@ static int sta_apply_parameters(struct ieee80211_local *local,
sta_apply_mesh_params(local, sta, params);
/* set the STA state after all sta info from usermode has been set */
- if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) ||
+ set & BIT(NL80211_STA_FLAG_ASSOCIATED)) {
ret = sta_apply_auth_flags(local, sta, mask, set);
if (ret)
return ret;
@@ -1254,12 +1259,14 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
* defaults -- if userspace wants something else we'll
* change it accordingly in sta_apply_parameters()
*/
- if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))) {
+ if (!(params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
+ !(params->sta_flags_set & (BIT(NL80211_STA_FLAG_AUTHENTICATED) |
+ BIT(NL80211_STA_FLAG_ASSOCIATED)))) {
sta_info_pre_move_state(sta, IEEE80211_STA_AUTH);
sta_info_pre_move_state(sta, IEEE80211_STA_ASSOC);
- } else {
- sta->sta.tdls = true;
}
+ if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+ sta->sta.tdls = true;
err = sta_apply_parameters(local, sta, params);
if (err) {
@@ -1268,10 +1275,12 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev,
}
/*
- * for TDLS, rate control should be initialized only when
- * rates are known and station is marked authorized
+ * for TDLS and for unassociated station, rate control should be
+ * initialized only when rates are known and station is marked
+ * authorized/associated
*/
- if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER))
+ if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
+ test_sta_flag(sta, WLAN_STA_ASSOC))
rate_control_rate_init(sta);
layer2_update = sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
@@ -1346,7 +1355,10 @@ static int ieee80211_change_station(struct wiphy *wiphy,
break;
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_AP_VLAN:
- statype = CFG80211_STA_AP_CLIENT;
+ if (test_sta_flag(sta, WLAN_STA_ASSOC))
+ statype = CFG80211_STA_AP_CLIENT;
+ else
+ statype = CFG80211_STA_AP_CLIENT_UNASSOC;
break;
default:
err = -EOPNOTSUPP;
@@ -3522,18 +3534,32 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
u16 frame_type, bool reg)
{
struct ieee80211_local *local = wiphy_priv(wiphy);
+ struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
switch (frame_type) {
case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ:
- if (reg)
+ if (reg) {
local->probe_req_reg++;
- else
- local->probe_req_reg--;
+ sdata->vif.probe_req_reg++;
+ } else {
+ if (local->probe_req_reg)
+ local->probe_req_reg--;
+
+ if (sdata->vif.probe_req_reg)
+ sdata->vif.probe_req_reg--;
+ }
if (!local->open_count)
break;
- ieee80211_queue_work(&local->hw, &local->reconfig_filter);
+ if (sdata->vif.probe_req_reg == 1)
+ drv_config_iface_filter(local, sdata, FIF_PROBE_REQ,
+ FIF_PROBE_REQ);
+ else if (sdata->vif.probe_req_reg == 0)
+ drv_config_iface_filter(local, sdata, 0,
+ FIF_PROBE_REQ);
+
+ ieee80211_configure_filter(local);
break;
default:
break;
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 1560c8482bcb..4d2aaebd4f97 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -123,6 +123,8 @@ static const char *hw_flag_names[NUM_IEEE80211_HW_FLAGS + 1] = {
FLAG(SUPPORTS_CLONED_SKBS),
FLAG(SINGLE_SCAN_ON_ALL_BANDS),
FLAG(TDLS_WIDER_BW),
+ FLAG(SUPPORTS_AMSDU_IN_AMPDU),
+ FLAG(BEACON_TX_STATUS),
/* keep last for the build bug below */
(void *)0x1
diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c
index 702ca122c498..7961e7d0b61e 100644
--- a/net/mac80211/debugfs_key.c
+++ b/net/mac80211/debugfs_key.c
@@ -2,6 +2,7 @@
* Copyright 2003-2005 Devicescape Software, Inc.
* Copyright (c) 2006 Jiri Benc <jbenc@suse.cz>
* Copyright 2007 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright (C) 2015 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -34,6 +35,14 @@ static const struct file_operations key_ ##name## _ops = { \
.llseek = generic_file_llseek, \
}
+#define KEY_OPS_W(name) \
+static const struct file_operations key_ ##name## _ops = { \
+ .read = key_##name##_read, \
+ .write = key_##name##_write, \
+ .open = simple_open, \
+ .llseek = generic_file_llseek, \
+}
+
#define KEY_FILE(name, format) \
KEY_READ_##format(name) \
KEY_OPS(name)
@@ -74,6 +83,41 @@ static ssize_t key_algorithm_read(struct file *file,
}
KEY_OPS(algorithm);
+static ssize_t key_tx_spec_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct ieee80211_key *key = file->private_data;
+ u64 pn;
+ int ret;
+
+ switch (key->conf.cipher) {
+ case WLAN_CIPHER_SUITE_WEP40:
+ case WLAN_CIPHER_SUITE_WEP104:
+ return -EINVAL;
+ case WLAN_CIPHER_SUITE_TKIP:
+ /* not supported yet */
+ return -EOPNOTSUPP;
+ case WLAN_CIPHER_SUITE_CCMP:
+ case WLAN_CIPHER_SUITE_CCMP_256:
+ case WLAN_CIPHER_SUITE_AES_CMAC:
+ case WLAN_CIPHER_SUITE_BIP_CMAC_256:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_128:
+ case WLAN_CIPHER_SUITE_BIP_GMAC_256:
+ case WLAN_CIPHER_SUITE_GCMP:
+ case WLAN_CIPHER_SUITE_GCMP_256:
+ ret = kstrtou64_from_user(userbuf, count, 16, &pn);
+ if (ret)
+ return ret;
+ /* PN is a 48-bit counter */
+ if (pn >= (1ULL << 48))
+ return -ERANGE;
+ atomic64_set(&key->conf.tx_pn, pn);
+ return count;
+ default:
+ return 0;
+ }
+}
+
static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
{
@@ -110,7 +154,7 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf,
}
return simple_read_from_buffer(userbuf, count, ppos, buf, len);
}
-KEY_OPS(tx_spec);
+KEY_OPS_W(tx_spec);
static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
@@ -278,6 +322,9 @@ KEY_OPS(key);
#define DEBUGFS_ADD(name) \
debugfs_create_file(#name, 0400, key->debugfs.dir, \
key, &key_##name##_ops);
+#define DEBUGFS_ADD_W(name) \
+ debugfs_create_file(#name, 0600, key->debugfs.dir, \
+ key, &key_##name##_ops);
void ieee80211_debugfs_key_add(struct ieee80211_key *key)
{
@@ -310,7 +357,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key)
DEBUGFS_ADD(keyidx);
DEBUGFS_ADD(hw_key_idx);
DEBUGFS_ADD(algorithm);
- DEBUGFS_ADD(tx_spec);
+ DEBUGFS_ADD_W(tx_spec);
DEBUGFS_ADD(rx_spec);
DEBUGFS_ADD(replays);
DEBUGFS_ADD(icverrors);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 1021e87c051f..37ea30e0754c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -114,14 +114,6 @@ static ssize_t ieee80211_if_fmt_##name( \
return scnprintf(buf, buflen, "%pM\n", sdata->field); \
}
-#define IEEE80211_IF_FMT_DEC_DIV_16(name, field) \
-static ssize_t ieee80211_if_fmt_##name( \
- const struct ieee80211_sub_if_data *sdata, \
- char *buf, int buflen) \
-{ \
- return scnprintf(buf, buflen, "%d\n", sdata->field / 16); \
-}
-
#define IEEE80211_IF_FMT_JIFFIES_TO_MS(name, field) \
static ssize_t ieee80211_if_fmt_##name( \
const struct ieee80211_sub_if_data *sdata, \
@@ -247,8 +239,6 @@ IEEE80211_IF_FILE_R(hw_queues);
/* STA attributes */
IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
-IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC);
-IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16);
IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
@@ -455,6 +445,34 @@ static ssize_t ieee80211_if_parse_uapsd_max_sp_len(
}
IEEE80211_IF_FILE_RW(uapsd_max_sp_len);
+static ssize_t ieee80211_if_fmt_tdls_wider_bw(
+ const struct ieee80211_sub_if_data *sdata, char *buf, int buflen)
+{
+ const struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ bool tdls_wider_bw;
+
+ tdls_wider_bw = ieee80211_hw_check(&sdata->local->hw, TDLS_WIDER_BW) &&
+ !ifmgd->tdls_wider_bw_prohibited;
+
+ return snprintf(buf, buflen, "%d\n", tdls_wider_bw);
+}
+
+static ssize_t ieee80211_if_parse_tdls_wider_bw(
+ struct ieee80211_sub_if_data *sdata, const char *buf, int buflen)
+{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+ u8 val;
+ int ret;
+
+ ret = kstrtou8(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ ifmgd->tdls_wider_bw_prohibited = !val;
+ return buflen;
+}
+IEEE80211_IF_FILE_RW(tdls_wider_bw);
+
/* AP attributes */
IEEE80211_IF_FILE(num_mcast_sta, u.ap.num_mcast_sta, ATOMIC);
IEEE80211_IF_FILE(num_sta_ps, u.ap.ps.num_sta_ps, ATOMIC);
@@ -606,14 +624,13 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
{
DEBUGFS_ADD(bssid);
DEBUGFS_ADD(aid);
- DEBUGFS_ADD(last_beacon);
- DEBUGFS_ADD(ave_beacon);
DEBUGFS_ADD(beacon_timeout);
DEBUGFS_ADD_MODE(smps, 0600);
DEBUGFS_ADD_MODE(tkip_mic_test, 0200);
DEBUGFS_ADD_MODE(beacon_loss, 0200);
DEBUGFS_ADD_MODE(uapsd_queues, 0600);
DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600);
+ DEBUGFS_ADD_MODE(tdls_wider_bw, 0600);
}
static void add_ap_files(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index 267c3b1ca047..a1d54318f16c 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -8,6 +8,60 @@
#include "trace.h"
#include "driver-ops.h"
+int drv_add_interface(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ int ret;
+
+ might_sleep();
+
+ if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
+ !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
+ !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
+ return -EINVAL;
+
+ trace_drv_add_interface(local, sdata);
+ ret = local->ops->add_interface(&local->hw, &sdata->vif);
+ trace_drv_return_int(local, ret);
+
+ if (ret == 0)
+ sdata->flags |= IEEE80211_SDATA_IN_DRIVER;
+
+ return ret;
+}
+
+int drv_change_interface(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum nl80211_iftype type, bool p2p)
+{
+ int ret;
+
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ trace_drv_change_interface(local, sdata, type, p2p);
+ ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
+ trace_drv_return_int(local, ret);
+ return ret;
+}
+
+void drv_remove_interface(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ trace_drv_remove_interface(local, sdata);
+ local->ops->remove_interface(&local->hw, &sdata->vif);
+ sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
+ trace_drv_return_void(local);
+}
+
__must_check
int drv_sta_state(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -39,3 +93,171 @@ int drv_sta_state(struct ieee80211_local *local,
trace_drv_return_int(local, ret);
return ret;
}
+
+void drv_sta_rc_update(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, u32 changed)
+{
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
+ (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
+ sdata->vif.type != NL80211_IFTYPE_MESH_POINT));
+
+ trace_drv_sta_rc_update(local, sdata, sta, changed);
+ if (local->ops->sta_rc_update)
+ local->ops->sta_rc_update(&local->hw, &sdata->vif,
+ sta, changed);
+
+ trace_drv_return_void(local);
+}
+
+int drv_conf_tx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata, u16 ac,
+ const struct ieee80211_tx_queue_params *params)
+{
+ int ret = -EOPNOTSUPP;
+
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ if (WARN_ONCE(params->cw_min == 0 ||
+ params->cw_min > params->cw_max,
+ "%s: invalid CW_min/CW_max: %d/%d\n",
+ sdata->name, params->cw_min, params->cw_max))
+ return -EINVAL;
+
+ trace_drv_conf_tx(local, sdata, ac, params);
+ if (local->ops->conf_tx)
+ ret = local->ops->conf_tx(&local->hw, &sdata->vif,
+ ac, params);
+ trace_drv_return_int(local, ret);
+ return ret;
+}
+
+u64 drv_get_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ u64 ret = -1ULL;
+
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return ret;
+
+ trace_drv_get_tsf(local, sdata);
+ if (local->ops->get_tsf)
+ ret = local->ops->get_tsf(&local->hw, &sdata->vif);
+ trace_drv_return_u64(local, ret);
+ return ret;
+}
+
+void drv_set_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u64 tsf)
+{
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ trace_drv_set_tsf(local, sdata, tsf);
+ if (local->ops->set_tsf)
+ local->ops->set_tsf(&local->hw, &sdata->vif, tsf);
+ trace_drv_return_void(local);
+}
+
+void drv_reset_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata)
+{
+ might_sleep();
+
+ if (!check_sdata_in_driver(sdata))
+ return;
+
+ trace_drv_reset_tsf(local, sdata);
+ if (local->ops->reset_tsf)
+ local->ops->reset_tsf(&local->hw, &sdata->vif);
+ trace_drv_return_void(local);
+}
+
+int drv_switch_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs, enum ieee80211_chanctx_switch_mode mode)
+{
+ int ret = 0;
+ int i;
+
+ if (!local->ops->switch_vif_chanctx)
+ return -EOPNOTSUPP;
+
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_chanctx *new_ctx =
+ container_of(vifs[i].new_ctx,
+ struct ieee80211_chanctx,
+ conf);
+ struct ieee80211_chanctx *old_ctx =
+ container_of(vifs[i].old_ctx,
+ struct ieee80211_chanctx,
+ conf);
+
+ WARN_ON_ONCE(!old_ctx->driver_present);
+ WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
+ new_ctx->driver_present) ||
+ (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
+ !new_ctx->driver_present));
+ }
+
+ trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
+ ret = local->ops->switch_vif_chanctx(&local->hw,
+ vifs, n_vifs, mode);
+ trace_drv_return_int(local, ret);
+
+ if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
+ for (i = 0; i < n_vifs; i++) {
+ struct ieee80211_chanctx *new_ctx =
+ container_of(vifs[i].new_ctx,
+ struct ieee80211_chanctx,
+ conf);
+ struct ieee80211_chanctx *old_ctx =
+ container_of(vifs[i].old_ctx,
+ struct ieee80211_chanctx,
+ conf);
+
+ new_ctx->driver_present = true;
+ old_ctx->driver_present = false;
+ }
+ }
+
+ return ret;
+}
+
+int drv_ampdu_action(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_ampdu_mlme_action action,
+ struct ieee80211_sta *sta, u16 tid,
+ u16 *ssn, u8 buf_size, bool amsdu)
+{
+ int ret = -EOPNOTSUPP;
+
+ might_sleep();
+
+ sdata = get_bss_sdata(sdata);
+ if (!check_sdata_in_driver(sdata))
+ return -EIO;
+
+ trace_drv_ampdu_action(local, sdata, action, sta, tid,
+ ssn, buf_size, amsdu);
+
+ if (local->ops->ampdu_action)
+ ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
+ sta, tid, ssn, buf_size, amsdu);
+
+ trace_drv_return_int(local, ret);
+
+ return ret;
+}
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 02d91332d7dd..30987099eb8f 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -137,59 +137,15 @@ static inline void drv_set_wakeup(struct ieee80211_local *local,
}
#endif
-static inline int drv_add_interface(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- int ret;
-
- might_sleep();
-
- if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
- (sdata->vif.type == NL80211_IFTYPE_MONITOR &&
- !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) &&
- !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE))))
- return -EINVAL;
-
- trace_drv_add_interface(local, sdata);
- ret = local->ops->add_interface(&local->hw, &sdata->vif);
- trace_drv_return_int(local, ret);
-
- if (ret == 0)
- sdata->flags |= IEEE80211_SDATA_IN_DRIVER;
-
- return ret;
-}
-
-static inline int drv_change_interface(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- enum nl80211_iftype type, bool p2p)
-{
- int ret;
+int drv_add_interface(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
- might_sleep();
-
- if (!check_sdata_in_driver(sdata))
- return -EIO;
-
- trace_drv_change_interface(local, sdata, type, p2p);
- ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p);
- trace_drv_return_int(local, ret);
- return ret;
-}
-
-static inline void drv_remove_interface(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- might_sleep();
-
- if (!check_sdata_in_driver(sdata))
- return;
+int drv_change_interface(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum nl80211_iftype type, bool p2p);
- trace_drv_remove_interface(local, sdata);
- local->ops->remove_interface(&local->hw, &sdata->vif);
- sdata->flags &= ~IEEE80211_SDATA_IN_DRIVER;
- trace_drv_return_void(local);
-}
+void drv_remove_interface(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
static inline int drv_config(struct ieee80211_local *local, u32 changed)
{
@@ -260,6 +216,22 @@ static inline void drv_configure_filter(struct ieee80211_local *local,
trace_drv_return_void(local);
}
+static inline void drv_config_iface_filter(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ unsigned int filter_flags,
+ unsigned int changed_flags)
+{
+ might_sleep();
+
+ trace_drv_config_iface_filter(local, sdata, filter_flags,
+ changed_flags);
+ if (local->ops->config_iface_filter)
+ local->ops->config_iface_filter(&local->hw, &sdata->vif,
+ filter_flags,
+ changed_flags);
+ trace_drv_return_void(local);
+}
+
static inline int drv_set_tim(struct ieee80211_local *local,
struct ieee80211_sta *sta, bool set)
{
@@ -580,25 +552,9 @@ int drv_sta_state(struct ieee80211_local *local,
enum ieee80211_sta_state old_state,
enum ieee80211_sta_state new_state);
-static inline void drv_sta_rc_update(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- struct ieee80211_sta *sta, u32 changed)
-{
- sdata = get_bss_sdata(sdata);
- if (!check_sdata_in_driver(sdata))
- return;
-
- WARN_ON(changed & IEEE80211_RC_SUPP_RATES_CHANGED &&
- (sdata->vif.type != NL80211_IFTYPE_ADHOC &&
- sdata->vif.type != NL80211_IFTYPE_MESH_POINT));
-
- trace_drv_sta_rc_update(local, sdata, sta, changed);
- if (local->ops->sta_rc_update)
- local->ops->sta_rc_update(&local->hw, &sdata->vif,
- sta, changed);
-
- trace_drv_return_void(local);
-}
+void drv_sta_rc_update(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_sta *sta, u32 changed);
static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
@@ -630,76 +586,17 @@ static inline void drv_sta_statistics(struct ieee80211_local *local,
trace_drv_return_void(local);
}
-static inline int drv_conf_tx(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata, u16 ac,
- const struct ieee80211_tx_queue_params *params)
-{
- int ret = -EOPNOTSUPP;
-
- might_sleep();
-
- if (!check_sdata_in_driver(sdata))
- return -EIO;
-
- if (WARN_ONCE(params->cw_min == 0 ||
- params->cw_min > params->cw_max,
- "%s: invalid CW_min/CW_max: %d/%d\n",
- sdata->name, params->cw_min, params->cw_max))
- return -EINVAL;
-
- trace_drv_conf_tx(local, sdata, ac, params);
- if (local->ops->conf_tx)
- ret = local->ops->conf_tx(&local->hw, &sdata->vif,
- ac, params);
- trace_drv_return_int(local, ret);
- return ret;
-}
-
-static inline u64 drv_get_tsf(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- u64 ret = -1ULL;
-
- might_sleep();
-
- if (!check_sdata_in_driver(sdata))
- return ret;
-
- trace_drv_get_tsf(local, sdata);
- if (local->ops->get_tsf)
- ret = local->ops->get_tsf(&local->hw, &sdata->vif);
- trace_drv_return_u64(local, ret);
- return ret;
-}
-
-static inline void drv_set_tsf(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- u64 tsf)
-{
- might_sleep();
-
- if (!check_sdata_in_driver(sdata))
- return;
-
- trace_drv_set_tsf(local, sdata, tsf);
- if (local->ops->set_tsf)
- local->ops->set_tsf(&local->hw, &sdata->vif, tsf);
- trace_drv_return_void(local);
-}
-
-static inline void drv_reset_tsf(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata)
-{
- might_sleep();
+int drv_conf_tx(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata, u16 ac,
+ const struct ieee80211_tx_queue_params *params);
- if (!check_sdata_in_driver(sdata))
- return;
-
- trace_drv_reset_tsf(local, sdata);
- if (local->ops->reset_tsf)
- local->ops->reset_tsf(&local->hw, &sdata->vif);
- trace_drv_return_void(local);
-}
+u64 drv_get_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
+void drv_set_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ u64 tsf);
+void drv_reset_tsf(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata);
static inline int drv_tx_last_beacon(struct ieee80211_local *local)
{
@@ -714,30 +611,11 @@ static inline int drv_tx_last_beacon(struct ieee80211_local *local)
return ret;
}
-static inline int drv_ampdu_action(struct ieee80211_local *local,
- struct ieee80211_sub_if_data *sdata,
- enum ieee80211_ampdu_mlme_action action,
- struct ieee80211_sta *sta, u16 tid,
- u16 *ssn, u8 buf_size)
-{
- int ret = -EOPNOTSUPP;
-
- might_sleep();
-
- sdata = get_bss_sdata(sdata);
- if (!check_sdata_in_driver(sdata))
- return -EIO;
-
- trace_drv_ampdu_action(local, sdata, action, sta, tid, ssn, buf_size);
-
- if (local->ops->ampdu_action)
- ret = local->ops->ampdu_action(&local->hw, &sdata->vif, action,
- sta, tid, ssn, buf_size);
-
- trace_drv_return_int(local, ret);
-
- return ret;
-}
+int drv_ampdu_action(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ enum ieee80211_ampdu_mlme_action action,
+ struct ieee80211_sta *sta, u16 tid,
+ u16 *ssn, u8 buf_size, bool amsdu);
static inline int drv_get_survey(struct ieee80211_local *local, int idx,
struct survey_info *survey)
@@ -1066,58 +944,9 @@ static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local,
trace_drv_return_void(local);
}
-static inline int
-drv_switch_vif_chanctx(struct ieee80211_local *local,
- struct ieee80211_vif_chanctx_switch *vifs,
- int n_vifs,
- enum ieee80211_chanctx_switch_mode mode)
-{
- int ret = 0;
- int i;
-
- if (!local->ops->switch_vif_chanctx)
- return -EOPNOTSUPP;
-
- for (i = 0; i < n_vifs; i++) {
- struct ieee80211_chanctx *new_ctx =
- container_of(vifs[i].new_ctx,
- struct ieee80211_chanctx,
- conf);
- struct ieee80211_chanctx *old_ctx =
- container_of(vifs[i].old_ctx,
- struct ieee80211_chanctx,
- conf);
-
- WARN_ON_ONCE(!old_ctx->driver_present);
- WARN_ON_ONCE((mode == CHANCTX_SWMODE_SWAP_CONTEXTS &&
- new_ctx->driver_present) ||
- (mode == CHANCTX_SWMODE_REASSIGN_VIF &&
- !new_ctx->driver_present));
- }
-
- trace_drv_switch_vif_chanctx(local, vifs, n_vifs, mode);
- ret = local->ops->switch_vif_chanctx(&local->hw,
- vifs, n_vifs, mode);
- trace_drv_return_int(local, ret);
-
- if (!ret && mode == CHANCTX_SWMODE_SWAP_CONTEXTS) {
- for (i = 0; i < n_vifs; i++) {
- struct ieee80211_chanctx *new_ctx =
- container_of(vifs[i].new_ctx,
- struct ieee80211_chanctx,
- conf);
- struct ieee80211_chanctx *old_ctx =
- container_of(vifs[i].old_ctx,
- struct ieee80211_chanctx,
- conf);
-
- new_ctx->driver_present = true;
- old_ctx->driver_present = false;
- }
- }
-
- return ret;
-}
+int drv_switch_vif_chanctx(struct ieee80211_local *local,
+ struct ieee80211_vif_chanctx_switch *vifs,
+ int n_vifs, enum ieee80211_chanctx_switch_mode mode);
static inline int drv_start_ap(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6e52659f923f..f9605f13def9 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -419,6 +419,8 @@ struct ieee80211_sta_tx_tspec {
bool downgraded;
};
+DECLARE_EWMA(beacon_signal, 16, 4)
+
struct ieee80211_if_managed {
struct timer_list timer;
struct timer_list conn_mon_timer;
@@ -490,16 +492,7 @@ struct ieee80211_if_managed {
s16 p2p_noa_index;
- /* Signal strength from the last Beacon frame in the current BSS. */
- int last_beacon_signal;
-
- /*
- * Weighted average of the signal strength from Beacon frames in the
- * current BSS. This is in units of 1/16 of the signal unit to maintain
- * accuracy and to speed up calculations, i.e., the value need to be
- * divided by 16 to get the actual value.
- */
- int ave_beacon_signal;
+ struct ewma_beacon_signal ave_beacon_signal;
/*
* Number of Beacon frames used in ave_beacon_signal. This can be used
@@ -535,6 +528,7 @@ struct ieee80211_if_managed {
struct sk_buff *teardown_skb; /* A copy to send through the AP */
spinlock_t teardown_lock; /* To lock changing teardown_skb */
bool tdls_chan_switch_prohibited;
+ bool tdls_wider_bw_prohibited;
/* WMM-AC TSPEC support */
struct ieee80211_sta_tx_tspec tx_tspec[IEEE80211_NUM_ACS];
@@ -1641,6 +1635,9 @@ void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
struct sk_buff *
ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb, u32 info_flags);
+void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
+ struct ieee80211_supported_band *sband,
+ int retry_count, int shift, bool send_to_cooked);
void ieee80211_check_fast_xmit(struct sta_info *sta);
void ieee80211_check_fast_xmit_all(struct ieee80211_local *local);
@@ -1853,7 +1850,7 @@ void ieee80211_dynamic_ps_disable_work(struct work_struct *work);
void ieee80211_dynamic_ps_timer(unsigned long data);
void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- int powersave);
+ bool powersave);
void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
struct ieee80211_hdr *hdr);
void ieee80211_sta_tx_notify(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 6964fc6a8ea2..42d7f0f65bd6 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1204,7 +1204,7 @@ static void ieee80211_iface_work(struct work_struct *work)
if (!ieee80211_sdata_running(sdata))
return;
- if (local->scanning)
+ if (test_bit(SCAN_SW_SCANNING, &local->scanning))
return;
if (!ieee80211_can_run_worker(local))
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ff79a13d231d..9b813a2f3a75 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -543,7 +543,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
NL80211_FEATURE_HT_IBSS |
NL80211_FEATURE_VIF_TXPOWER |
NL80211_FEATURE_MAC_ON_CREATE |
- NL80211_FEATURE_USERSPACE_MPM;
+ NL80211_FEATURE_USERSPACE_MPM |
+ NL80211_FEATURE_FULL_AP_CLIENT_STATE;
if (!ops->hw_scan)
wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index e06a5ca7c9a9..626e8de70842 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -94,6 +94,9 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
ieee80211_ht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan,
ie->ht_operation, &sta_chan_def);
+ ieee80211_vht_oper_to_chandef(sdata->vif.bss_conf.chandef.chan,
+ ie->vht_operation, &sta_chan_def);
+
if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
&sta_chan_def))
return false;
@@ -436,8 +439,6 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_chanctx_conf *chanctx_conf;
struct ieee80211_channel *channel;
- enum nl80211_channel_type channel_type =
- cfg80211_get_chandef_type(&sdata->vif.bss_conf.chandef);
struct ieee80211_supported_band *sband;
struct ieee80211_sta_ht_cap *ht_cap;
u8 *pos;
@@ -454,7 +455,10 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
sband = local->hw.wiphy->bands[channel->band];
ht_cap = &sband->ht_cap;
- if (!ht_cap->ht_supported || channel_type == NL80211_CHAN_NO_HT)
+ if (!ht_cap->ht_supported ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
return 0;
if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_ht_operation))
@@ -467,6 +471,68 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
return 0;
}
+int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_local *local = sdata->local;
+ enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
+ struct ieee80211_supported_band *sband;
+ u8 *pos;
+
+ sband = local->hw.wiphy->bands[band];
+ if (!sband->vht_cap.vht_supported ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ return 0;
+
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_cap))
+ return -ENOMEM;
+
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_cap));
+ ieee80211_ie_build_vht_cap(pos, &sband->vht_cap, sband->vht_cap.cap);
+
+ return 0;
+}
+
+int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ struct ieee80211_local *local = sdata->local;
+ struct ieee80211_chanctx_conf *chanctx_conf;
+ struct ieee80211_channel *channel;
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_sta_vht_cap *vht_cap;
+ u8 *pos;
+
+ rcu_read_lock();
+ chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf);
+ if (WARN_ON(!chanctx_conf)) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ channel = chanctx_conf->def.chan;
+ rcu_read_unlock();
+
+ sband = local->hw.wiphy->bands[channel->band];
+ vht_cap = &sband->vht_cap;
+
+ if (!vht_cap->vht_supported ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
+ sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
+ return 0;
+
+ if (skb_tailroom(skb) < 2 + sizeof(struct ieee80211_vht_operation))
+ return -ENOMEM;
+
+ pos = skb_put(skb, 2 + sizeof(struct ieee80211_vht_operation));
+ ieee80211_ie_build_vht_oper(pos, vht_cap,
+ &sdata->vif.bss_conf.chandef);
+
+ return 0;
+}
+
static void ieee80211_mesh_path_timer(unsigned long data)
{
struct ieee80211_sub_if_data *sdata =
@@ -540,9 +606,9 @@ int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
*
* Return the header length.
*/
-int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
- struct ieee80211s_hdr *meshhdr,
- const char *addr4or5, const char *addr6)
+unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211s_hdr *meshhdr,
+ const char *addr4or5, const char *addr6)
{
if (WARN_ON(!addr4or5 && addr6))
return 0;
@@ -637,6 +703,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
2 + ifmsh->mesh_id_len +
2 + sizeof(struct ieee80211_meshconf_ie) +
2 + sizeof(__le16) + /* awake window */
+ 2 + sizeof(struct ieee80211_vht_cap) +
+ 2 + sizeof(struct ieee80211_vht_operation) +
ifmsh->ie_len;
bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
@@ -718,6 +786,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
mesh_add_meshid_ie(sdata, skb) ||
mesh_add_meshconf_ie(sdata, skb) ||
mesh_add_awake_window_ie(sdata, skb) ||
+ mesh_add_vht_cap_ie(sdata, skb) ||
+ mesh_add_vht_oper_ie(sdata, skb) ||
mesh_add_vendor_ies(sdata, skb))
goto out_free;
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 50c8473cf9dc..a1596344c3ba 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -207,9 +207,9 @@ struct mesh_rmc {
/* Various */
int ieee80211_fill_mesh_addresses(struct ieee80211_hdr *hdr, __le16 *fc,
const u8 *da, const u8 *sa);
-int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
- struct ieee80211s_hdr *meshhdr,
- const char *addr4or5, const char *addr6);
+unsigned int ieee80211_new_mesh_header(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211s_hdr *meshhdr,
+ const char *addr4or5, const char *addr6);
int mesh_rmc_check(struct ieee80211_sub_if_data *sdata,
const u8 *addr, struct ieee80211s_hdr *mesh_hdr);
bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
@@ -227,6 +227,10 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb);
+int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
+int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
+ struct sk_buff *skb);
void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
void ieee80211s_init(void);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 58384642e03c..a360b24b7df8 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -226,6 +226,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
2 + sizeof(struct ieee80211_meshconf_ie) +
2 + sizeof(struct ieee80211_ht_cap) +
2 + sizeof(struct ieee80211_ht_operation) +
+ 2 + sizeof(struct ieee80211_vht_cap) +
+ 2 + sizeof(struct ieee80211_vht_operation) +
2 + 8 + /* peering IE */
sdata->u.mesh.ie_len);
if (!skb)
@@ -306,7 +308,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
if (action != WLAN_SP_MESH_PEERING_CLOSE) {
if (mesh_add_ht_cap_ie(sdata, skb) ||
- mesh_add_ht_oper_ie(sdata, skb))
+ mesh_add_ht_oper_ie(sdata, skb) ||
+ mesh_add_vht_cap_ie(sdata, skb) ||
+ mesh_add_vht_oper_ie(sdata, skb))
goto free;
}
@@ -402,6 +406,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
elems->ht_cap_elem, sta))
changed |= IEEE80211_RC_BW_CHANGED;
+ ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband,
+ elems->vht_cap_elem, sta);
+
if (bw != sta->sta.bandwidth)
changed |= IEEE80211_RC_BW_CHANGED;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cd7e55e08a23..56ef9a8e151c 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -82,13 +82,6 @@ MODULE_PARM_DESC(probe_wait_ms,
" before disconnecting (reason 4).");
/*
- * Weight given to the latest Beacon frame when calculating average signal
- * strength for Beacon frames received in the current BSS. This must be
- * between 1 and 15.
- */
-#define IEEE80211_SIGNAL_AVE_WEIGHT 3
-
-/*
* How many Beacon frames need to have been used in average signal strength
* before starting to indicate signal change events.
*/
@@ -943,7 +936,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local,
void ieee80211_send_nullfunc(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
- int powersave)
+ bool powersave)
{
struct sk_buff *skb;
struct ieee80211_hdr_3addr *nullfunc;
@@ -1427,7 +1420,7 @@ static void ieee80211_enable_ps(struct ieee80211_local *local,
msecs_to_jiffies(conf->dynamic_ps_timeout));
} else {
if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK))
- ieee80211_send_nullfunc(local, sdata, 1);
+ ieee80211_send_nullfunc(local, sdata, true);
if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
@@ -1642,7 +1635,7 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work)
msecs_to_jiffies(
local->hw.conf.dynamic_ps_timeout));
} else {
- ieee80211_send_nullfunc(local, sdata, 1);
+ ieee80211_send_nullfunc(local, sdata, true);
/* Flush to get the tx status of nullfunc frame */
ieee80211_flush_queues(local, sdata, false);
}
@@ -2275,7 +2268,7 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata)
if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) {
ifmgd->nullfunc_failed = false;
- ieee80211_send_nullfunc(sdata->local, sdata, 0);
+ ieee80211_send_nullfunc(sdata->local, sdata, false);
} else {
int ssid_len;
@@ -3262,16 +3255,6 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
if (ifmgd->associated &&
ether_addr_equal(mgmt->bssid, ifmgd->associated->bssid))
ieee80211_reset_ap_probe(sdata);
-
- if (ifmgd->auth_data && !ifmgd->auth_data->bss->proberesp_ies &&
- ether_addr_equal(mgmt->bssid, ifmgd->auth_data->bss->bssid)) {
- /* got probe response, continue with auth */
- sdata_info(sdata, "direct probe responded\n");
- ifmgd->auth_data->tries = 0;
- ifmgd->auth_data->timeout = jiffies;
- ifmgd->auth_data->timeout_started = true;
- run_again(sdata, ifmgd->auth_data->timeout);
- }
}
/*
@@ -3374,24 +3357,21 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
bssid = ifmgd->associated->bssid;
/* Track average RSSI from the Beacon frames of the current AP */
- ifmgd->last_beacon_signal = rx_status->signal;
if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
- ifmgd->ave_beacon_signal = rx_status->signal * 16;
+ ewma_beacon_signal_init(&ifmgd->ave_beacon_signal);
ifmgd->last_cqm_event_signal = 0;
ifmgd->count_beacon_signal = 1;
ifmgd->last_ave_beacon_signal = 0;
} else {
- ifmgd->ave_beacon_signal =
- (IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
- (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
- ifmgd->ave_beacon_signal) / 16;
ifmgd->count_beacon_signal++;
}
+ ewma_beacon_signal_add(&ifmgd->ave_beacon_signal, -rx_status->signal);
+
if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold &&
ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
- int sig = ifmgd->ave_beacon_signal;
+ int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
int last_sig = ifmgd->last_ave_beacon_signal;
struct ieee80211_event event = {
.type = RSSI_EVENT,
@@ -3418,10 +3398,11 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
if (bss_conf->cqm_rssi_thold &&
ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT &&
!(sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_CQM_RSSI)) {
- int sig = ifmgd->ave_beacon_signal / 16;
+ int sig = -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
int last_event = ifmgd->last_cqm_event_signal;
int thold = bss_conf->cqm_rssi_thold;
int hyst = bss_conf->cqm_rssi_hyst;
+
if (sig < thold &&
(last_event == 0 || sig < last_event - hyst)) {
ifmgd->last_cqm_event_signal = sig;
@@ -3456,31 +3437,27 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
len - baselen, false, &elems,
care_about_ies, ncrc);
- if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK)) {
- bool directed_tim = ieee80211_check_tim(elems.tim,
- elems.tim_len,
- ifmgd->aid);
- if (directed_tim) {
- if (local->hw.conf.dynamic_ps_timeout > 0) {
- if (local->hw.conf.flags & IEEE80211_CONF_PS) {
- local->hw.conf.flags &= ~IEEE80211_CONF_PS;
- ieee80211_hw_config(local,
- IEEE80211_CONF_CHANGE_PS);
- }
- ieee80211_send_nullfunc(local, sdata, 0);
- } else if (!local->pspolling && sdata->u.mgd.powersave) {
- local->pspolling = true;
-
- /*
- * Here is assumed that the driver will be
- * able to send ps-poll frame and receive a
- * response even though power save mode is
- * enabled, but some drivers might require
- * to disable power save here. This needs
- * to be investigated.
- */
- ieee80211_send_pspoll(local, sdata);
+ if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
+ ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid)) {
+ if (local->hw.conf.dynamic_ps_timeout > 0) {
+ if (local->hw.conf.flags & IEEE80211_CONF_PS) {
+ local->hw.conf.flags &= ~IEEE80211_CONF_PS;
+ ieee80211_hw_config(local,
+ IEEE80211_CONF_CHANGE_PS);
}
+ ieee80211_send_nullfunc(local, sdata, false);
+ } else if (!local->pspolling && sdata->u.mgd.powersave) {
+ local->pspolling = true;
+
+ /*
+ * Here is assumed that the driver will be
+ * able to send ps-poll frame and receive a
+ * response even though power save mode is
+ * enabled, but some drivers might require
+ * to disable power save here. This needs
+ * to be investigated.
+ */
+ ieee80211_send_pspoll(local, sdata);
}
}
@@ -3717,12 +3694,14 @@ static void ieee80211_sta_connection_lost(struct ieee80211_sub_if_data *sdata,
reason);
}
-static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
+static int ieee80211_auth(struct ieee80211_sub_if_data *sdata)
{
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_auth_data *auth_data = ifmgd->auth_data;
u32 tx_flags = 0;
+ u16 trans = 1;
+ u16 status = 0;
sdata_assert_lock(sdata);
@@ -3746,54 +3725,27 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
drv_mgd_prepare_tx(local, sdata);
- if (auth_data->bss->proberesp_ies) {
- u16 trans = 1;
- u16 status = 0;
-
- sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
- auth_data->bss->bssid, auth_data->tries,
- IEEE80211_AUTH_MAX_TRIES);
+ sdata_info(sdata, "send auth to %pM (try %d/%d)\n",
+ auth_data->bss->bssid, auth_data->tries,
+ IEEE80211_AUTH_MAX_TRIES);
- auth_data->expected_transaction = 2;
+ auth_data->expected_transaction = 2;
- if (auth_data->algorithm == WLAN_AUTH_SAE) {
- trans = auth_data->sae_trans;
- status = auth_data->sae_status;
- auth_data->expected_transaction = trans;
- }
-
- if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
- tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
- IEEE80211_TX_INTFL_MLME_CONN_TX;
-
- ieee80211_send_auth(sdata, trans, auth_data->algorithm, status,
- auth_data->data, auth_data->data_len,
- auth_data->bss->bssid,
- auth_data->bss->bssid, NULL, 0, 0,
- tx_flags);
- } else {
- const u8 *ssidie;
+ if (auth_data->algorithm == WLAN_AUTH_SAE) {
+ trans = auth_data->sae_trans;
+ status = auth_data->sae_status;
+ auth_data->expected_transaction = trans;
+ }
- sdata_info(sdata, "direct probe to %pM (try %d/%i)\n",
- auth_data->bss->bssid, auth_data->tries,
- IEEE80211_AUTH_MAX_TRIES);
+ if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS))
+ tx_flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
+ IEEE80211_TX_INTFL_MLME_CONN_TX;
- rcu_read_lock();
- ssidie = ieee80211_bss_get_ie(auth_data->bss, WLAN_EID_SSID);
- if (!ssidie) {
- rcu_read_unlock();
- return -EINVAL;
- }
- /*
- * Direct probe is sent to broadcast address as some APs
- * will not answer to direct packet in unassociated state.
- */
- ieee80211_send_probe_req(sdata, sdata->vif.addr, NULL,
- ssidie + 2, ssidie[1],
- NULL, 0, (u32) -1, true, 0,
- auth_data->bss->channel, false);
- rcu_read_unlock();
- }
+ ieee80211_send_auth(sdata, trans, auth_data->algorithm, status,
+ auth_data->data, auth_data->data_len,
+ auth_data->bss->bssid,
+ auth_data->bss->bssid, NULL, 0, 0,
+ tx_flags);
if (tx_flags == 0) {
auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
@@ -3874,8 +3826,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
bool status_acked = ifmgd->status_acked;
ifmgd->status_received = false;
- if (ifmgd->auth_data &&
- (ieee80211_is_probe_req(fc) || ieee80211_is_auth(fc))) {
+ if (ifmgd->auth_data && ieee80211_is_auth(fc)) {
if (status_acked) {
ifmgd->auth_data->timeout =
jiffies + IEEE80211_AUTH_TIMEOUT_SHORT;
@@ -3906,7 +3857,7 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata)
* so let's just kill the auth data
*/
ieee80211_destroy_auth_data(sdata, false);
- } else if (ieee80211_probe_auth(sdata)) {
+ } else if (ieee80211_auth(sdata)) {
u8 bssid[ETH_ALEN];
struct ieee80211_event event = {
.type = MLME_EVENT,
@@ -4613,7 +4564,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (err)
goto err_clear;
- err = ieee80211_probe_auth(sdata);
+ err = ieee80211_auth(sdata);
if (err) {
sta_info_destroy_addr(sdata, req->bss->bssid);
goto err_clear;
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index f2c75cf491fc..04401037140e 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -57,7 +57,7 @@ static void ieee80211_offchannel_ps_enable(struct ieee80211_sub_if_data *sdata)
* to send a new nullfunc frame to inform the AP that we
* are again sleeping.
*/
- ieee80211_send_nullfunc(local, sdata, 1);
+ ieee80211_send_nullfunc(local, sdata, true);
}
/* inform AP that we are awake again, unless power save is enabled */
@@ -66,7 +66,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
struct ieee80211_local *local = sdata->local;
if (!local->ps_sdata)
- ieee80211_send_nullfunc(local, sdata, 0);
+ ieee80211_send_nullfunc(local, sdata, false);
else if (local->offchannel_ps_enabled) {
/*
* In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware
@@ -93,7 +93,7 @@ static void ieee80211_offchannel_ps_disable(struct ieee80211_sub_if_data *sdata)
* restart the timer now and send a nullfunc frame to inform
* the AP that we are awake.
*/
- ieee80211_send_nullfunc(local, sdata, 0);
+ ieee80211_send_nullfunc(local, sdata, false);
mod_timer(&local->dynamic_ps_timer, jiffies +
msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout));
}
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index b676b9fa707b..ad88ad4e8eb1 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -23,7 +23,8 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
ieee80211_del_virtual_monitor(local);
- if (ieee80211_hw_check(hw, AMPDU_AGGREGATION)) {
+ if (ieee80211_hw_check(hw, AMPDU_AGGREGATION) &&
+ !(wowlan && wowlan->any)) {
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
set_sta_flag(sta, WLAN_STA_BLOCK_BA);
diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c
index 9ce8883d5f44..b07e2f748f93 100644
--- a/net/mac80211/rate.c
+++ b/net/mac80211/rate.c
@@ -305,7 +305,10 @@ static void __rate_control_send_low(struct ieee80211_hw *hw,
info->control.rates[0].idx = i;
break;
}
- WARN_ON_ONCE(i == sband->n_bitrates);
+ WARN_ONCE(i == sband->n_bitrates,
+ "no supported rates (0x%x) in rate_mask 0x%x with flags 0x%x\n",
+ sta ? sta->supp_rates[sband->band] : 0,
+ rate_mask, rate_flags);
info->control.rates[0].count =
(info->flags & IEEE80211_TX_CTL_NO_ACK) ?
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 1db5f7c3318a..820b0abc9c0d 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -85,12 +85,10 @@ minstrel_stats_open(struct inode *inode, struct file *file)
file->private_data = ms;
p = ms->buf;
p += sprintf(p, "\n");
- p += sprintf(p, "best __________rate_________ ______"
- "statistics______ ________last_______ "
- "______sum-of________\n");
- p += sprintf(p, "rate [name idx airtime max_tp] [ ø(tp) ø(prob) "
- "sd(prob)] [prob.|retry|suc|att] "
- "[#success | #attempts]\n");
+ p += sprintf(p,
+ "best __________rate_________ ________statistics________ ________last_______ ______sum-of________\n");
+ p += sprintf(p,
+ "rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [prob.|retry|suc|att] [#success | #attempts]\n");
for (i = 0; i < mi->n_rates; i++) {
struct minstrel_rate *mr = &mi->r[i];
@@ -112,7 +110,7 @@ minstrel_stats_open(struct inode *inode, struct file *file)
prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
- p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
+ p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
" %3u.%1u %3u %3u %-3u "
"%9llu %-9llu\n",
tp_max / 10, tp_max % 10,
diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c
index 6822ce0f95e5..5320e35ed3d0 100644
--- a/net/mac80211/rc80211_minstrel_ht_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c
@@ -86,7 +86,7 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p)
prob = MINSTREL_TRUNC(mrs->cur_prob * 1000);
eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000);
- p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
+ p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u"
" %3u.%1u %3u %3u %-3u "
"%9llu %-9llu\n",
tp_max / 10, tp_max % 10,
@@ -129,12 +129,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file)
p = ms->buf;
p += sprintf(p, "\n");
- p += sprintf(p, " best ____________rate__________ "
- "______statistics______ ________last_______ "
- "______sum-of________\n");
- p += sprintf(p, "mode guard # rate [name idx airtime max_tp] "
- "[ ø(tp) ø(prob) sd(prob)] [prob.|retry|suc|att] [#success | "
- "#attempts]\n");
+ p += sprintf(p,
+ " best ____________rate__________ ________statistics________ ________last_______ ______sum-of________\n");
+ p += sprintf(p,
+ "mode guard # rate [name idx airtime max_tp] [avg(tp) avg(prob) sd(prob)] [prob.|retry|suc|att] [#success | #attempts]\n");
p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p);
for (i = 0; i < MINSTREL_CCK_GROUP; i++)
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 64f1936350c6..c3644458e2ee 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -303,7 +303,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_hw *hw = &local->hw;
struct sta_info *sta;
- struct timespec uptime;
int i;
sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp);
@@ -339,8 +338,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
/* Mark TID as unreserved */
sta->reserved_tid = IEEE80211_TID_UNRESERVED;
- ktime_get_ts(&uptime);
- sta->last_connected = uptime.tv_sec;
+ sta->last_connected = ktime_get_seconds();
ewma_signal_init(&sta->avg_signal);
for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++)
ewma_signal_init(&sta->chain_signal_avg[i]);
@@ -1813,7 +1811,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
struct ieee80211_sub_if_data *sdata = sta->sdata;
struct ieee80211_local *local = sdata->local;
struct rate_control_ref *ref = NULL;
- struct timespec uptime;
u32 thr = 0;
int i, ac;
@@ -1838,8 +1835,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
BIT(NL80211_STA_INFO_RX_DROP_MISC) |
BIT(NL80211_STA_INFO_BEACON_LOSS);
- ktime_get_ts(&uptime);
- sinfo->connected_time = uptime.tv_sec - sta->last_connected;
+ sinfo->connected_time = ktime_get_seconds() - sta->last_connected;
sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx);
if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) |
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index b087c71ff7fe..d5ded8749ac4 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -133,6 +133,7 @@ enum ieee80211_agg_stop_reason {
* @buf_size: reorder buffer size at receiver
* @failed_bar_ssn: ssn of the last failed BAR tx attempt
* @bar_pending: BAR needs to be re-sent
+ * @amsdu: support A-MSDU within A-MPDU
*
* This structure's lifetime is managed by RCU, assignments to
* the array holding it must hold the aggregation mutex.
@@ -158,6 +159,7 @@ struct tid_ampdu_tx {
u16 failed_bar_ssn;
bool bar_pending;
+ bool amsdu;
};
/**
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 3ed7ddfbf8e8..9169ccc36534 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -669,16 +669,70 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(ieee80211_tx_status_noskb);
-void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
+void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb,
+ struct ieee80211_supported_band *sband,
+ int retry_count, int shift, bool send_to_cooked)
{
struct sk_buff *skb2;
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_sub_if_data *sdata;
+ struct net_device *prev_dev = NULL;
+ int rtap_len;
+
+ /* send frame to monitor interfaces now */
+ rtap_len = ieee80211_tx_radiotap_len(info);
+ if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) {
+ pr_err("ieee80211_tx_status: headroom too small\n");
+ dev_kfree_skb(skb);
+ return;
+ }
+ ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
+ rtap_len, shift);
+
+ /* XXX: is this sufficient for BPF? */
+ skb_set_mac_header(skb, 0);
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->pkt_type = PACKET_OTHERHOST;
+ skb->protocol = htons(ETH_P_802_2);
+ memset(skb->cb, 0, sizeof(skb->cb));
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
+ if (!ieee80211_sdata_running(sdata))
+ continue;
+
+ if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
+ !send_to_cooked)
+ continue;
+
+ if (prev_dev) {
+ skb2 = skb_clone(skb, GFP_ATOMIC);
+ if (skb2) {
+ skb2->dev = prev_dev;
+ netif_rx(skb2);
+ }
+ }
+
+ prev_dev = sdata->dev;
+ }
+ }
+ if (prev_dev) {
+ skb->dev = prev_dev;
+ netif_rx(skb);
+ skb = NULL;
+ }
+ rcu_read_unlock();
+ dev_kfree_skb(skb);
+}
+
+void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
+{
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
__le16 fc;
struct ieee80211_supported_band *sband;
- struct ieee80211_sub_if_data *sdata;
- struct net_device *prev_dev = NULL;
struct sta_info *sta;
struct rhash_head *tmp;
int retry_count;
@@ -686,7 +740,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
bool send_to_cooked;
bool acked;
struct ieee80211_bar *bar;
- int rtap_len;
int shift = 0;
int tid = IEEE80211_NUM_TIDS;
const struct bucket_table *tbl;
@@ -879,51 +932,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
return;
}
- /* send frame to monitor interfaces now */
- rtap_len = ieee80211_tx_radiotap_len(info);
- if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) {
- pr_err("ieee80211_tx_status: headroom too small\n");
- dev_kfree_skb(skb);
- return;
- }
- ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count,
- rtap_len, shift);
-
- /* XXX: is this sufficient for BPF? */
- skb_set_mac_header(skb, 0);
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->pkt_type = PACKET_OTHERHOST;
- skb->protocol = htons(ETH_P_802_2);
- memset(skb->cb, 0, sizeof(skb->cb));
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- if (sdata->vif.type == NL80211_IFTYPE_MONITOR) {
- if (!ieee80211_sdata_running(sdata))
- continue;
-
- if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
- !send_to_cooked)
- continue;
-
- if (prev_dev) {
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2) {
- skb2->dev = prev_dev;
- netif_rx(skb2);
- }
- }
-
- prev_dev = sdata->dev;
- }
- }
- if (prev_dev) {
- skb->dev = prev_dev;
- netif_rx(skb);
- skb = NULL;
- }
- rcu_read_unlock();
- dev_kfree_skb(skb);
+ /* send to monitor interfaces */
+ ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked);
}
EXPORT_SYMBOL(ieee80211_tx_status);
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 4e202d0679b2..ecc5e2a8f80b 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -41,9 +41,11 @@ static void ieee80211_tdls_add_ext_capab(struct ieee80211_sub_if_data *sdata,
struct sk_buff *skb)
{
struct ieee80211_local *local = sdata->local;
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
bool chan_switch = local->hw.wiphy->features &
NL80211_FEATURE_TDLS_CHANNEL_SWITCH;
- bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW);
+ bool wider_band = ieee80211_hw_check(&local->hw, TDLS_WIDER_BW) &&
+ !ifmgd->tdls_wider_bw_prohibited;
enum ieee80211_band band = ieee80211_get_sdata_band(sdata);
struct ieee80211_supported_band *sband = local->hw.wiphy->bands[band];
bool vht = sband && sband->vht_cap.vht_supported;
@@ -331,8 +333,8 @@ ieee80211_tdls_chandef_vht_upgrade(struct ieee80211_sub_if_data *sdata,
/* proceed to downgrade the chandef until usable or the same */
while (uc.width > max_width &&
- !cfg80211_reg_can_beacon(sdata->local->hw.wiphy,
- &uc, sdata->wdev.iftype))
+ !cfg80211_reg_can_beacon_relax(sdata->local->hw.wiphy, &uc,
+ sdata->wdev.iftype))
ieee80211_chandef_downgrade(&uc);
if (!cfg80211_chandef_identical(&uc, &sta->tdls_chandef)) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 6f14591d8ca9..314e3bd7fbdb 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -497,6 +497,36 @@ TRACE_EVENT(drv_configure_filter,
)
);
+TRACE_EVENT(drv_config_iface_filter,
+ TP_PROTO(struct ieee80211_local *local,
+ struct ieee80211_sub_if_data *sdata,
+ unsigned int filter_flags,
+ unsigned int changed_flags),
+
+ TP_ARGS(local, sdata, filter_flags, changed_flags),
+
+ TP_STRUCT__entry(
+ LOCAL_ENTRY
+ VIF_ENTRY
+ __field(unsigned int, filter_flags)
+ __field(unsigned int, changed_flags)
+ ),
+
+ TP_fast_assign(
+ LOCAL_ASSIGN;
+ VIF_ASSIGN;
+ __entry->filter_flags = filter_flags;
+ __entry->changed_flags = changed_flags;
+ ),
+
+ TP_printk(
+ LOCAL_PR_FMT VIF_PR_FMT
+ " filter_flags: %#x changed_flags: %#x",
+ LOCAL_PR_ARG, VIF_PR_ARG, __entry->filter_flags,
+ __entry->changed_flags
+ )
+);
+
TRACE_EVENT(drv_set_tim,
TP_PROTO(struct ieee80211_local *local,
struct ieee80211_sta *sta, bool set),
@@ -944,9 +974,9 @@ TRACE_EVENT(drv_ampdu_action,
struct ieee80211_sub_if_data *sdata,
enum ieee80211_ampdu_mlme_action action,
struct ieee80211_sta *sta, u16 tid,
- u16 *ssn, u8 buf_size),
+ u16 *ssn, u8 buf_size, bool amsdu),
- TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size),
+ TP_ARGS(local, sdata, action, sta, tid, ssn, buf_size, amsdu),
TP_STRUCT__entry(
LOCAL_ENTRY
@@ -955,6 +985,7 @@ TRACE_EVENT(drv_ampdu_action,
__field(u16, tid)
__field(u16, ssn)
__field(u8, buf_size)
+ __field(bool, amsdu)
VIF_ENTRY
),
@@ -966,12 +997,13 @@ TRACE_EVENT(drv_ampdu_action,
__entry->tid = tid;
__entry->ssn = ssn ? *ssn : 0;
__entry->buf_size = buf_size;
+ __entry->amsdu = amsdu;
),
TP_printk(
- LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d",
+ LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " action:%d tid:%d buf:%d amsdu:%d",
LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->action,
- __entry->tid, __entry->buf_size
+ __entry->tid, __entry->buf_size, __entry->amsdu
)
);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 7892eb8ed4c8..3478a83187e5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2770,7 +2770,8 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
*ieee80211_get_qos_ctl(hdr) = tid;
- hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
+ if (!sta->sta.txq[0])
+ hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
} else {
info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number);
@@ -3515,6 +3516,12 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
{
struct ieee80211_mutable_offsets offs = {};
struct sk_buff *bcn = __ieee80211_beacon_get(hw, vif, &offs, false);
+ struct sk_buff *copy;
+ struct ieee80211_supported_band *sband;
+ int shift;
+
+ if (!bcn)
+ return bcn;
if (tim_offset)
*tim_offset = offs.tim_offset;
@@ -3522,6 +3529,19 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
if (tim_length)
*tim_length = offs.tim_length;
+ if (ieee80211_hw_check(hw, BEACON_TX_STATUS) ||
+ !hw_to_local(hw)->monitors)
+ return bcn;
+
+ /* send a copy to monitor interfaces */
+ copy = skb_copy(bcn, GFP_ATOMIC);
+ if (!copy)
+ return bcn;
+
+ shift = ieee80211_vif_get_shift(vif);
+ sband = hw->wiphy->bands[ieee80211_get_sdata_band(vif_to_sdata(vif))];
+ ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false);
+
return bcn;
}
EXPORT_SYMBOL(ieee80211_beacon_get_tim);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1104421bc525..60c4dbf92625 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1966,7 +1966,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
if (!sdata->u.mgd.associated)
continue;
- ieee80211_send_nullfunc(local, sdata, 0);
+ ieee80211_send_nullfunc(local, sdata, false);
}
}
@@ -2017,8 +2017,9 @@ int ieee80211_reconfig(struct ieee80211_local *local)
mutex_lock(&local->sta_mtx);
list_for_each_entry(sta, &local->sta_list, list) {
- ieee80211_sta_tear_down_BA_sessions(
- sta, AGG_STOP_LOCAL_REQUEST);
+ if (!local->resuming)
+ ieee80211_sta_tear_down_BA_sessions(
+ sta, AGG_STOP_LOCAL_REQUEST);
clear_sta_flag(sta, WLAN_STA_BLOCK_BA);
}
@@ -2324,6 +2325,8 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
if (chandef->center_freq2)
vht_oper->center_freq_seg2_idx =
ieee80211_frequency_to_channel(chandef->center_freq2);
+ else
+ vht_oper->center_freq_seg2_idx = 0x00;
switch (chandef->width) {
case NL80211_CHAN_WIDTH_160:
@@ -2541,7 +2544,7 @@ int ieee80211_ave_rssi(struct ieee80211_vif *vif)
/* non-managed type inferfaces */
return 0;
}
- return ifmgd->ave_beacon_signal / 16;
+ return -ewma_beacon_signal_read(&ifmgd->ave_beacon_signal);
}
EXPORT_SYMBOL_GPL(ieee80211_ave_rssi);
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index c865ebb2ace2..57b5e94471af 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -266,6 +266,195 @@ ieee802154_set_ackreq_default(struct wpan_phy *wpan_phy,
return 0;
}
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+static void
+ieee802154_get_llsec_table(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_llsec_table **table)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+
+ *table = &sdata->sec.table;
+}
+
+static void
+ieee802154_lock_llsec_table(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+
+ mutex_lock(&sdata->sec_mtx);
+}
+
+static void
+ieee802154_unlock_llsec_table(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+
+ mutex_unlock(&sdata->sec_mtx);
+}
+
+static int
+ieee802154_set_llsec_params(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ const struct ieee802154_llsec_params *params,
+ int changed)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_set_params(&sdata->sec, params, changed);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_get_llsec_params(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev,
+ struct ieee802154_llsec_params *params)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_get_params(&sdata->sec, params);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_add_llsec_key(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ const struct ieee802154_llsec_key_id *id,
+ const struct ieee802154_llsec_key *key)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_key_add(&sdata->sec, id, key);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_del_llsec_key(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ const struct ieee802154_llsec_key_id *id)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_key_del(&sdata->sec, id);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_add_seclevel(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ const struct ieee802154_llsec_seclevel *sl)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_seclevel_add(&sdata->sec, sl);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_del_seclevel(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ const struct ieee802154_llsec_seclevel *sl)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_seclevel_del(&sdata->sec, sl);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_add_device(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ const struct ieee802154_llsec_device *dev_desc)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_dev_add(&sdata->sec, dev_desc);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_del_device(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ __le64 extended_addr)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_dev_del(&sdata->sec, extended_addr);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_add_devkey(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ __le64 extended_addr,
+ const struct ieee802154_llsec_device_key *key)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_devkey_add(&sdata->sec, extended_addr, key);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+
+static int
+ieee802154_del_devkey(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ __le64 extended_addr,
+ const struct ieee802154_llsec_device_key *key)
+{
+ struct net_device *dev = wpan_dev->netdev;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ int res;
+
+ mutex_lock(&sdata->sec_mtx);
+ res = mac802154_llsec_devkey_del(&sdata->sec, extended_addr, key);
+ mutex_unlock(&sdata->sec_mtx);
+
+ return res;
+}
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
+
const struct cfg802154_ops mac802154_config_ops = {
.add_virtual_intf_deprecated = ieee802154_add_iface_deprecated,
.del_virtual_intf_deprecated = ieee802154_del_iface_deprecated,
@@ -284,4 +473,20 @@ const struct cfg802154_ops mac802154_config_ops = {
.set_max_frame_retries = ieee802154_set_max_frame_retries,
.set_lbt_mode = ieee802154_set_lbt_mode,
.set_ackreq_default = ieee802154_set_ackreq_default,
+#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
+ .get_llsec_table = ieee802154_get_llsec_table,
+ .lock_llsec_table = ieee802154_lock_llsec_table,
+ .unlock_llsec_table = ieee802154_unlock_llsec_table,
+ /* TODO above */
+ .set_llsec_params = ieee802154_set_llsec_params,
+ .get_llsec_params = ieee802154_get_llsec_params,
+ .add_llsec_key = ieee802154_add_llsec_key,
+ .del_llsec_key = ieee802154_del_llsec_key,
+ .add_seclevel = ieee802154_add_seclevel,
+ .del_seclevel = ieee802154_del_seclevel,
+ .add_device = ieee802154_add_device,
+ .del_device = ieee802154_del_device,
+ .add_devkey = ieee802154_add_devkey,
+ .del_devkey = ieee802154_del_devkey,
+#endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
};
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index ed26952f9e14..7079cd32a7ad 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -367,12 +367,11 @@ static int mac802154_set_header_security(struct ieee802154_sub_if_data *sdata,
return 0;
}
-static int mac802154_header_create(struct sk_buff *skb,
- struct net_device *dev,
- unsigned short type,
- const void *daddr,
- const void *saddr,
- unsigned len)
+static int ieee802154_header_create(struct sk_buff *skb,
+ struct net_device *dev,
+ const struct ieee802154_addr *daddr,
+ const struct ieee802154_addr *saddr,
+ unsigned len)
{
struct ieee802154_hdr hdr;
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
@@ -423,24 +422,89 @@ static int mac802154_header_create(struct sk_buff *skb,
return hlen;
}
+static const struct wpan_dev_header_ops ieee802154_header_ops = {
+ .create = ieee802154_header_create,
+};
+
+/* This header create functionality assumes an 8 byte array at most
+ * for the source and destination pointers. To adapt this to the
+ * 802.15.4 data frame header we only use extended address handling
+ * and intra-PAN connections here. The fc fields are mostly fallback
+ * handling. This exists to provide dev_hard_header for dgram sockets.
+ */
+static int mac802154_header_create(struct sk_buff *skb,
+ struct net_device *dev,
+ unsigned short type,
+ const void *daddr,
+ const void *saddr,
+ unsigned len)
+{
+ struct ieee802154_hdr hdr;
+ struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct ieee802154_mac_cb cb = { };
+ int hlen;
+
+ if (!daddr)
+ return -EINVAL;
+
+ memset(&hdr.fc, 0, sizeof(hdr.fc));
+ hdr.fc.type = IEEE802154_FC_TYPE_DATA;
+ hdr.fc.ack_request = wpan_dev->ackreq;
+ hdr.seq = atomic_inc_return(&dev->ieee802154_ptr->dsn) & 0xFF;
+
+ /* TODO currently a workaround: pass a zeroed cb block so that the
+ * security parameter defaults are set according to the MIB.
+ */
+ if (mac802154_set_header_security(sdata, &hdr, &cb) < 0)
+ return -EINVAL;
+
+ hdr.dest.pan_id = wpan_dev->pan_id;
+ hdr.dest.mode = IEEE802154_ADDR_LONG;
+ ieee802154_be64_to_le64(&hdr.dest.extended_addr, daddr);
+
+ hdr.source.pan_id = hdr.dest.pan_id;
+ hdr.source.mode = IEEE802154_ADDR_LONG;
+
+ if (!saddr)
+ hdr.source.extended_addr = wpan_dev->extended_addr;
+ else
+ ieee802154_be64_to_le64(&hdr.source.extended_addr, saddr);
+
+ hlen = ieee802154_hdr_push(skb, &hdr);
+ if (hlen < 0)
+ return -EINVAL;
+
+ skb_reset_mac_header(skb);
+ skb->mac_len = hlen;
+
+ if (len > ieee802154_max_payload(&hdr))
+ return -EMSGSIZE;
+
+ return hlen;
+}
+
static int
mac802154_header_parse(const struct sk_buff *skb, unsigned char *haddr)
{
struct ieee802154_hdr hdr;
- struct ieee802154_addr *addr = (struct ieee802154_addr *)haddr;
if (ieee802154_hdr_peek_addrs(skb, &hdr) < 0) {
pr_debug("malformed packet\n");
return 0;
}
- *addr = hdr.source;
- return sizeof(*addr);
+ if (hdr.source.mode == IEEE802154_ADDR_LONG) {
+ ieee802154_le64_to_be64(haddr, &hdr.source.extended_addr);
+ return IEEE802154_EXTENDED_ADDR_LEN;
+ }
+
+ return 0;
}
-static struct header_ops mac802154_header_ops = {
- .create = mac802154_header_create,
- .parse = mac802154_header_parse,
+static const struct header_ops mac802154_header_ops = {
+ .create = mac802154_header_create,
+ .parse = mac802154_header_parse,
};
static const struct net_device_ops mac802154_wpan_ops = {
@@ -471,9 +535,29 @@ static void ieee802154_if_setup(struct net_device *dev)
dev->addr_len = IEEE802154_EXTENDED_ADDR_LEN;
memset(dev->broadcast, 0xff, IEEE802154_EXTENDED_ADDR_LEN);
- dev->hard_header_len = MAC802154_FRAME_HARD_HEADER_LEN;
- dev->needed_tailroom = 2 + 16; /* FCS + MIC */
- dev->mtu = IEEE802154_MTU;
+ /* Derive hard_header_len from IEEE802154_MIN_HEADER_LEN. AF_PACKET
+ * will not send frames without any payload, but ack frames
+ * have no payload, so subtract one so that we can send a 3 byte
+ * frame. The xmit callback assumes at least a hard header where the
+ * two byte fc and the sequence field are set.
+ */
+ dev->hard_header_len = IEEE802154_MIN_HEADER_LEN - 1;
+ /* The auth_tag header is for security and is placed in the private
+ * payload room of the mac frame, which sits between payload and FCS.
+ */
+ dev->needed_tailroom = IEEE802154_MAX_AUTH_TAG_LEN +
+ IEEE802154_FCS_LEN;
+ /* The mtu size is the payload without the mac header in this case.
+ * We have a dynamic length header with a minimum header length,
+ * which is hard_header_len. In this case we set the mtu to the size
+ * of the maximum payload, which is IEEE802154_MTU - IEEE802154_FCS_LEN -
+ * hard_header_len. The FCS is set by hardware or ndo_start_xmit,
+ * and the minimum mac header can be evaluated inside the driver
+ * layer. The rest of the mac header will be part of the payload if
+ * it is greater than hard_header_len.
+ */
+ dev->mtu = IEEE802154_MTU - IEEE802154_FCS_LEN -
+ dev->hard_header_len;
dev->tx_queue_len = 300;
dev->flags = IFF_NOARP | IFF_BROADCAST;
}
@@ -513,6 +597,7 @@ ieee802154_setup_sdata(struct ieee802154_sub_if_data *sdata,
sdata->dev->netdev_ops = &mac802154_wpan_ops;
sdata->dev->ml_priv = &mac802154_mlme_wpan;
wpan_dev->promiscuous_mode = false;
+ wpan_dev->header_ops = &ieee802154_header_ops;
mutex_init(&sdata->sec_mtx);
@@ -550,7 +635,8 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
if (!ndev)
return ERR_PTR(-ENOMEM);
- ndev->needed_headroom = local->hw.extra_tx_headroom;
+ ndev->needed_headroom = local->hw.extra_tx_headroom +
+ IEEE802154_MAX_HEADER_LEN;
ret = dev_alloc_name(ndev, ndev->name);
if (ret < 0)
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 985e9394e2af..7799d3c41fe2 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -401,6 +401,7 @@ int mac802154_llsec_dev_del(struct mac802154_llsec *sec, __le64 device_addr)
hash_del_rcu(&pos->bucket_s);
hash_del_rcu(&pos->bucket_hw);
+ list_del_rcu(&pos->dev.list);
call_rcu(&pos->rcu, llsec_dev_free_rcu);
return 0;
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index d1c33c1d6b9b..42e96729dae6 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -87,6 +87,10 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
skb->dev = sdata->dev;
+ /* TODO this should be moved after netif_receive_skb call, otherwise
+ * wireshark will show a mac header with security fields and the
+ * payload is already decrypted.
+ */
rc = mac802154_llsec_decrypt(&sdata->sec, skb);
if (rc) {
pr_debug("decryption failed: %i\n", rc);
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 7ed439172f30..3827f359b336 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -77,9 +77,6 @@ ieee802154_tx(struct ieee802154_local *local, struct sk_buff *skb)
put_unaligned_le16(crc, skb_put(skb, 2));
}
- if (skb_cow_head(skb, local->hw.extra_tx_headroom))
- goto err_tx;
-
/* Stop the netif queue on each sub_if_data object. */
ieee802154_stop_queue(&local->hw);
@@ -121,6 +118,10 @@ ieee802154_subif_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct ieee802154_sub_if_data *sdata = IEEE802154_DEV_TO_SUB_IF(dev);
int rc;
+ /* TODO we should move it to wpan_dev_hard_header and dev_hard_header
+ * functions. The reason is that wireshark will show a mac header with
+ * security fields although the payload is not encrypted.
+ */
rc = mac802154_llsec_encrypt(&sdata->sec, skb);
if (rc) {
netdev_warn(dev, "encryption failed: %i\n", rc);
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 21e70bc9af98..67591aef9cae 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
return en->labels * sizeof(struct mpls_shim_hdr);
}
-int mpls_output(struct sock *sk, struct sk_buff *skb)
+int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct mpls_iptunnel_encap *tun_encap_info;
struct mpls_shim_hdr *hdr;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 3e1b4abf1897..e22349ea7256 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -354,7 +354,7 @@ config NF_CT_NETLINK_HELPER
select NETFILTER_NETLINK
depends on NF_CT_NETLINK
depends on NETFILTER_NETLINK_QUEUE
- depends on NETFILTER_NETLINK_QUEUE_CT
+ depends on NETFILTER_NETLINK_GLUE_CT
depends on NETFILTER_ADVANCED
help
This option enables the user-space connection tracking helpers
@@ -362,13 +362,14 @@ config NF_CT_NETLINK_HELPER
If unsure, say `N'.
-config NETFILTER_NETLINK_QUEUE_CT
- bool "NFQUEUE integration with Connection Tracking"
- default n
- depends on NETFILTER_NETLINK_QUEUE
+config NETFILTER_NETLINK_GLUE_CT
+ bool "NFQUEUE and NFLOG integration with Connection Tracking"
+ default n
+ depends on (NETFILTER_NETLINK_QUEUE || NETFILTER_NETLINK_LOG) && NF_CT_NETLINK
help
- If this option is enabled, NFQUEUE can include Connection Tracking
- information together with the packet is the enqueued via NFNETLINK.
+ If this option is enabled, NFQUEUE and NFLOG can include
+ Connection Tracking information together with the packet that
+ is enqueued via NFNETLINK.
config NF_NAT
tristate
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 70d026d46fe7..7638c36b498c 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -10,8 +10,6 @@ obj-$(CONFIG_NETFILTER) = netfilter.o
obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
-nfnetlink_queue-y := nfnetlink_queue_core.o
-nfnetlink_queue-$(CONFIG_NETFILTER_NETLINK_QUEUE_CT) += nfnetlink_queue_ct.o
obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 8e47f8113495..09e661c3ae58 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -269,7 +269,7 @@ unsigned int nf_iterate(struct list_head *head,
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook(*elemp, skb, state);
+ verdict = (*elemp)->hook((*elemp)->priv, skb, state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
@@ -313,8 +313,6 @@ next_hook:
int err = nf_queue(skb, elem, state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
- if (err == -ECANCELED)
- goto next_hook;
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
@@ -348,6 +346,12 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
}
EXPORT_SYMBOL(skb_make_writable);
+/* This needs to be compiled in any case to avoid dependencies between the
+ * nfnetlink_queue code and nf_conntrack.
+ */
+struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
+EXPORT_SYMBOL_GPL(nfnl_ct_hook);
+
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
tracking in use: without this, connection may not be in hash table, and hence
@@ -385,9 +389,6 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
}
EXPORT_SYMBOL(nf_conntrack_destroy);
-struct nfq_ct_hook __rcu *nfq_ct_hook __read_mostly;
-EXPORT_SYMBOL_GPL(nfq_ct_hook);
-
/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
.id = NF_CT_DEFAULT_ZONE_ID,
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 338b4047776f..69ab9c2634e1 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -519,8 +519,7 @@ int
ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(
- dev_net(par->in ? par->in : par->out), index);
+ struct ip_set *set = ip_set_rcu_get(par->net, index);
int ret = 0;
BUG_ON(!set);
@@ -558,8 +557,7 @@ int
ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(
- dev_net(par->in ? par->in : par->out), index);
+ struct ip_set *set = ip_set_rcu_get(par->net, index);
int ret;
BUG_ON(!set);
@@ -581,8 +579,7 @@ int
ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
const struct xt_action_param *par, struct ip_set_adt_opt *opt)
{
- struct ip_set *set = ip_set_rcu_get(
- dev_net(par->in ? par->in : par->out), index);
+ struct ip_set *set = ip_set_rcu_get(par->net, index);
int ret = 0;
BUG_ON(!set);
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index dfd7b65b3d2a..0328f7250693 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -75,7 +75,7 @@ static void ip_vs_app_inc_rcu_free(struct rcu_head *head)
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
-ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+ip_vs_app_inc_new(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
struct ip_vs_protocol *pp;
@@ -107,7 +107,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
}
}
- ret = pp->register_app(net, inc);
+ ret = pp->register_app(ipvs, inc);
if (ret)
goto out;
@@ -127,7 +127,7 @@ ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
* Release app incarnation
*/
static void
-ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@@ -135,7 +135,7 @@ ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
return;
if (pp->unregister_app)
- pp->unregister_app(net, inc);
+ pp->unregister_app(ipvs, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@@ -175,14 +175,14 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
* Register an application incarnation in protocol applications
*/
int
-register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 proto,
__u16 port)
{
int result;
mutex_lock(&__ip_vs_app_mutex);
- result = ip_vs_app_inc_new(net, app, proto, port);
+ result = ip_vs_app_inc_new(ipvs, app, proto, port);
mutex_unlock(&__ip_vs_app_mutex);
@@ -191,15 +191,11 @@ register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
/* Register application for netns */
-struct ip_vs_app *register_ip_vs_app(struct net *net, struct ip_vs_app *app)
+struct ip_vs_app *register_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *a;
int err = 0;
- if (!ipvs)
- return ERR_PTR(-ENOENT);
-
mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry(a, &ipvs->app_list, a_list) {
@@ -230,21 +226,17 @@ out_unlock:
* We are sure there are no app incarnations attached to services
* Caller should use synchronize_rcu() or rcu_barrier()
*/
-void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
+void unregister_ip_vs_app(struct netns_ipvs *ipvs, struct ip_vs_app *app)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *a, *anxt, *inc, *nxt;
- if (!ipvs)
- return;
-
mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry_safe(a, anxt, &ipvs->app_list, a_list) {
if (app && strcmp(app->name, a->name))
continue;
list_for_each_entry_safe(inc, nxt, &a->incs_list, a_list) {
- ip_vs_app_inc_release(net, inc);
+ ip_vs_app_inc_release(ipvs, inc);
}
list_del(&a->a_list);
@@ -611,17 +603,19 @@ static const struct file_operations ip_vs_app_fops = {
};
#endif
-int __net_init ip_vs_app_net_init(struct net *net)
+int __net_init ip_vs_app_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
INIT_LIST_HEAD(&ipvs->app_list);
proc_create("ip_vs_app", 0, net->proc_net, &ip_vs_app_fops);
return 0;
}
-void __net_exit ip_vs_app_net_cleanup(struct net *net)
+void __net_exit ip_vs_app_net_cleanup(struct netns_ipvs *ipvs)
{
- unregister_ip_vs_app(net, NULL /* all */);
+ struct net *net = ipvs->net;
+
+ unregister_ip_vs_app(ipvs, NULL /* all */);
remove_proc_entry("ip_vs_app", net->proc_net);
}
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b0f7b626b56d..85ca189bdc3d 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -108,7 +108,7 @@ static inline void ct_write_unlock_bh(unsigned int key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int proto,
+static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
const union nf_inet_addr *addr,
__be16 port)
{
@@ -116,11 +116,11 @@ static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned int pro
if (af == AF_INET6)
return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
(__force u32)port, proto, ip_vs_conn_rnd) ^
- ((size_t)net>>8)) & ip_vs_conn_tab_mask;
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
#endif
return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
ip_vs_conn_rnd) ^
- ((size_t)net>>8)) & ip_vs_conn_tab_mask;
+ ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask;
}
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -141,14 +141,14 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
port = p->vport;
}
- return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
+ return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port);
}
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+ ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol,
&cp->caddr, cp->cport, NULL, 0, &p);
if (cp->pe) {
@@ -279,7 +279,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (!__ip_vs_conn_get(cp))
continue;
/* HIT */
@@ -314,33 +314,34 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
}
static int
-ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
+ip_vs_conn_fill_param_proto(struct netns_ipvs *ipvs,
+ int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph,
- int inverse, struct ip_vs_conn_param *p)
+ struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
- struct net *net = skb_net(skb);
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
if (pptr == NULL)
return 1;
- if (likely(!inverse))
- ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->saddr,
pptr[0], &iph->daddr, pptr[1], p);
else
- ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+ ip_vs_conn_fill_param(ipvs, af, iph->protocol, &iph->daddr,
pptr[1], &iph->saddr, pptr[0], p);
return 0;
}
struct ip_vs_conn *
-ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ip_vs_conn_in_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
return NULL;
return ip_vs_conn_in_get(&p);
@@ -359,7 +360,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) {
if (unlikely(p->pe_data && p->pe->ct_match)) {
- if (!ip_vs_conn_net_eq(cp, p->net))
+ if (cp->ipvs != p->ipvs)
continue;
if (p->pe == cp->pe && p->pe->ct_match(p, cp)) {
if (__ip_vs_conn_get(cp))
@@ -377,7 +378,7 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
p->vport == cp->vport && p->cport == cp->cport &&
cp->flags & IP_VS_CONN_F_TEMPLATE &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (__ip_vs_conn_get(cp))
goto out;
}
@@ -418,7 +419,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
p->protocol == cp->protocol &&
- ip_vs_conn_net_eq(cp, p->net)) {
+ cp->ipvs == p->ipvs) {
if (!__ip_vs_conn_get(cp))
continue;
/* HIT */
@@ -439,12 +440,13 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
}
struct ip_vs_conn *
-ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af,
+ const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn_param p;
- if (ip_vs_conn_fill_param_proto(af, skb, iph, inverse, &p))
+ if (ip_vs_conn_fill_param_proto(ipvs, af, skb, iph, &p))
return NULL;
return ip_vs_conn_out_get(&p);
@@ -638,7 +640,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
* so we can make the assumption that the svc_af is the same as the
* dest_af
*/
- dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, cp->af, &cp->daddr,
+ dest = ip_vs_find_dest(cp->ipvs, cp->af, cp->af, &cp->daddr,
cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark, cp->flags);
if (dest) {
@@ -668,7 +670,7 @@ void ip_vs_try_bind_dest(struct ip_vs_conn *cp)
#endif
ip_vs_bind_xmit(cp);
- pd = ip_vs_proto_data_get(ip_vs_conn_net(cp), cp->protocol);
+ pd = ip_vs_proto_data_get(cp->ipvs, cp->protocol);
if (pd && atomic_read(&pd->appcnt))
ip_vs_bind_app(cp, pd->pp);
}
@@ -746,7 +748,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs,
int ip_vs_check_template(struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
+ struct netns_ipvs *ipvs = ct->ipvs;
/*
* Checking the dest server status.
@@ -800,8 +802,7 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
- struct net *net = ip_vs_conn_net(cp);
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = cp->ipvs;
/*
* do I control anybody?
@@ -847,7 +848,7 @@ static void ip_vs_conn_expire(unsigned long data)
cp->timeout = 60*HZ;
if (ipvs->sync_state & IP_VS_STATE_MASTER)
- ip_vs_sync_conn(net, cp, sysctl_sync_threshold(ipvs));
+ ip_vs_sync_conn(ipvs, cp, sysctl_sync_threshold(ipvs));
ip_vs_conn_put(cp);
}
@@ -875,8 +876,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs = net_ipvs(p->net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+ struct netns_ipvs *ipvs = p->ipvs;
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->ipvs,
p->protocol);
cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
@@ -887,7 +888,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af,
INIT_HLIST_NODE(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
- ip_vs_conn_net_set(cp, p->net);
+ cp->ipvs = ipvs;
cp->af = p->af;
cp->daf = dest_af;
cp->protocol = p->protocol;
@@ -1061,7 +1062,7 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
size_t len = 0;
char dbuf[IP_VS_ADDRSTRLEN];
- if (!ip_vs_conn_net_eq(cp, net))
+ if (!net_eq(cp->ipvs->net, net))
return 0;
if (cp->pe_data) {
pe_data[0] = ' ';
@@ -1146,7 +1147,7 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_conn *cp = v;
struct net *net = seq_file_net(seq);
- if (!ip_vs_conn_net_eq(cp, net))
+ if (!net_eq(cp->ipvs->net, net))
return 0;
#ifdef CONFIG_IP_VS_IPV6
@@ -1240,7 +1241,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
}
/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(struct net *net)
+void ip_vs_random_dropentry(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
@@ -1256,7 +1257,7 @@ void ip_vs_random_dropentry(struct net *net)
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
- if (!ip_vs_conn_net_eq(cp, net))
+ if (cp->ipvs != ipvs)
continue;
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
@@ -1308,18 +1309,17 @@ void ip_vs_random_dropentry(struct net *net)
/*
* Flush all the connection entries in the ip_vs_conn_tab
*/
-static void ip_vs_conn_flush(struct net *net)
+static void ip_vs_conn_flush(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_conn *cp, *cp_c;
- struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
rcu_read_lock();
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) {
- if (!ip_vs_conn_net_eq(cp, net))
+ if (cp->ipvs != ipvs)
continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
@@ -1345,23 +1345,22 @@ flush_again:
/*
* per netns init and exit
*/
-int __net_init ip_vs_conn_net_init(struct net *net)
+int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
atomic_set(&ipvs->conn_count, 0);
- proc_create("ip_vs_conn", 0, net->proc_net, &ip_vs_conn_fops);
- proc_create("ip_vs_conn_sync", 0, net->proc_net, &ip_vs_conn_sync_fops);
+ proc_create("ip_vs_conn", 0, ipvs->net->proc_net, &ip_vs_conn_fops);
+ proc_create("ip_vs_conn_sync", 0, ipvs->net->proc_net,
+ &ip_vs_conn_sync_fops);
return 0;
}
-void __net_exit ip_vs_conn_net_cleanup(struct net *net)
+void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
{
/* flush all the connection entries first */
- ip_vs_conn_flush(net);
- remove_proc_entry("ip_vs_conn", net->proc_net);
- remove_proc_entry("ip_vs_conn_sync", net->proc_net);
+ ip_vs_conn_flush(ipvs);
+ remove_proc_entry("ip_vs_conn", ipvs->net->proc_net);
+ remove_proc_entry("ip_vs_conn_sync", ipvs->net->proc_net);
}
int __init ip_vs_conn_init(void)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 38fbc194b9cb..1e24fff53e4b 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -112,7 +112,7 @@ static inline void
ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct netns_ipvs *ipvs = cp->ipvs;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
@@ -146,7 +146,7 @@ static inline void
ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
{
struct ip_vs_dest *dest = cp->dest;
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
+ struct netns_ipvs *ipvs = cp->ipvs;
if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) {
struct ip_vs_cpu_stats *s;
@@ -179,7 +179,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
static inline void
ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
{
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
struct ip_vs_cpu_stats *s;
s = this_cpu_ptr(cp->dest->stats.cpustats);
@@ -215,7 +215,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
- ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, protocol, caddr, cport, vaddr,
vport, p);
p->pe = rcu_dereference(svc->pe);
if (p->pe && p->pe->fill_param)
@@ -245,20 +245,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
union nf_inet_addr snet; /* source network of the client,
after masking */
+ const union nf_inet_addr *src_addr, *dst_addr;
+
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ src_addr = &iph->saddr;
+ dst_addr = &iph->daddr;
+ } else {
+ src_addr = &iph->daddr;
+ dst_addr = &iph->saddr;
+ }
+
/* Mask saddr with the netmask to adjust template granularity */
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
- ipv6_addr_prefix(&snet.in6, &iph->saddr.in6,
+ ipv6_addr_prefix(&snet.in6, &src_addr->in6,
(__force __u32) svc->netmask);
else
#endif
- snet.ip = iph->saddr.ip & svc->netmask;
+ snet.ip = src_addr->ip & svc->netmask;
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph->saddr), ntohs(src_port),
- IP_VS_DBG_ADDR(svc->af, &iph->daddr), ntohs(dst_port),
+ IP_VS_DBG_ADDR(svc->af, src_addr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, dst_addr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -276,7 +286,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/
{
int protocol = iph->protocol;
- const union nf_inet_addr *vaddr = &iph->daddr;
+ const union nf_inet_addr *vaddr = dst_addr;
__be16 vport = 0;
if (dst_port == svc->port) {
@@ -366,8 +376,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol, &iph->saddr,
- src_port, &iph->daddr, dst_port, &param);
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol, src_addr,
+ src_port, dst_addr, dst_port, &param);
cp = ip_vs_conn_new(&param, dest->af, &dest->addr, dport, flags, dest,
skb->mark);
@@ -418,7 +428,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_conn *cp = NULL;
struct ip_vs_scheduler *sched;
struct ip_vs_dest *dest;
- __be16 _ports[2], *pptr;
+ __be16 _ports[2], *pptr, cport, vport;
+ const void *caddr, *vaddr;
unsigned int flags;
*ignored = 1;
@@ -429,14 +440,26 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
if (pptr == NULL)
return NULL;
+ if (likely(!ip_vs_iph_inverse(iph))) {
+ cport = pptr[0];
+ caddr = &iph->saddr;
+ vport = pptr[1];
+ vaddr = &iph->daddr;
+ } else {
+ cport = pptr[1];
+ caddr = &iph->daddr;
+ vport = pptr[0];
+ vaddr = &iph->saddr;
+ }
+
/*
* FTPDATA needs this check when using local real server.
* Never schedule Active FTPDATA connections from real server.
* For LVS-NAT they must be already created. For other methods
* with persistence the connection is created on SYN+ACK.
*/
- if (pptr[0] == FTPDATA) {
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
+ if (cport == FTPDATA) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
"Not scheduling FTPDATA");
return NULL;
}
@@ -444,19 +467,25 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Do not schedule replies from local real server.
*/
- if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (cp = pp->conn_in_get(svc->af, skb, iph, 1))) {
- IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
- "Not scheduling reply for existing connection");
- __ip_vs_conn_put(cp);
- return NULL;
+ if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+ cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph);
+ iph->hdr_flags ^= IP_VS_HDR_INVERSE;
+
+ if (cp) {
+ IP_VS_DBG_PKT(12, svc->af, pp, skb, iph->off,
+ "Not scheduling reply for existing"
+ " connection");
+ __ip_vs_conn_put(cp);
+ return NULL;
+ }
}
/*
* Persistent service
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT)
- return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored,
+ return ip_vs_sched_persist(svc, skb, cport, vport, ignored,
iph);
*ignored = 0;
@@ -464,7 +493,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Non-persistent service
*/
- if (!svc->fwmark && pptr[1] != svc->port) {
+ if (!svc->fwmark && vport != svc->port) {
if (!svc->port)
pr_err("Schedule: port zero only supported "
"in persistent services, "
@@ -495,11 +524,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
- &iph->saddr, pptr[0], &iph->daddr,
- pptr[1], &p);
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
+ caddr, cport, vaddr, vport, &p);
cp = ip_vs_conn_new(&p, dest->af, &dest->addr,
- dest->port ? dest->port : pptr[1],
+ dest->port ? dest->port : vport,
flags, dest, skb->mark);
if (!cp) {
*ignored = -1;
@@ -519,6 +547,15 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return cp;
}
+static inline int ip_vs_addr_is_unicast(struct net *net, int af,
+ union nf_inet_addr *addr)
+{
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ return ipv6_addr_type(&addr->in6) & IPV6_ADDR_UNICAST;
+#endif
+ return (inet_addr_type(net, addr->ip) == RTN_UNICAST);
+}
/*
* Pass or drop the packet.
@@ -528,33 +565,21 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd, struct ip_vs_iphdr *iph)
{
- __be16 _ports[2], *pptr;
-#ifdef CONFIG_SYSCTL
- struct net *net;
- struct netns_ipvs *ipvs;
- int unicast;
-#endif
+ __be16 _ports[2], *pptr, dport;
+ struct netns_ipvs *ipvs = svc->ipvs;
+ struct net *net = ipvs->net;
pptr = frag_safe_skb_hp(skb, iph->len, sizeof(_ports), _ports, iph);
- if (pptr == NULL) {
+ if (!pptr)
return NF_DROP;
- }
-
-#ifdef CONFIG_SYSCTL
- net = skb_net(skb);
-
-#ifdef CONFIG_IP_VS_IPV6
- if (svc->af == AF_INET6)
- unicast = ipv6_addr_type(&iph->daddr.in6) & IPV6_ADDR_UNICAST;
- else
-#endif
- unicast = (inet_addr_type(net, iph->daddr.ip) == RTN_UNICAST);
+ dport = likely(!ip_vs_iph_inverse(iph)) ? pptr[1] : pptr[0];
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- ipvs = net_ipvs(net);
- if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
+ if (sysctl_cache_bypass(ipvs) && svc->fwmark &&
+ !(iph->hdr_flags & (IP_VS_HDR_INVERSE | IP_VS_HDR_ICMP)) &&
+ ip_vs_addr_is_unicast(net, svc->af, &iph->daddr)) {
int ret;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -566,7 +591,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->net, svc->af, iph->protocol,
+ ip_vs_conn_fill_param(svc->ipvs, svc->af, iph->protocol,
&iph->saddr, pptr[0],
&iph->daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, svc->af, &daddr, 0,
@@ -590,7 +615,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_put(cp);
return ret;
}
-#endif
/*
* When the virtual ftp service is presented, packets destined
@@ -598,9 +622,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
* listed in the ipvs table), pass the packets, because it is
* not ipvs job to decide to drop the packets.
*/
- if ((svc->port == FTPPORT) && (pptr[1] != FTPPORT))
+ if (svc->port == FTPPORT && dport != FTPPORT)
return NF_ACCEPT;
+ if (unlikely(ip_vs_iph_icmp(iph)))
+ return NF_DROP;
+
/*
* Notify the client that the destination is unreachable, and
* release the socket buffer.
@@ -610,11 +637,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
*/
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6) {
- if (!skb->dev) {
- struct net *net_ = dev_net(skb_dst(skb)->dev);
-
- skb->dev = net_->loopback_dev;
- }
+ if (!skb->dev)
+ skb->dev = net->loopback_dev;
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
} else
#endif
@@ -625,15 +649,13 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
#ifdef CONFIG_SYSCTL
-static int sysctl_snat_reroute(struct sk_buff *skb)
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
return ipvs->sysctl_snat_reroute;
}
-static int sysctl_nat_icmp_send(struct net *net)
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
return ipvs->sysctl_nat_icmp_send;
}
@@ -644,8 +666,8 @@ static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
#else
-static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
-static int sysctl_nat_icmp_send(struct net *net) { return 0; }
+static int sysctl_snat_reroute(struct netns_ipvs *ipvs) { return 0; }
+static int sysctl_nat_icmp_send(struct netns_ipvs *ipvs) { return 0; }
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
#endif
@@ -664,12 +686,13 @@ static inline enum ip_defrag_users ip_vs_defrag_user(unsigned int hooknum)
return IP_DEFRAG_VS_OUT;
}
-static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
+static inline int ip_vs_gather_frags(struct netns_ipvs *ipvs,
+ struct sk_buff *skb, u_int32_t user)
{
int err;
local_bh_disable();
- err = ip_defrag(skb, user);
+ err = ip_defrag(ipvs->net, skb, user);
local_bh_enable();
if (!err)
ip_send_check(ip_hdr(skb));
@@ -677,10 +700,10 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
return err;
}
-static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
- unsigned int hooknum)
+static int ip_vs_route_me_harder(struct netns_ipvs *ipvs, int af,
+ struct sk_buff *skb, unsigned int hooknum)
{
- if (!sysctl_snat_reroute(skb))
+ if (!sysctl_snat_reroute(ipvs))
return 0;
/* Reroute replies only to remote clients (FORWARD and LOCAL_OUT) */
if (NF_INET_LOCAL_IN == hooknum)
@@ -690,12 +713,12 @@ static int ip_vs_route_me_harder(int af, struct sk_buff *skb,
struct dst_entry *dst = skb_dst(skb);
if (dst->dev && !(dst->dev->flags & IFF_LOOPBACK) &&
- ip6_route_me_harder(skb) != 0)
+ ip6_route_me_harder(ipvs->net, skb) != 0)
return 1;
} else
#endif
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
- ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ ip_route_me_harder(ipvs->net, skb, RTN_LOCAL) != 0)
return 1;
return 0;
@@ -848,7 +871,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
- if (ip_vs_route_me_harder(af, skb, hooknum))
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
goto out;
/* do the statistics and put it back */
@@ -872,8 +895,8 @@ out:
* Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
-static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
- unsigned int hooknum)
+static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum)
{
struct iphdr *iph;
struct icmphdr _icmph, *ic;
@@ -888,7 +911,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
/* reassemble IP fragments */
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -934,10 +957,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking outgoing ICMP for");
- ip_vs_fill_ip4hdr(cih, &ciph);
- ciph.len += offset;
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
+
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, &ciph, 1);
+ cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph);
if (!cp)
return NF_ACCEPT;
@@ -947,16 +970,16 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
}
#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum, struct ip_vs_iphdr *ipvsh)
+static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *ipvsh)
{
struct icmp6hdr _icmph, *ic;
- struct ipv6hdr _ip6h, *ip6h; /* The ip header contained within ICMP */
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
union nf_inet_addr snet;
- unsigned int writable;
+ unsigned int offset;
*related = 1;
ic = frag_safe_skb_hp(skb, ipvsh->len, sizeof(_icmph), &_icmph, ipvsh);
@@ -984,31 +1007,23 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)),
&ipvsh->saddr, &ipvsh->daddr);
- /* Now find the contained IP header */
- ciph.len = ipvsh->len + sizeof(_icmph);
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
- if (ip6h == NULL)
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, ipvsh->len + sizeof(_icmph),
+ true, &ciph))
return NF_ACCEPT; /* The packet looks wrong, ignore */
- ciph.saddr.in6 = ip6h->saddr; /* conn_out_get() handles reverse order */
- ciph.daddr.in6 = ip6h->daddr;
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
- if (ciph.protocol < 0)
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
pp = ip_vs_proto_get(ciph.protocol);
if (!pp)
return NF_ACCEPT;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, &ciph, 1);
+ cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph);
if (!cp)
return NF_ACCEPT;
snet.in6 = ciph.saddr.in6;
- writable = ciph.len;
+ offset = ciph.len;
return handle_response_icmp(AF_INET6, skb, &snet, ciph.protocol, cp,
- pp, writable, sizeof(struct ipv6hdr),
+ pp, offset, sizeof(struct ipv6hdr),
hooknum);
}
#endif
@@ -1093,7 +1108,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
{
struct ip_vs_protocol *pp = pd->pp;
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
+ IP_VS_DBG_PKT(11, af, pp, skb, iph->off, "Outgoing packet");
if (!skb_make_writable(skb, iph->len))
goto drop;
@@ -1127,10 +1142,10 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
- if (ip_vs_route_me_harder(af, skb, hooknum))
+ if (ip_vs_route_me_harder(cp->ipvs, af, skb, hooknum))
goto drop;
- IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
+ IP_VS_DBG_PKT(10, af, pp, skb, iph->off, "After SNAT");
ip_vs_out_stats(cp, skb);
ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
@@ -1155,9 +1170,8 @@ drop:
* Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
-ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
@@ -1182,16 +1196,15 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
- net = skb_net(skb);
- if (!net_ipvs(net)->enable)
+ if (!ipvs->enable)
return NF_ACCEPT;
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_out_icmp_v6(skb, &related,
+ int verdict = ip_vs_out_icmp_v6(ipvs, skb, &related,
hooknum, &iph);
if (related)
@@ -1201,13 +1214,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related;
- int verdict = ip_vs_out_icmp(skb, &related, hooknum);
+ int verdict = ip_vs_out_icmp(ipvs, skb, &related, hooknum);
if (related)
return verdict;
}
- pd = ip_vs_proto_data_get(net, iph.protocol);
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
if (unlikely(!pd))
return NF_ACCEPT;
pp = pd->pp;
@@ -1217,21 +1230,21 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (af == AF_INET)
#endif
if (unlikely(ip_is_fragment(ip_hdr(skb)) && !pp->dont_defrag)) {
- if (ip_vs_gather_frags(skb,
+ if (ip_vs_gather_frags(ipvs, skb,
ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
- ip_vs_fill_ip4hdr(skb_network_header(skb), &iph);
+ ip_vs_fill_iph_skb(AF_INET, skb, false, &iph);
}
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, &iph, 0);
+ cp = pp->conn_out_get(ipvs, af, skb, &iph);
if (likely(cp))
return handle_response(af, skb, pd, cp, &iph, hooknum);
- if (sysctl_nat_icmp_send(net) &&
+ if (sysctl_nat_icmp_send(ipvs) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@@ -1241,7 +1254,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports, &iph);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_has_real_service(net, af, iph.protocol, &iph.saddr,
+ if (ip_vs_has_real_service(ipvs, af, iph.protocol, &iph.saddr,
pptr[0])) {
/*
* Notify the real server: there is no
@@ -1258,7 +1271,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (!skb->dev)
- skb->dev = net->loopback_dev;
+ skb->dev = ipvs->net->loopback_dev;
icmpv6_send(skb,
ICMPV6_DEST_UNREACH,
ICMPV6_PORT_UNREACH,
@@ -1272,7 +1285,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
}
}
}
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
+ IP_VS_DBG_PKT(12, af, pp, skb, iph.off,
"ip_vs_out: packet continues traversal as normal");
return NF_ACCEPT;
}
@@ -1283,10 +1296,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
}
/*
@@ -1294,10 +1307,10 @@ ip_vs_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1308,10 +1321,10 @@ ip_vs_local_reply4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_reply6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET6);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
/*
@@ -1319,14 +1332,51 @@ ip_vs_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Check if packet is reply for established ip_vs_conn.
*/
static unsigned int
-ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_reply6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_out(ops->hooknum, skb, AF_INET6);
+ return ip_vs_out(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
#endif
+static unsigned int
+ip_vs_try_to_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
+ int *verdict, struct ip_vs_conn **cpp,
+ struct ip_vs_iphdr *iph)
+{
+ struct ip_vs_protocol *pp = pd->pp;
+
+ if (!iph->fragoffs) {
+ /* No (second) fragments need to enter here, as nf_defrag_ipv6
+ * replayed fragment zero will already have created the cp
+ */
+
+ /* Schedule and create new connection entry into cpp */
+ if (!pp->conn_schedule(ipvs, af, skb, pd, verdict, cpp, iph))
+ return 0;
+ }
+
+ if (unlikely(!*cpp)) {
+ /* sorry, all this trouble for a no-hit :) */
+ IP_VS_DBG_PKT(12, af, pp, skb, iph->off,
+ "ip_vs_in: packet continues traversal as normal");
+ if (iph->fragoffs) {
+ /* Fragment that couldn't be mapped to a conn entry
+ * is missing module nf_defrag_ipv6
+ */
+ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
+ IP_VS_DBG_PKT(7, af, pp, skb, iph->off,
+ "unhandled fragment");
+ }
+ *verdict = NF_ACCEPT;
+ return 0;
+ }
+
+ return 1;
+}
+
/*
* Handle ICMP messages in the outside-to-inside direction (incoming).
* Find any that might be relevant, check against existing connections,
@@ -1334,9 +1384,9 @@ ip_vs_local_reply6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Currently handles error types - unreachable, quench, ttl exceeded.
*/
static int
-ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
+ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
+ unsigned int hooknum)
{
- struct net *net = NULL;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
@@ -1345,13 +1395,13 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
unsigned int offset, offset2, ihl, verdict;
- bool ipip;
+ bool ipip, new_cp = false;
*related = 1;
/* reassemble IP fragments */
if (ip_is_fragment(ip_hdr(skb))) {
- if (ip_vs_gather_frags(skb, ip_vs_defrag_user(hooknum)))
+ if (ip_vs_gather_frags(ipvs, skb, ip_vs_defrag_user(hooknum)))
return NF_STOLEN;
}
@@ -1385,8 +1435,6 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- net = skb_net(skb);
-
/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
@@ -1402,7 +1450,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ipip = true;
}
- pd = ip_vs_proto_data_get(net, cih->protocol);
+ pd = ip_vs_proto_data_get(ipvs, cih->protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
@@ -1416,15 +1464,24 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
"Checking incoming ICMP for");
offset2 = offset;
- ip_vs_fill_ip4hdr(cih, &ciph);
- ciph.len += offset;
+ ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, !ipip, &ciph);
offset = ciph.len;
+
/* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply.
*/
- cp = pp->conn_in_get(AF_INET, skb, &ciph, ipip ? 0 : 1);
- if (!cp)
- return NF_ACCEPT;
+ cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (!sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET, skb, pd, &v, &cp, &ciph))
+ return v;
+ new_cp = true;
+ }
verdict = NF_DROP;
@@ -1455,7 +1512,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
- ipv4_update_pmtu(skb, dev_net(skb->dev),
+ ipv4_update_pmtu(skb, ipvs->net,
mtu, 0, 0, 0, 0);
/* Client uses PMTUD? */
if (!(frag_off & htons(IP_DF)))
@@ -1501,23 +1558,26 @@ ignore_ipip:
verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum, &ciph);
out:
- __ip_vs_conn_put(cp);
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
return verdict;
}
#ifdef CONFIG_IP_VS_IPV6
-static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
- unsigned int hooknum, struct ip_vs_iphdr *iph)
+static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
+ int *related, unsigned int hooknum,
+ struct ip_vs_iphdr *iph)
{
- struct net *net = NULL;
- struct ipv6hdr _ip6h, *ip6h;
struct icmp6hdr _icmph, *ic;
struct ip_vs_iphdr ciph = {.flags = 0, .fragoffs = 0};/*Contained IP */
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
- unsigned int offs_ciph, writable, verdict;
+ unsigned int offset, verdict;
+ bool new_cp = false;
*related = 1;
@@ -1546,21 +1606,11 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
ic->icmp6_type, ntohs(icmpv6_id(ic)),
&iph->saddr, &iph->daddr);
- /* Now find the contained IP header */
- ciph.len = iph->len + sizeof(_icmph);
- offs_ciph = ciph.len; /* Save ip header offset */
- ip6h = skb_header_pointer(skb, ciph.len, sizeof(_ip6h), &_ip6h);
- if (ip6h == NULL)
- return NF_ACCEPT; /* The packet looks wrong, ignore */
- ciph.saddr.in6 = ip6h->saddr; /* conn_in_get() handles reverse order */
- ciph.daddr.in6 = ip6h->daddr;
- /* skip possible IPv6 exthdrs of contained IPv6 packet */
- ciph.protocol = ipv6_find_hdr(skb, &ciph.len, -1, &ciph.fragoffs, NULL);
- if (ciph.protocol < 0)
- return NF_ACCEPT; /* Contained IPv6 hdr looks wrong, ignore */
-
- net = skb_net(skb);
- pd = ip_vs_proto_data_get(net, ciph.protocol);
+ offset = iph->len + sizeof(_icmph);
+ if (!ip_vs_fill_iph_skb_icmp(AF_INET6, skb, offset, true, &ciph))
+ return NF_ACCEPT;
+
+ pd = ip_vs_proto_data_get(ipvs, ciph.protocol);
if (!pd)
return NF_ACCEPT;
pp = pd->pp;
@@ -1569,36 +1619,49 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
if (ciph.fragoffs)
return NF_ACCEPT;
- IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offs_ciph,
+ IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset,
"Checking incoming ICMPv6 for");
/* The embedded headers contain source and dest in reverse order
* if not from localhost
*/
- cp = pp->conn_in_get(AF_INET6, skb, &ciph,
- (hooknum == NF_INET_LOCAL_OUT) ? 0 : 1);
+ cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph);
+
+ if (!cp) {
+ int v;
+
+ if (!sysctl_schedule_icmp(ipvs))
+ return NF_ACCEPT;
+
+ if (!ip_vs_try_to_schedule(ipvs, AF_INET6, skb, pd, &v, &cp, &ciph))
+ return v;
+
+ new_cp = true;
+ }
- if (!cp)
- return NF_ACCEPT;
/* VS/TUN, VS/DR and LOCALNODE just let it go */
if ((hooknum == NF_INET_LOCAL_OUT) &&
(IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)) {
- __ip_vs_conn_put(cp);
- return NF_ACCEPT;
+ verdict = NF_ACCEPT;
+ goto out;
}
/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
/* Need to mangle contained IPv6 header in ICMPv6 packet */
- writable = ciph.len;
+ offset = ciph.len;
if (IPPROTO_TCP == ciph.protocol || IPPROTO_UDP == ciph.protocol ||
IPPROTO_SCTP == ciph.protocol)
- writable += 2 * sizeof(__u16); /* Also mangle ports */
+ offset += 2 * sizeof(__u16); /* Also mangle ports */
- verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, writable, hooknum, &ciph);
+ verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum, &ciph);
- __ip_vs_conn_put(cp);
+out:
+ if (likely(!new_cp))
+ __ip_vs_conn_put(cp);
+ else
+ ip_vs_conn_put(cp);
return verdict;
}
@@ -1610,15 +1673,13 @@ static int ip_vs_in_icmp_v6(struct sk_buff *skb, int *related,
* and send it on its way...
*/
static unsigned int
-ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
+ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, pkts;
- struct netns_ipvs *ipvs;
int conn_reuse_mode;
/* Already marked as IPVS request or reply? */
@@ -1633,7 +1694,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely((skb->pkt_type != PACKET_HOST &&
hooknum != NF_INET_LOCAL_OUT) ||
!skb_dst(skb))) {
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s"
" ignored in hook %u\n",
skb->pkt_type, iph.protocol,
@@ -1641,12 +1702,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
return NF_ACCEPT;
}
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- ip_vs_fill_iph_skb(af, skb, &iph);
+ ip_vs_fill_iph_skb(af, skb, false, &iph);
/* Bad... Do not break raw sockets */
if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT &&
@@ -1662,8 +1721,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (af == AF_INET6) {
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
- int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
- &iph);
+ int verdict = ip_vs_in_icmp_v6(ipvs, skb, &related,
+ hooknum, &iph);
if (related)
return verdict;
@@ -1672,21 +1731,30 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
#endif
if (unlikely(iph.protocol == IPPROTO_ICMP)) {
int related;
- int verdict = ip_vs_in_icmp(skb, &related, hooknum);
+ int verdict = ip_vs_in_icmp(ipvs, skb, &related,
+ hooknum);
if (related)
return verdict;
}
/* Protocol supported? */
- pd = ip_vs_proto_data_get(net, iph.protocol);
- if (unlikely(!pd))
+ pd = ip_vs_proto_data_get(ipvs, iph.protocol);
+ if (unlikely(!pd)) {
+ /* The only way we'll see this packet again is if it's
+ * encapsulated, so mark it with ipvs_property=1 so we
+ * skip it if we're ignoring tunneled packets
+ */
+ if (sysctl_ignore_tunneled(ipvs))
+ skb->ipvs_property = 1;
+
return NF_ACCEPT;
+ }
pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(af, skb, &iph, 0);
+ cp = pp->conn_in_get(ipvs, af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
if (conn_reuse_mode && !iph.fragoffs &&
@@ -1700,32 +1768,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
cp = NULL;
}
- if (unlikely(!cp) && !iph.fragoffs) {
- /* No (second) fragments need to enter here, as nf_defrag_ipv6
- * replayed fragment zero will already have created the cp
- */
+ if (unlikely(!cp)) {
int v;
- /* Schedule and create new connection entry into &cp */
- if (!pp->conn_schedule(af, skb, pd, &v, &cp, &iph))
+ if (!ip_vs_try_to_schedule(ipvs, af, skb, pd, &v, &cp, &iph))
return v;
}
- if (unlikely(!cp)) {
- /* sorry, all this trouble for a no-hit :) */
- IP_VS_DBG_PKT(12, af, pp, skb, 0,
- "ip_vs_in: packet continues traversal as normal");
- if (iph.fragoffs) {
- /* Fragment that couldn't be mapped to a conn entry
- * is missing module nf_defrag_ipv6
- */
- IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
- IP_VS_DBG_PKT(7, af, pp, skb, 0, "unhandled fragment");
- }
- return NF_ACCEPT;
- }
+ IP_VS_DBG_PKT(11, af, pp, skb, iph.off, "Incoming packet");
- IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
@@ -1765,7 +1816,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
pkts = atomic_add_return(1, &cp->in_pkts);
if (ipvs->sync_state & IP_VS_STATE_MASTER)
- ip_vs_sync_conn(net, cp, pkts);
+ ip_vs_sync_conn(ipvs, cp, pkts);
ip_vs_conn_put(cp);
return ret;
@@ -1776,10 +1827,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
}
/*
@@ -1787,10 +1838,10 @@ ip_vs_remote_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request4(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -1800,10 +1851,10 @@ ip_vs_local_request4(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from remote clients
*/
static unsigned int
-ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_remote_request6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET6);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
/*
@@ -1811,10 +1862,10 @@ ip_vs_remote_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* Schedule and forward packets from local clients
*/
static unsigned int
-ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_local_request6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- return ip_vs_in(ops->hooknum, skb, AF_INET6);
+ return ip_vs_in(net_ipvs(state->net), state->hook, skb, AF_INET6);
}
#endif
@@ -1830,46 +1881,40 @@ ip_vs_local_request6(const struct nf_hook_ops *ops, struct sk_buff *skb,
* and send them to ip_vs_in_icmp.
*/
static unsigned int
-ip_vs_forward_icmp(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_forward_icmp(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
int r;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp(skb, &r, ops->hooknum);
+ return ip_vs_in_icmp(ipvs, skb, &r, state->hook);
}
#ifdef CONFIG_IP_VS_IPV6
static unsigned int
-ip_vs_forward_icmp_v6(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
int r;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = net_ipvs(state->net);
struct ip_vs_iphdr iphdr;
- ip_vs_fill_iph_skb(AF_INET6, skb, &iphdr);
+ ip_vs_fill_iph_skb(AF_INET6, skb, false, &iphdr);
if (iphdr.protocol != IPPROTO_ICMPV6)
return NF_ACCEPT;
/* ipvs enabled in this netns ? */
- net = skb_net(skb);
- ipvs = net_ipvs(net);
if (unlikely(sysctl_backup_only(ipvs) || !ipvs->enable))
return NF_ACCEPT;
- return ip_vs_in_icmp_v6(skb, &r, ops->hooknum, &iphdr);
+ return ip_vs_in_icmp_v6(ipvs, skb, &r, state->hook, &iphdr);
}
#endif
@@ -1878,7 +1923,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 2,
@@ -1888,7 +1932,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* applied to IPVS. */
{
.hook = ip_vs_remote_request4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_NAT_SRC - 1,
@@ -1896,7 +1939,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_local_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 1,
@@ -1904,7 +1946,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_NAT_DST + 2,
@@ -1913,7 +1954,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
.hook = ip_vs_forward_icmp,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 99,
@@ -1921,7 +1961,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply4,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_FORWARD,
.priority = 100,
@@ -1930,7 +1969,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 2,
@@ -1940,7 +1978,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* applied to IPVS. */
{
.hook = ip_vs_remote_request6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC - 1,
@@ -1948,7 +1985,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* Before ip_vs_in, change source only for VS/NAT */
{
.hook = ip_vs_local_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 1,
@@ -1956,7 +1992,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After mangle, schedule and forward local requests */
{
.hook = ip_vs_local_request6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST + 2,
@@ -1965,7 +2000,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
* destined for 0.0.0.0/0, which is for incoming IPVS connections */
{
.hook = ip_vs_forward_icmp_v6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 99,
@@ -1973,7 +2007,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
- .owner = THIS_MODULE,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_FORWARD,
.priority = 100,
@@ -1999,22 +2032,22 @@ static int __net_init __ip_vs_init(struct net *net)
atomic_inc(&ipvs_netns_cnt);
net->ipvs = ipvs;
- if (ip_vs_estimator_net_init(net) < 0)
+ if (ip_vs_estimator_net_init(ipvs) < 0)
goto estimator_fail;
- if (ip_vs_control_net_init(net) < 0)
+ if (ip_vs_control_net_init(ipvs) < 0)
goto control_fail;
- if (ip_vs_protocol_net_init(net) < 0)
+ if (ip_vs_protocol_net_init(ipvs) < 0)
goto protocol_fail;
- if (ip_vs_app_net_init(net) < 0)
+ if (ip_vs_app_net_init(ipvs) < 0)
goto app_fail;
- if (ip_vs_conn_net_init(net) < 0)
+ if (ip_vs_conn_net_init(ipvs) < 0)
goto conn_fail;
- if (ip_vs_sync_net_init(net) < 0)
+ if (ip_vs_sync_net_init(ipvs) < 0)
goto sync_fail;
printk(KERN_INFO "IPVS: Creating netns size=%zu id=%d\n",
@@ -2025,15 +2058,15 @@ static int __net_init __ip_vs_init(struct net *net)
*/
sync_fail:
- ip_vs_conn_net_cleanup(net);
+ ip_vs_conn_net_cleanup(ipvs);
conn_fail:
- ip_vs_app_net_cleanup(net);
+ ip_vs_app_net_cleanup(ipvs);
app_fail:
- ip_vs_protocol_net_cleanup(net);
+ ip_vs_protocol_net_cleanup(ipvs);
protocol_fail:
- ip_vs_control_net_cleanup(net);
+ ip_vs_control_net_cleanup(ipvs);
control_fail:
- ip_vs_estimator_net_cleanup(net);
+ ip_vs_estimator_net_cleanup(ipvs);
estimator_fail:
net->ipvs = NULL;
return -ENOMEM;
@@ -2041,22 +2074,25 @@ estimator_fail:
static void __net_exit __ip_vs_cleanup(struct net *net)
{
- ip_vs_service_net_cleanup(net); /* ip_vs_flush() with locks */
- ip_vs_conn_net_cleanup(net);
- ip_vs_app_net_cleanup(net);
- ip_vs_protocol_net_cleanup(net);
- ip_vs_control_net_cleanup(net);
- ip_vs_estimator_net_cleanup(net);
- IP_VS_DBG(2, "ipvs netns %d released\n", net_ipvs(net)->gen);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_service_net_cleanup(ipvs); /* ip_vs_flush() with locks */
+ ip_vs_conn_net_cleanup(ipvs);
+ ip_vs_app_net_cleanup(ipvs);
+ ip_vs_protocol_net_cleanup(ipvs);
+ ip_vs_control_net_cleanup(ipvs);
+ ip_vs_estimator_net_cleanup(ipvs);
+ IP_VS_DBG(2, "ipvs netns %d released\n", ipvs->gen);
net->ipvs = NULL;
}
static void __net_exit __ip_vs_dev_cleanup(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
EnterFunction(2);
- net_ipvs(net)->enable = 0; /* Disable packet reception */
+ ipvs->enable = 0; /* Disable packet reception */
smp_wmb();
- ip_vs_sync_net_cleanup(net);
+ ip_vs_sync_net_cleanup(ipvs);
LeaveFunction(2);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 1a23e91d50d8..e7c1b052c2a3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -228,7 +228,7 @@ static void defense_work_handler(struct work_struct *work)
update_defense_level(ipvs);
if (atomic_read(&ipvs->dropentry))
- ip_vs_random_dropentry(ipvs->net);
+ ip_vs_random_dropentry(ipvs);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
#endif
@@ -263,7 +263,7 @@ static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
* Returns hash value for virtual service
*/
static inline unsigned int
-ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
+ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto,
const union nf_inet_addr *addr, __be16 port)
{
register unsigned int porth = ntohs(port);
@@ -276,7 +276,7 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
addr->ip6[2]^addr->ip6[3];
#endif
ahash = ntohl(addr_fold);
- ahash ^= ((size_t) net >> 8);
+ ahash ^= ((size_t) ipvs >> 8);
return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) &
IP_VS_SVC_TAB_MASK;
@@ -285,9 +285,9 @@ ip_vs_svc_hashkey(struct net *net, int af, unsigned int proto,
/*
* Returns hash value of fwmark for virtual service lookup
*/
-static inline unsigned int ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
+static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark)
{
- return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
+ return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
/*
@@ -309,14 +309,14 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/*
* Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
*/
- hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+ hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol,
&svc->addr, svc->port);
hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
* Hash it by fwmark in svc_fwm_table
*/
- hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
+ hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark);
hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
}
@@ -357,21 +357,21 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
* Get service by {netns, proto,addr,port} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+__ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
unsigned int hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
- hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
+ hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport);
hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) {
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
&& (svc->protocol == protocol)
- && net_eq(svc->net, net)) {
+ && (svc->ipvs == ipvs)) {
/* HIT */
return svc;
}
@@ -385,17 +385,17 @@ __ip_vs_service_find(struct net *net, int af, __u16 protocol,
* Get service by {fwmark} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark)
{
unsigned int hash;
struct ip_vs_service *svc;
/* Check for fwmark addressed entries */
- hash = ip_vs_svc_fwm_hashkey(net, fwmark);
+ hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark);
hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) {
if (svc->fwmark == fwmark && svc->af == af
- && net_eq(svc->net, net)) {
+ && (svc->ipvs == ipvs)) {
/* HIT */
return svc;
}
@@ -406,17 +406,16 @@ __ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
/* Find service, called under RCU lock */
struct ip_vs_service *
-ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
- struct netns_ipvs *ipvs = net_ipvs(net);
/*
* Check the table hashed by fwmark first
*/
if (fwmark) {
- svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark);
if (svc)
goto out;
}
@@ -425,7 +424,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
@@ -435,7 +434,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT);
}
if (svc == NULL
@@ -443,7 +442,7 @@ ip_vs_service_find(struct net *net, int af, __u32 fwmark, __u16 protocol,
/*
* Check if the catch-all port (port zero) exists
*/
- svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
+ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0);
}
out:
@@ -543,10 +542,9 @@ static void ip_vs_rs_unhash(struct ip_vs_dest *dest)
}
/* Check if real service by <proto,addr,port> is present */
-bool ip_vs_has_real_service(struct net *net, int af, __u16 protocol,
+bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash;
struct ip_vs_dest *dest;
@@ -601,7 +599,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
* on the backup.
* Called under RCU lock, no refcnt is returned.
*/
-struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
+struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
@@ -612,7 +610,7 @@ struct ip_vs_dest *ip_vs_find_dest(struct net *net, int svc_af, int dest_af,
struct ip_vs_service *svc;
__be16 port = dport;
- svc = ip_vs_service_find(net, svc_af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
@@ -660,7 +658,7 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
const union nf_inet_addr *daddr, __be16 dport)
{
struct ip_vs_dest *dest;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
/*
* Find the destination in trash
@@ -715,10 +713,9 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest)
* are expired, and the refcnt of each destination in the trash must
* be 0, so we simply release them here.
*/
-static void ip_vs_trash_cleanup(struct net *net)
+static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
{
struct ip_vs_dest *dest, *nxt;
- struct netns_ipvs *ipvs = net_ipvs(net);
del_timer_sync(&ipvs->dest_trash_timer);
/* No need to use dest_trash_lock */
@@ -788,7 +785,7 @@ static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
struct ip_vs_service *old_svc;
struct ip_vs_scheduler *sched;
int conn_flags;
@@ -843,7 +840,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
spin_unlock_bh(&dest->dst_lock);
if (add) {
- ip_vs_start_estimator(svc->net, &dest->stats);
+ ip_vs_start_estimator(svc->ipvs, &dest->stats);
list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
sched = rcu_dereference_protected(svc->scheduler, 1);
@@ -874,12 +871,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
- !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
+ !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
return -EINVAL;
} else
#endif
{
- atype = inet_addr_type(svc->net, udest->addr.ip);
+ atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
@@ -1036,12 +1033,10 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete a destination (must be already unlinked from the service)
*/
-static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest,
+static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
bool cleanup)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- ip_vs_stop_estimator(net, &dest->stats);
+ ip_vs_stop_estimator(ipvs, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
@@ -1079,7 +1074,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
svc->num_dests--;
if (dest->af != svc->af)
- net_ipvs(svc->net)->mixed_address_family_dests--;
+ svc->ipvs->mixed_address_family_dests--;
if (svcupd) {
struct ip_vs_scheduler *sched;
@@ -1120,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete the destination
*/
- __ip_vs_del_dest(svc->net, dest, false);
+ __ip_vs_del_dest(svc->ipvs, dest, false);
LeaveFunction(2);
@@ -1129,8 +1124,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
static void ip_vs_dest_trash_expire(unsigned long data)
{
- struct net *net = (struct net *) data;
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = (struct netns_ipvs *)data;
struct ip_vs_dest *dest, *next;
unsigned long now = jiffies;
@@ -1163,14 +1157,13 @@ static void ip_vs_dest_trash_expire(unsigned long data)
* Add a service into the service hash table
*/
static int
-ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0, i;
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
- struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
@@ -1237,7 +1230,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
svc->flags = u->flags;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
- svc->net = net;
+ svc->ipvs = ipvs;
INIT_LIST_HEAD(&svc->destinations);
spin_lock_init(&svc->sched_lock);
@@ -1261,7 +1254,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
- ip_vs_start_estimator(net, &svc->stats);
+ ip_vs_start_estimator(ipvs, &svc->stats);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
@@ -1381,7 +1374,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
struct ip_vs_pe *old_pe;
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
+ struct netns_ipvs *ipvs = svc->ipvs;
pr_info("%s: enter\n", __func__);
@@ -1389,7 +1382,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
if (svc->af == AF_INET)
ipvs->num_services--;
- ip_vs_stop_estimator(svc->net, &svc->stats);
+ ip_vs_stop_estimator(svc->ipvs, &svc->stats);
/* Unbind scheduler */
old_sched = rcu_dereference_protected(svc->scheduler, 1);
@@ -1405,7 +1398,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup)
*/
list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
__ip_vs_unlink_dest(svc, dest, 0);
- __ip_vs_del_dest(svc->net, dest, cleanup);
+ __ip_vs_del_dest(svc->ipvs, dest, cleanup);
}
/*
@@ -1456,7 +1449,7 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(struct net *net, bool cleanup)
+static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup)
{
int idx;
struct ip_vs_service *svc;
@@ -1468,7 +1461,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx],
s_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1479,7 +1472,7 @@ static int ip_vs_flush(struct net *net, bool cleanup)
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx],
f_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_unlink_service(svc, cleanup);
}
}
@@ -1491,12 +1484,12 @@ static int ip_vs_flush(struct net *net, bool cleanup)
* Delete service by {netns} in the service table.
* Called by __ip_vs_cleanup()
*/
-void ip_vs_service_net_cleanup(struct net *net)
+void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs)
{
EnterFunction(2);
/* Check for "full" addressed entries */
mutex_lock(&__ip_vs_mutex);
- ip_vs_flush(net, true);
+ ip_vs_flush(ipvs, true);
mutex_unlock(&__ip_vs_mutex);
LeaveFunction(2);
}
@@ -1540,7 +1533,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
mutex_lock(&__ip_vs_mutex);
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net)) {
+ if (svc->ipvs == ipvs) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
ip_vs_forget_dev(dest, dev);
@@ -1549,7 +1542,7 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event,
}
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (net_eq(svc->net, net)) {
+ if (svc->ipvs == ipvs) {
list_for_each_entry(dest, &svc->destinations,
n_list) {
ip_vs_forget_dev(dest, dev);
@@ -1583,26 +1576,26 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
return 0;
}
-static int ip_vs_zero_all(struct net *net)
+static int ip_vs_zero_all(struct netns_ipvs *ipvs)
{
int idx;
struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (net_eq(svc->net, net))
+ if (svc->ipvs == ipvs)
ip_vs_zero_service(svc);
}
}
- ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
+ ip_vs_zero_stats(&ipvs->tot_stats);
return 0;
}
@@ -1615,7 +1608,7 @@ static int
proc_do_defense_mode(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
- struct net *net = current->nsproxy->net_ns;
+ struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val = *valp;
int rc;
@@ -1626,7 +1619,7 @@ proc_do_defense_mode(struct ctl_table *table, int write,
/* Restore the correct value */
*valp = val;
} else {
- update_defense_level(net_ipvs(net));
+ update_defense_level(ipvs);
}
}
return rc;
@@ -1844,6 +1837,18 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "schedule_icmp",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ignore_tunneled",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
#ifdef CONFIG_IP_VS_DEBUG
{
.procname = "debug_level",
@@ -1889,6 +1894,7 @@ static inline const char *ip_vs_fwd_name(unsigned int flags)
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_iter *iter = seq->private;
int idx;
struct ip_vs_service *svc;
@@ -1896,7 +1902,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) {
- if (net_eq(svc->net, net) && pos-- == 0) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
iter->table = ip_vs_svc_table;
iter->bucket = idx;
return svc;
@@ -1908,7 +1914,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx],
f_list) {
- if (net_eq(svc->net, net) && pos-- == 0) {
+ if ((svc->ipvs == ipvs) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx;
return svc;
@@ -2196,7 +2202,7 @@ static const struct file_operations ip_vs_stats_percpu_fops = {
/*
* Set timeout values for tcp tcpfin udp in the timeout_table.
*/
-static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
struct ip_vs_proto_data *pd;
@@ -2209,13 +2215,13 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_TCP
if (u->tcp_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
= u->tcp_timeout * HZ;
}
if (u->tcp_fin_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
= u->tcp_fin_timeout * HZ;
}
@@ -2223,7 +2229,7 @@ static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_UDP
if (u->udp_timeout) {
- pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
pd->timeout_table[IP_VS_UDP_S_NORMAL]
= u->udp_timeout * HZ;
}
@@ -2344,12 +2350,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
cfg.syncid = dm->syncid;
rtnl_lock();
mutex_lock(&ipvs->sync_mutex);
- ret = start_sync_thread(net, &cfg, dm->state);
+ ret = start_sync_thread(ipvs, &cfg, dm->state);
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
} else {
mutex_lock(&ipvs->sync_mutex);
- ret = stop_sync_thread(net, dm->state);
+ ret = stop_sync_thread(ipvs, dm->state);
mutex_unlock(&ipvs->sync_mutex);
}
goto out_dec;
@@ -2358,11 +2364,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
mutex_lock(&__ip_vs_mutex);
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush(net, false);
+ ret = ip_vs_flush(ipvs, false);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
- ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
+ ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
}
@@ -2377,7 +2383,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
- ret = ip_vs_zero_all(net);
+ ret = ip_vs_zero_all(ipvs);
goto out_unlock;
}
}
@@ -2395,10 +2401,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
/* Lookup the exact service by <protocol, addr, port> or fwmark */
rcu_read_lock();
if (usvc.fwmark == 0)
- svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
+ svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark);
rcu_read_unlock();
if (cmd != IP_VS_SO_SET_ADD
@@ -2412,7 +2418,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (svc != NULL)
ret = -EEXIST;
else
- ret = ip_vs_add_service(net, &usvc, &svc);
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
ret = ip_vs_edit_service(svc, &usvc);
@@ -2471,7 +2477,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
}
static inline int
-__ip_vs_get_service_entries(struct net *net,
+__ip_vs_get_service_entries(struct netns_ipvs *ipvs,
const struct ip_vs_get_services *get,
struct ip_vs_get_services __user *uptr)
{
@@ -2483,7 +2489,7 @@ __ip_vs_get_service_entries(struct net *net,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET || !net_eq(svc->net, net))
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
continue;
if (count >= get->num_services)
@@ -2502,7 +2508,7 @@ __ip_vs_get_service_entries(struct net *net,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET || !net_eq(svc->net, net))
+ if (svc->af != AF_INET || (svc->ipvs != ipvs))
continue;
if (count >= get->num_services)
@@ -2522,7 +2528,7 @@ out:
}
static inline int
-__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
@@ -2531,9 +2537,9 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
rcu_read_lock();
if (get->fwmark)
- svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark);
else
- svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
+ svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr,
get->port);
rcu_read_unlock();
@@ -2578,7 +2584,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
}
static inline void
-__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u)
{
#if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP)
struct ip_vs_proto_data *pd;
@@ -2587,12 +2593,12 @@ __ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
memset(u, 0, sizeof (*u));
#ifdef CONFIG_IP_VS_PROTO_TCP
- pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
- pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
u->udp_timeout =
pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
@@ -2711,7 +2717,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_service_entries(net, get, user);
+ ret = __ip_vs_get_service_entries(ipvs, get, user);
}
break;
@@ -2725,9 +2731,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
addr.ip = entry->addr;
rcu_read_lock();
if (entry->fwmark)
- svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark);
else
- svc = __ip_vs_service_find(net, AF_INET,
+ svc = __ip_vs_service_find(ipvs, AF_INET,
entry->protocol, &addr,
entry->port);
rcu_read_unlock();
@@ -2753,7 +2759,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_dest_entries(net, get, user);
+ ret = __ip_vs_get_dest_entries(ipvs, get, user);
}
break;
@@ -2761,7 +2767,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
}
@@ -2996,12 +3002,13 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
int idx = 0, i;
int start = cb->args[0];
struct ip_vs_service *svc;
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
- if (++idx <= start || !net_eq(svc->net, net))
+ if (++idx <= start || (svc->ipvs != ipvs))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -3012,7 +3019,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
- if (++idx <= start || !net_eq(svc->net, net))
+ if (++idx <= start || (svc->ipvs != ipvs))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -3028,7 +3035,7 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_parse_service(struct net *net,
+static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
struct ip_vs_service **ret_svc)
@@ -3073,9 +3080,9 @@ static int ip_vs_genl_parse_service(struct net *net,
rcu_read_lock();
if (usvc->fwmark)
- svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
+ svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark);
else
- svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
+ svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
rcu_read_unlock();
*ret_svc = svc;
@@ -3113,14 +3120,14 @@ static int ip_vs_genl_parse_service(struct net *net,
return 0;
}
-static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
struct nlattr *nla)
{
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
int ret;
- ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
+ ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
return ret ? ERR_PTR(ret) : svc;
}
@@ -3195,7 +3202,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&__ip_vs_mutex);
@@ -3205,7 +3213,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
goto out_err;
- svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
+ svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3341,7 +3349,7 @@ nla_put_failure:
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct net *net = skb_sknet(skb);
+ struct net *net = sock_net(skb->sk);
struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&ipvs->sync_mutex);
@@ -3367,9 +3375,8 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ipvs_sync_daemon_cfg c;
struct nlattr *a;
int ret;
@@ -3426,33 +3433,32 @@ static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
rtnl_lock();
mutex_lock(&ipvs->sync_mutex);
- ret = start_sync_thread(net, &c,
+ ret = start_sync_thread(ipvs, &c,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
rtnl_unlock();
return ret;
}
-static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
int ret;
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
mutex_lock(&ipvs->sync_mutex);
- ret = stop_sync_thread(net,
+ ret = stop_sync_thread(ipvs,
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
mutex_unlock(&ipvs->sync_mutex);
return ret;
}
-static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3464,17 +3470,15 @@ static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
- return ip_vs_set_timeout(net, &t);
+ return ip_vs_set_timeout(ipvs, &t);
}
static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
{
int ret = -EINVAL, cmd;
- struct net *net;
- struct netns_ipvs *ipvs;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
- ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3487,9 +3491,9 @@ static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info)
goto out;
if (cmd == IPVS_CMD_NEW_DAEMON)
- ret = ip_vs_genl_new_daemon(net, daemon_attrs);
+ ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs);
else
- ret = ip_vs_genl_del_daemon(net, daemon_attrs);
+ ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs);
}
out:
@@ -3503,22 +3507,22 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
- struct net *net;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush(net, false);
+ ret = ip_vs_flush(ipvs, false);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
- ret = ip_vs_genl_set_config(net, info->attrs);
+ ret = ip_vs_genl_set_config(ipvs, info->attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
- ret = ip_vs_zero_all(net);
+ ret = ip_vs_zero_all(ipvs);
goto out;
}
@@ -3528,7 +3532,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
need_full_svc = 1;
- ret = ip_vs_genl_parse_service(net, &usvc,
+ ret = ip_vs_genl_parse_service(ipvs, &usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
need_full_svc, &svc);
if (ret)
@@ -3567,7 +3571,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
/* The synchronization protocol is incompatible
* with mixed family services
*/
- if (net_ipvs(net)->sync_state) {
+ if (ipvs->sync_state) {
ret = -EINVAL;
goto out;
}
@@ -3587,7 +3591,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
switch (cmd) {
case IPVS_CMD_NEW_SERVICE:
if (svc == NULL)
- ret = ip_vs_add_service(net, &usvc, &svc);
+ ret = ip_vs_add_service(ipvs, &usvc, &svc);
else
ret = -EEXIST;
break;
@@ -3625,9 +3629,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
void *reply;
int ret, cmd, reply_cmd;
- struct net *net;
+ struct net *net = sock_net(skb->sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
- net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3656,7 +3660,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_service *svc;
- svc = ip_vs_genl_find_service(net,
+ svc = ip_vs_genl_find_service(ipvs,
info->attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc)) {
ret = PTR_ERR(svc);
@@ -3677,7 +3681,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(net, &t);
+ __ip_vs_get_timeouts(ipvs, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP,
t.tcp_timeout) ||
@@ -3832,10 +3836,10 @@ static void ip_vs_genl_unregister(void)
* per netns intit/exit func.
*/
#ifdef CONFIG_SYSCTL
-static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
{
+ struct net *net = ipvs->net;
int idx;
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ctl_table *tbl;
atomic_set(&ipvs->dropentry, 0);
@@ -3854,6 +3858,10 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
+ for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) {
+ if (tbl[idx].proc_handler == proc_do_defense_mode)
+ tbl[idx].extra2 = ipvs;
+ }
idx = 0;
ipvs->sysctl_amemthresh = 1024;
tbl[idx++].data = &ipvs->sysctl_amemthresh;
@@ -3895,7 +3903,8 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
tbl[idx++].data = &ipvs->sysctl_backup_only;
ipvs->sysctl_conn_reuse_mode = 1;
tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode;
-
+ tbl[idx++].data = &ipvs->sysctl_schedule_icmp;
+ tbl[idx++].data = &ipvs->sysctl_ignore_tunneled;
ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl);
if (ipvs->sysctl_hdr == NULL) {
@@ -3903,7 +3912,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
kfree(tbl);
return -ENOMEM;
}
- ip_vs_start_estimator(net, &ipvs->tot_stats);
+ ip_vs_start_estimator(ipvs, &ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
@@ -3912,14 +3921,14 @@ static int __net_init ip_vs_control_net_init_sysctl(struct net *net)
return 0;
}
-static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
unregister_net_sysctl_table(ipvs->sysctl_hdr);
- ip_vs_stop_estimator(net, &ipvs->tot_stats);
+ ip_vs_stop_estimator(ipvs, &ipvs->tot_stats);
if (!net_eq(net, &init_net))
kfree(ipvs->sysctl_tbl);
@@ -3927,8 +3936,8 @@ static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net)
#else
-static int __net_init ip_vs_control_net_init_sysctl(struct net *net) { return 0; }
-static void __net_exit ip_vs_control_net_cleanup_sysctl(struct net *net) { }
+static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; }
+static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { }
#endif
@@ -3936,10 +3945,10 @@ static struct notifier_block ip_vs_dst_notifier = {
.notifier_call = ip_vs_dst_event,
};
-int __net_init ip_vs_control_net_init(struct net *net)
+int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs)
{
+ struct net *net = ipvs->net;
int i, idx;
- struct netns_ipvs *ipvs = net_ipvs(net);
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
@@ -3948,7 +3957,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
INIT_LIST_HEAD(&ipvs->dest_trash);
spin_lock_init(&ipvs->dest_trash_lock);
setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire,
- (unsigned long) net);
+ (unsigned long) ipvs);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
@@ -3970,7 +3979,7 @@ int __net_init ip_vs_control_net_init(struct net *net)
proc_create("ip_vs_stats_percpu", 0, net->proc_net,
&ip_vs_stats_percpu_fops);
- if (ip_vs_control_net_init_sysctl(net))
+ if (ip_vs_control_net_init_sysctl(ipvs))
goto err;
return 0;
@@ -3980,12 +3989,12 @@ err:
return -ENOMEM;
}
-void __net_exit ip_vs_control_net_cleanup(struct net *net)
+void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct net *net = ipvs->net;
- ip_vs_trash_cleanup(net);
- ip_vs_control_net_cleanup_sysctl(net);
+ ip_vs_trash_cleanup(ipvs);
+ ip_vs_control_net_cleanup_sysctl(ipvs);
remove_proc_entry("ip_vs_stats_percpu", net->proc_net);
remove_proc_entry("ip_vs_stats", net->proc_net);
remove_proc_entry("ip_vs", net->proc_net);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ef0eb0a8d552..457c6c193e13 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -102,10 +102,8 @@ static void estimation_timer(unsigned long arg)
struct ip_vs_estimator *e;
struct ip_vs_stats *s;
u64 rate;
- struct net *net = (struct net *)arg;
- struct netns_ipvs *ipvs;
+ struct netns_ipvs *ipvs = (struct netns_ipvs *)arg;
- ipvs = net_ipvs(net);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
@@ -140,9 +138,8 @@ static void estimation_timer(unsigned long arg)
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
-void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
INIT_LIST_HEAD(&est->list);
@@ -152,9 +149,8 @@ void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
spin_unlock_bh(&ipvs->est_lock);
}
-void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
+void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
spin_lock_bh(&ipvs->est_lock);
@@ -192,18 +188,16 @@ void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
dst->outbps = (e->outbps + 0xF) >> 5;
}
-int __net_init ip_vs_estimator_net_init(struct net *net)
+int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
INIT_LIST_HEAD(&ipvs->est_list);
spin_lock_init(&ipvs->est_lock);
- setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)net);
+ setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs);
mod_timer(&ipvs->est_timer, jiffies + 2 * HZ);
return 0;
}
-void __net_exit ip_vs_estimator_net_cleanup(struct net *net)
+void __net_exit ip_vs_estimator_net_cleanup(struct netns_ipvs *ipvs)
{
- del_timer_sync(&net_ipvs(net)->est_timer);
+ del_timer_sync(&ipvs->est_timer);
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 5d3daae98bf0..d30c327bb578 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -181,7 +181,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
- struct net *net;
*diff = 0;
@@ -223,14 +222,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ ip_vs_conn_fill_param(cp->ipvs, AF_INET,
iph->protocol, &from, port,
&cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+ ip_vs_conn_fill_param(cp->ipvs,
AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
/* As above, this is ipv4 only */
@@ -289,9 +288,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* would be adjusted twice.
*/
- net = skb_net(skb);
cp->app_data = NULL;
- ip_vs_tcp_conn_listen(net, n_cp);
+ ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@@ -320,7 +318,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
- struct net *net;
/* no diff required for incoming packets */
*diff = 0;
@@ -392,7 +389,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ ip_vs_conn_fill_param(cp->ipvs, AF_INET,
iph->protocol, &to, port, &cp->vaddr,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
@@ -413,8 +410,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Move tunnel to listen state
*/
- net = skb_net(skb);
- ip_vs_tcp_conn_listen(net, n_cp);
+ ip_vs_tcp_conn_listen(n_cp);
ip_vs_conn_put(n_cp);
return 1;
@@ -447,14 +443,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
if (!ipvs)
return -ENOENT;
- app = register_ip_vs_app(net, &ip_vs_ftp);
+ app = register_ip_vs_app(ipvs, &ip_vs_ftp);
if (IS_ERR(app))
return PTR_ERR(app);
for (i = 0; i < ports_count; i++) {
if (!ports[i])
continue;
- ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
+ ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]);
if (ret)
goto err_unreg;
pr_info("%s: loaded support on port[%d] = %d\n",
@@ -463,7 +459,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
return 0;
err_unreg:
- unregister_ip_vs_app(net, &ip_vs_ftp);
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
return ret;
}
/*
@@ -471,7 +467,12 @@ err_unreg:
*/
static void __ip_vs_ftp_exit(struct net *net)
{
- unregister_ip_vs_app(net, &ip_vs_ftp);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!ipvs)
+ return;
+
+ unregister_ip_vs_app(ipvs, &ip_vs_ftp);
}
static struct pernet_operations ip_vs_ftp_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 127f14046c51..cccf4d637412 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -250,8 +250,7 @@ static void ip_vs_lblc_flush(struct ip_vs_service *svc)
static int sysctl_lblc_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
- return ipvs->sysctl_lblc_expiration;
+ return svc->ipvs->sysctl_lblc_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 2229d2d8bbe0..796d70e47ddd 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -415,8 +415,7 @@ static void ip_vs_lblcr_flush(struct ip_vs_service *svc)
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
- struct netns_ipvs *ipvs = net_ipvs(svc->net);
- return ipvs->sysctl_lblcr_expiration;
+ return svc->ipvs->sysctl_lblcr_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 136184572fc9..30434fb133df 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -161,7 +161,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
+ ip_vs_conn_fill_param(net_ipvs(net), exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
@@ -274,8 +274,7 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(ip_vs_conn_net(cp), &nf_ct_zone_dflt,
- &tuple);
+ h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index bed5f7042529..1b8d594e493a 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -70,7 +70,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
const char *dptr;
int retc;
- ip_vs_fill_iph_skb(p->af, skb, &iph);
+ ip_vs_fill_iph_skb(p->af, skb, false, &iph);
/* Only useful with UDP */
if (iph.protocol != IPPROTO_UDP)
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 939f7fbe9b46..8ae480715cea 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -63,9 +63,8 @@ static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
* register an ipvs protocols netns related data
*/
static int
-register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
+register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
unsigned int hash = IP_VS_PROTO_HASH(pp->protocol);
struct ip_vs_proto_data *pd =
kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL);
@@ -79,7 +78,7 @@ register_ip_vs_proto_netns(struct net *net, struct ip_vs_protocol *pp)
atomic_set(&pd->appcnt, 0); /* Init app counter */
if (pp->init_netns != NULL) {
- int ret = pp->init_netns(net, pd);
+ int ret = pp->init_netns(ipvs, pd);
if (ret) {
/* unlink an free proto data */
ipvs->proto_data_table[hash] = pd->next;
@@ -116,9 +115,8 @@ static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp)
* unregister an ipvs protocols netns data
*/
static int
-unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
+unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data **pd_p;
unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol);
@@ -127,7 +125,7 @@ unregister_ip_vs_proto_netns(struct net *net, struct ip_vs_proto_data *pd)
if (*pd_p == pd) {
*pd_p = pd->next;
if (pd->pp->exit_netns != NULL)
- pd->pp->exit_netns(net, pd);
+ pd->pp->exit_netns(ipvs, pd);
kfree(pd);
return 0;
}
@@ -156,8 +154,8 @@ EXPORT_SYMBOL(ip_vs_proto_get);
/*
* get ip_vs_protocol object data by netns and proto
*/
-static struct ip_vs_proto_data *
-__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
struct ip_vs_proto_data *pd;
unsigned int hash = IP_VS_PROTO_HASH(proto);
@@ -169,14 +167,6 @@ __ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
return NULL;
}
-
-struct ip_vs_proto_data *
-ip_vs_proto_data_get(struct net *net, unsigned short proto)
-{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
- return __ipvs_proto_data_get(ipvs, proto);
-}
EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
@@ -317,7 +307,7 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
/*
* per network name-space init
*/
-int __net_init ip_vs_protocol_net_init(struct net *net)
+int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs)
{
int i, ret;
static struct ip_vs_protocol *protos[] = {
@@ -339,27 +329,26 @@ int __net_init ip_vs_protocol_net_init(struct net *net)
};
for (i = 0; i < ARRAY_SIZE(protos); i++) {
- ret = register_ip_vs_proto_netns(net, protos[i]);
+ ret = register_ip_vs_proto_netns(ipvs, protos[i]);
if (ret < 0)
goto cleanup;
}
return 0;
cleanup:
- ip_vs_protocol_net_cleanup(net);
+ ip_vs_protocol_net_cleanup(ipvs);
return ret;
}
-void __net_exit ip_vs_protocol_net_cleanup(struct net *net)
+void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd;
int i;
/* unregister all the ipvs proto data for this netns */
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
while ((pd = ipvs->proto_data_table[i]) != NULL)
- unregister_ip_vs_proto_netns(net, pd);
+ unregister_ip_vs_proto_netns(ipvs, pd);
}
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5de3dd312c0f..5320d39976e1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,30 +41,28 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500
static void
-ah_esp_conn_fill_param_proto(struct net *net, int af,
- const struct ip_vs_iphdr *iph, int inverse,
+ah_esp_conn_fill_param_proto(struct netns_ipvs *ipvs, int af,
+ const struct ip_vs_iphdr *iph,
struct ip_vs_conn_param *p)
{
- if (likely(!inverse))
- ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+ if (likely(!ip_vs_iph_inverse(iph)))
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
- ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(ipvs, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph,
- int inverse)
+ah_esp_conn_in_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
- struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@@ -73,7 +71,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
*/
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
- inverse ? "ICMP+" : "",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -84,19 +82,18 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
static struct ip_vs_conn *
-ah_esp_conn_out_get(int af, const struct sk_buff *skb,
- const struct ip_vs_iphdr *iph, int inverse)
+ah_esp_conn_out_get(struct netns_ipvs *ipvs, int af, const struct sk_buff *skb,
+ const struct ip_vs_iphdr *iph)
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
- struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(ipvs, af, iph, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
- inverse ? "ICMP+" : "",
+ ip_vs_iph_icmp(iph) ? "ICMP+" : "",
ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
@@ -107,7 +104,8 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+ah_esp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 5b84c0b56642..010ddeec135f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,35 +9,44 @@
#include <net/ip_vs.h>
static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
- struct netns_ipvs *ipvs;
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
-
- sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
- if (sh == NULL) {
- *verdict = NF_DROP;
- return 0;
+ __be16 _ports[2], *ports = NULL;
+
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
+ if (sh) {
+ sch = skb_header_pointer(
+ skb, iph->len + sizeof(sctp_sctphdr_t),
+ sizeof(_schunkh), &_schunkh);
+ if (sch && (sch->type == SCTP_CID_INIT ||
+ sysctl_sloppy_sctp(ipvs)))
+ ports = &sh->source;
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
}
- sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t),
- sizeof(_schunkh), &_schunkh);
- if (sch == NULL) {
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
- ipvs = net_ipvs(net);
rcu_read_lock();
- if ((sch->type == SCTP_CID_INIT || sysctl_sloppy_sctp(ipvs)) &&
- (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, sh->dest))) {
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+ if (svc) {
int ignored;
if (ip_vs_todrop(ipvs)) {
@@ -474,14 +483,13 @@ static inline __u16 sctp_app_hashkey(__be16 port)
& SCTP_APP_TAB_MASK;
}
-static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
+static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
@@ -498,9 +506,9 @@ out:
return ret;
}
-static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
+static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -508,7 +516,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -549,10 +557,8 @@ out:
* timeouts is netns related now.
* ---------------------------------------------
*/
-static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
sizeof(sctp_timeouts));
@@ -561,7 +567,7 @@ static int __ip_vs_sctp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __ip_vs_sctp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 8e92beb0cca9..d7024b2ed769 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -32,27 +32,47 @@
#include <net/ip_vs.h>
static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
- struct netns_ipvs *ipvs;
+ __be16 _ports[2], *ports = NULL;
- th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
- if (th == NULL) {
+ /* In the event of icmp, we're only guaranteed to have the first 8
+ * bytes of the transport header, so we only check the rest of the
+ * TCP packet for non-ICMP packets
+ */
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
+ if (th) {
+ if (th->rst || !(sysctl_sloppy_tcp(ipvs) || th->syn))
+ return 1;
+ ports = &th->source;
+ }
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
- ipvs = net_ipvs(net);
+
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
rcu_read_lock();
- if ((th->syn || sysctl_sloppy_tcp(ipvs)) && !th->rst &&
- (svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, th->dest))) {
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
+ if (svc) {
int ignored;
if (ip_vs_todrop(ipvs)) {
@@ -571,14 +591,13 @@ static inline __u16 tcp_app_hashkey(__be16 port)
}
-static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
+static int tcp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
@@ -597,9 +616,9 @@ static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
static void
-tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
+tcp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -609,7 +628,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -653,9 +672,9 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*/
-void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(cp->ipvs, IPPROTO_TCP);
spin_lock_bh(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
@@ -668,10 +687,8 @@ void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
* timeouts is netns related now.
* ---------------------------------------------
*/
-static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
@@ -681,7 +698,7 @@ static int __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __ip_vs_tcp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index b62a3c0ff9bf..e494e9a88c7f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -29,28 +29,42 @@
#include <net/ip6_checksum.h>
static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
+udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
struct ip_vs_iphdr *iph)
{
- struct net *net;
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
+ __be16 _ports[2], *ports = NULL;
- /* IPv6 fragments, only first fragment will hit this */
- uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
- if (uh == NULL) {
+ if (likely(!ip_vs_iph_icmp(iph))) {
+ /* IPv6 fragments, only first fragment will hit this */
+ uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
+ if (uh)
+ ports = &uh->source;
+ } else {
+ ports = skb_header_pointer(
+ skb, iph->len, sizeof(_ports), &_ports);
+ }
+
+ if (!ports) {
*verdict = NF_DROP;
return 0;
}
- net = skb_net(skb);
+
rcu_read_lock();
- svc = ip_vs_service_find(net, af, skb->mark, iph->protocol,
- &iph->daddr, uh->dest);
+ if (likely(!ip_vs_iph_inverse(iph)))
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->daddr, ports[1]);
+ else
+ svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
+ &iph->saddr, ports[0]);
+
if (svc) {
int ignored;
- if (ip_vs_todrop(net_ipvs(net))) {
+ if (ip_vs_todrop(ipvs)) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
@@ -348,14 +362,13 @@ static inline __u16 udp_app_hashkey(__be16 port)
}
-static int udp_register_app(struct net *net, struct ip_vs_app *inc)
+static int udp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
hash = udp_app_hashkey(port);
@@ -374,9 +387,9 @@ static int udp_register_app(struct net *net, struct ip_vs_app *inc)
static void
-udp_unregister_app(struct net *net, struct ip_vs_app *inc)
+udp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP);
atomic_dec(&pd->appcnt);
list_del_rcu(&inc->p_list);
@@ -385,7 +398,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
+ struct netns_ipvs *ipvs = cp->ipvs;
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -456,10 +469,8 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
}
-static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
+static int __udp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
sizeof(udp_timeouts));
@@ -468,7 +479,7 @@ static int __udp_init(struct net *net, struct ip_vs_proto_data *pd)
return 0;
}
-static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
+static void __udp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
{
kfree(pd->timeout_table);
}
diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c
index 98a13433b68c..1e373a5e44e3 100644
--- a/net/netfilter/ipvs/ip_vs_sh.c
+++ b/net/netfilter/ipvs/ip_vs_sh.c
@@ -280,35 +280,29 @@ static int ip_vs_sh_dest_changed(struct ip_vs_service *svc,
static inline __be16
ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph)
{
- __be16 port;
- struct tcphdr _tcph, *th;
- struct udphdr _udph, *uh;
- sctp_sctphdr_t _sctph, *sh;
+ __be16 _ports[2], *ports;
+ /* At this point we know that we have a valid packet of some kind.
+ * Because ICMP packets are only guaranteed to have the first 8
+ * bytes, let's just grab the ports. Fortunately they're in the
+ * same position for all three of the protocols we care about.
+ */
switch (iph->protocol) {
case IPPROTO_TCP:
- th = skb_header_pointer(skb, iph->len, sizeof(_tcph), &_tcph);
- if (unlikely(th == NULL))
- return 0;
- port = th->source;
- break;
case IPPROTO_UDP:
- uh = skb_header_pointer(skb, iph->len, sizeof(_udph), &_udph);
- if (unlikely(uh == NULL))
- return 0;
- port = uh->source;
- break;
case IPPROTO_SCTP:
- sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
- if (unlikely(sh == NULL))
+ ports = skb_header_pointer(skb, iph->len, sizeof(_ports),
+ &_ports);
+ if (unlikely(!ports))
return 0;
- port = sh->source;
- break;
+
+ if (likely(!ip_vs_iph_inverse(iph)))
+ return ports[0];
+ else
+ return ports[1];
default:
- port = 0;
+ return 0;
}
-
- return port;
}
@@ -322,6 +316,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
struct ip_vs_dest *dest;
struct ip_vs_sh_state *s;
__be16 port = 0;
+ const union nf_inet_addr *hash_addr;
+
+ hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr;
IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
@@ -331,9 +328,9 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
s = (struct ip_vs_sh_state *) svc->sched_data;
if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK)
- dest = ip_vs_sh_get_fallback(svc, s, &iph->saddr, port);
+ dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port);
else
- dest = ip_vs_sh_get(svc, s, &iph->saddr, port);
+ dest = ip_vs_sh_get(svc, s, hash_addr, port);
if (!dest) {
ip_vs_scheduler_err(svc, "no destination available");
@@ -341,7 +338,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb,
}
IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n",
- IP_VS_DBG_ADDR(svc->af, &iph->saddr),
+ IP_VS_DBG_ADDR(svc->af, hash_addr),
IP_VS_DBG_ADDR(dest->af, &dest->addr),
ntohs(dest->port));
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 43f140950075..803001a45aa1 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -193,7 +193,7 @@ union ip_vs_sync_conn {
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
struct ip_vs_sync_thread_data {
- struct net *net;
+ struct netns_ipvs *ipvs;
struct socket *sock;
char *buf;
int id;
@@ -533,10 +533,9 @@ set:
* Version 0 , could be switched in by sys_ctl.
* Add an ip_vs_conn information into the current sync_buff.
*/
-static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
+static void ip_vs_sync_conn_v0(struct netns_ipvs *ipvs, struct ip_vs_conn *cp,
int pkts)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg_v0 *m;
struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_buff *buff;
@@ -615,7 +614,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
- ip_vs_sync_conn(net, cp, pkts);
+ ip_vs_sync_conn(ipvs, cp, pkts);
}
}
@@ -624,9 +623,8 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
* Called by ip_vs_in.
* Sending Version 1 messages
*/
-void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
+void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m;
union ip_vs_sync_conn *s;
struct ip_vs_sync_buff *buff;
@@ -637,7 +635,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts)
/* Handle old version of the protocol */
if (sysctl_sync_ver(ipvs) == 0) {
- ip_vs_sync_conn_v0(net, cp, pkts);
+ ip_vs_sync_conn_v0(ipvs, cp, pkts);
return;
}
/* Do not sync ONE PACKET */
@@ -784,21 +782,21 @@ control:
* fill_param used by version 1
*/
static inline int
-ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
+ip_vs_conn_fill_param_sync(struct netns_ipvs *ipvs, int af, union ip_vs_sync_conn *sc,
struct ip_vs_conn_param *p,
__u8 *pe_data, unsigned int pe_data_len,
__u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- ip_vs_conn_fill_param(net, af, sc->v6.protocol,
+ ip_vs_conn_fill_param(ipvs, af, sc->v6.protocol,
(const union nf_inet_addr *)&sc->v6.caddr,
sc->v6.cport,
(const union nf_inet_addr *)&sc->v6.vaddr,
sc->v6.vport, p);
else
#endif
- ip_vs_conn_fill_param(net, af, sc->v4.protocol,
+ ip_vs_conn_fill_param(ipvs, af, sc->v4.protocol,
(const union nf_inet_addr *)&sc->v4.caddr,
sc->v4.cport,
(const union nf_inet_addr *)&sc->v4.vaddr,
@@ -837,7 +835,7 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
* Param: ...
* timeout is in sec.
*/
-static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *param,
unsigned int flags, unsigned int state,
unsigned int protocol, unsigned int type,
const union nf_inet_addr *daddr, __be16 dport,
@@ -846,7 +844,6 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
{
struct ip_vs_dest *dest;
struct ip_vs_conn *cp;
- struct netns_ipvs *ipvs = net_ipvs(net);
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
cp = ip_vs_conn_in_get(param);
@@ -904,7 +901,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* with synchronization, so we can make the assumption that
* the svc_af is the same as the dest_af
*/
- dest = ip_vs_find_dest(net, type, type, daddr, dport,
+ dest = ip_vs_find_dest(ipvs, type, type, daddr, dport,
param->vaddr, param->vport, protocol,
fwmark, flags);
@@ -941,7 +938,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
} else {
struct ip_vs_proto_data *pd;
- pd = ip_vs_proto_data_get(net, protocol);
+ pd = ip_vs_proto_data_get(ipvs, protocol);
if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
cp->timeout = pd->timeout_table[state];
else
@@ -953,7 +950,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
/*
* Process received multicast message for Version 0
*/
-static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+static void ip_vs_process_message_v0(struct netns_ipvs *ipvs, const char *buffer,
const size_t buflen)
{
struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
@@ -1009,14 +1006,14 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
}
}
- ip_vs_conn_fill_param(net, AF_INET, s->protocol,
+ ip_vs_conn_fill_param(ipvs, AF_INET, s->protocol,
(const union nf_inet_addr *)&s->caddr,
s->cport,
(const union nf_inet_addr *)&s->vaddr,
s->vport, &param);
/* Send timeout as Zero */
- ip_vs_proc_conn(net, &param, flags, state, s->protocol, AF_INET,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->protocol, AF_INET,
(union nf_inet_addr *)&s->daddr, s->dport,
0, 0, opt);
}
@@ -1067,7 +1064,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
/*
* Process a Version 1 sync. connection
*/
-static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
+static inline int ip_vs_proc_sync_conn(struct netns_ipvs *ipvs, __u8 *p, __u8 *msg_end)
{
struct ip_vs_sync_conn_options opt;
union ip_vs_sync_conn *s;
@@ -1171,21 +1168,21 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
state = 0;
}
}
- if (ip_vs_conn_fill_param_sync(net, af, s, &param, pe_data,
+ if (ip_vs_conn_fill_param_sync(ipvs, af, s, &param, pe_data,
pe_data_len, pe_name, pe_name_len)) {
retc = 50;
goto out;
}
/* If only IPv4, just silent skip IPv6 */
if (af == AF_INET)
- ip_vs_proc_conn(net, &param, flags, state, s->v4.protocol, af,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v4.protocol, af,
(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
);
#ifdef CONFIG_IP_VS_IPV6
else
- ip_vs_proc_conn(net, &param, flags, state, s->v6.protocol, af,
+ ip_vs_proc_conn(ipvs, &param, flags, state, s->v6.protocol, af,
(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
(opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
@@ -1204,10 +1201,9 @@ out:
* ip_vs_conn entries.
* Handles Version 0 & 1
*/
-static void ip_vs_process_message(struct net *net, __u8 *buffer,
+static void ip_vs_process_message(struct netns_ipvs *ipvs, __u8 *buffer,
const size_t buflen)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
int i, nr_conns;
@@ -1257,7 +1253,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return;
}
/* Process a single sync_conn */
- retc = ip_vs_proc_sync_conn(net, p, msg_end);
+ retc = ip_vs_proc_sync_conn(ipvs, p, msg_end);
if (retc < 0) {
IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
retc);
@@ -1268,7 +1264,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
}
} else {
/* Old type of message */
- ip_vs_process_message_v0(net, buffer, buflen);
+ ip_vs_process_message_v0(ipvs, buffer, buflen);
return;
}
}
@@ -1493,16 +1489,15 @@ static void get_mcast_sockaddr(union ipvs_sockaddr *sa, int *salen,
/*
* Set up sending multicast socket over UDP
*/
-static struct socket *make_send_sock(struct net *net, int id)
+static struct socket *make_send_sock(struct netns_ipvs *ipvs, int id)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
union ipvs_sockaddr mcast_addr;
struct socket *sock;
int result, salen;
/* First create a socket */
- result = sock_create_kern(net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
+ result = sock_create_kern(ipvs->net, ipvs->mcfg.mcast_af, SOCK_DGRAM,
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
@@ -1550,16 +1545,15 @@ error:
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket *make_receive_sock(struct net *net, int id)
+static struct socket *make_receive_sock(struct netns_ipvs *ipvs, int id)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
/* multicast addr */
union ipvs_sockaddr mcast_addr;
struct socket *sock;
int result, salen;
/* First create a socket */
- result = sock_create_kern(net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
+ result = sock_create_kern(ipvs->net, ipvs->bcfg.mcast_af, SOCK_DGRAM,
IPPROTO_UDP, &sock);
if (result < 0) {
pr_err("Error during creation of socket; terminating\n");
@@ -1687,7 +1681,7 @@ next_sync_buff(struct netns_ipvs *ipvs, struct ipvs_master_sync_state *ms)
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
- struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct netns_ipvs *ipvs = tinfo->ipvs;
struct ipvs_master_sync_state *ms = &ipvs->ms[tinfo->id];
struct sock *sk = tinfo->sock->sk;
struct ip_vs_sync_buff *sb;
@@ -1743,7 +1737,7 @@ done:
static int sync_thread_backup(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
- struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
+ struct netns_ipvs *ipvs = tinfo->ipvs;
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
@@ -1765,7 +1759,7 @@ static int sync_thread_backup(void *data)
break;
}
- ip_vs_process_message(tinfo->net, tinfo->buf, len);
+ ip_vs_process_message(ipvs, tinfo->buf, len);
}
}
@@ -1778,13 +1772,12 @@ static int sync_thread_backup(void *data)
}
-int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
+int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c,
int state)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **array = NULL, *task;
struct socket *sock;
- struct netns_ipvs *ipvs = net_ipvs(net);
struct net_device *dev;
char *name;
int (*threadfn)(void *data);
@@ -1811,7 +1804,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
if (!c->mcast_ttl)
c->mcast_ttl = 1;
- dev = __dev_get_by_name(net, c->mcast_ifn);
+ dev = __dev_get_by_name(ipvs->net, c->mcast_ifn);
if (!dev) {
pr_err("Unknown mcast interface: %s\n", c->mcast_ifn);
return -ENODEV;
@@ -1873,9 +1866,9 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
tinfo = NULL;
for (id = 0; id < count; id++) {
if (state == IP_VS_STATE_MASTER)
- sock = make_send_sock(net, id);
+ sock = make_send_sock(ipvs, id);
else
- sock = make_receive_sock(net, id);
+ sock = make_receive_sock(ipvs, id);
if (IS_ERR(sock)) {
result = PTR_ERR(sock);
goto outtinfo;
@@ -1883,7 +1876,7 @@ int start_sync_thread(struct net *net, struct ipvs_sync_daemon_cfg *c,
tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
if (!tinfo)
goto outsocket;
- tinfo->net = net;
+ tinfo->ipvs = ipvs;
tinfo->sock = sock;
if (state == IP_VS_STATE_BACKUP) {
tinfo->buf = kmalloc(ipvs->bcfg.sync_maxlen,
@@ -1947,9 +1940,8 @@ out:
}
-int stop_sync_thread(struct net *net, int state)
+int stop_sync_thread(struct netns_ipvs *ipvs, int state)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct task_struct **array;
int id;
int retc = -EINVAL;
@@ -2015,27 +2007,24 @@ int stop_sync_thread(struct net *net, int state)
/*
* Initialize data struct for each netns
*/
-int __net_init ip_vs_sync_net_init(struct net *net)
+int __net_init ip_vs_sync_net_init(struct netns_ipvs *ipvs)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
-
__mutex_init(&ipvs->sync_mutex, "ipvs->sync_mutex", &__ipvs_sync_key);
spin_lock_init(&ipvs->sync_lock);
spin_lock_init(&ipvs->sync_buff_lock);
return 0;
}
-void ip_vs_sync_net_cleanup(struct net *net)
+void ip_vs_sync_net_cleanup(struct netns_ipvs *ipvs)
{
int retc;
- struct netns_ipvs *ipvs = net_ipvs(net);
mutex_lock(&ipvs->sync_mutex);
- retc = stop_sync_thread(net, IP_VS_STATE_MASTER);
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_MASTER);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Master Daemon\n");
- retc = stop_sync_thread(net, IP_VS_STATE_BACKUP);
+ retc = stop_sync_thread(ipvs, IP_VS_STATE_BACKUP);
if (retc && retc != -ESRCH)
pr_err("Failed to stop Backup Daemon\n");
mutex_unlock(&ipvs->sync_mutex);
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 258a0b0e82a2..3264cb49b333 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -212,19 +212,20 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
}
-static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
+static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
+ int rt_mode,
struct ip_vs_iphdr *ipvsh,
struct sk_buff *skb, int mtu)
{
#ifdef CONFIG_IP_VS_IPV6
if (skb_af == AF_INET6) {
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
if (unlikely(__mtu_check_toobig_v6(skb, mtu))) {
if (!skb->dev)
skb->dev = net->loopback_dev;
/* only send ICMP too big on first fragment */
- if (!ipvsh->fragoffs)
+ if (!ipvsh->fragoffs && !ip_vs_iph_icmp(ipvsh))
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
IP_VS_DBG(1, "frag needed for %pI6c\n",
&ipv6_hdr(skb)->saddr);
@@ -233,8 +234,6 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
} else
#endif
{
- struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
-
/* If we're going to tunnel the packet and pmtu discovery
* is disabled, we'll just fragment it anyway
*/
@@ -242,7 +241,8 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
return true;
if (unlikely(ip_hdr(skb)->frag_off & htons(IP_DF) &&
- skb->len > mtu && !skb_is_gso(skb))) {
+ skb->len > mtu && !skb_is_gso(skb) &&
+ !ip_vs_iph_icmp(ipvsh))) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
IP_VS_DBG(1, "frag needed for %pI4\n",
@@ -256,11 +256,12 @@ static inline bool ensure_mtu_is_adequate(int skb_af, int rt_mode,
/* Get route to destination or remote server */
static int
-__ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
__be32 daddr, int rt_mode, __be32 *ret_saddr,
struct ip_vs_iphdr *ipvsh)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
struct ip_vs_dest_dst *dest_dst;
struct rtable *rt; /* Route to the other host */
int mtu;
@@ -336,7 +337,7 @@ __ip_vs_get_out_rt(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
maybe_update_pmtu(skb_af, skb, mtu);
}
- if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
skb_dst_drop(skb);
@@ -402,11 +403,12 @@ out_err:
* Get route to destination or remote server
*/
static int
-__ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
+__ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
+ struct ip_vs_dest *dest,
struct in6_addr *daddr, struct in6_addr *ret_saddr,
struct ip_vs_iphdr *ipvsh, int do_xfrm, int rt_mode)
{
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = ipvs->net;
struct ip_vs_dest_dst *dest_dst;
struct rt6_info *rt; /* Route to the other host */
struct dst_entry *dst;
@@ -484,7 +486,7 @@ __ip_vs_get_out_rt_v6(int skb_af, struct sk_buff *skb, struct ip_vs_dest *dest,
maybe_update_pmtu(skb_af, skb, mtu);
}
- if (!ensure_mtu_is_adequate(skb_af, rt_mode, ipvsh, skb, mtu))
+ if (!ensure_mtu_is_adequate(ipvs, skb_af, rt_mode, ipvsh, skb, mtu))
goto err_put;
skb_dst_drop(skb);
@@ -573,8 +575,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
skb_forward_csum(skb);
if (!skb->sk)
skb_sender_cpu_clear(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
} else
ret = NF_ACCEPT;
@@ -595,8 +597,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
skb_forward_csum(skb);
if (!skb->sk)
skb_sender_cpu_clear(skb);
- NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb,
- NULL, skb_dst(skb)->dev, dst_output_sk);
+ NF_HOOK(pf, NF_INET_LOCAL_OUT, cp->ipvs->net, NULL, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
} else
ret = NF_ACCEPT;
return ret;
@@ -629,7 +631,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt(cp->af, skb, NULL, iph->daddr,
+ if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr,
IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0)
goto tx_error;
@@ -656,10 +658,13 @@ int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+
EnterFunction(10);
rcu_read_lock();
- if (__ip_vs_get_out_rt_v6(cp->af, skb, NULL, &ipvsh->daddr.in6, NULL,
+ if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL,
+ &iph->daddr, NULL,
ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0)
goto tx_error;
@@ -706,7 +711,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
}
was_input = rt_is_input_route(skb_rtable(skb));
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR, NULL, ipvsh);
@@ -723,7 +728,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
- IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, ipvsh->off,
"ip_vs_nat_xmit(): "
"stopping DNAT to local address");
goto tx_error;
@@ -733,8 +738,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && ipv4_is_loopback(cp->daddr.ip) && was_input) {
- IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): "
- "stopping DNAT to loopback address");
+ IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, ipvsh->off,
+ "ip_vs_nat_xmit(): stopping DNAT to loopback "
+ "address");
goto tx_error;
}
@@ -751,7 +757,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_hdr(skb)->daddr = cp->daddr.ip;
ip_send_check(ip_hdr(skb));
- IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT");
+ IP_VS_DBG_PKT(10, AF_INET, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
@@ -794,7 +800,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
}
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -812,7 +819,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct)) {
- IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to local address");
goto tx_error;
@@ -823,7 +830,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
/* From world but DNAT to loopback address? */
if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) &&
ipv6_addr_type(&cp->daddr.in6) & IPV6_ADDR_LOOPBACK) {
- IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0,
+ IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, ipvsh->off,
"ip_vs_nat_xmit_v6(): "
"stopping DNAT to loopback address");
goto tx_error;
@@ -841,7 +848,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error;
ipv6_hdr(skb)->daddr = cp->daddr.in6;
- IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT");
+ IP_VS_DBG_PKT(10, AF_INET6, pp, skb, ipvsh->off, "After DNAT");
/* FIXME: when application helper enlarges the packet and the length
is larger than the MTU of outgoing device, there will be still
@@ -967,8 +974,8 @@ int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
- struct net *net = skb_net(skb);
- struct netns_ipvs *ipvs = net_ipvs(net);
+ struct netns_ipvs *ipvs = cp->ipvs;
+ struct net *net = ipvs->net;
struct rtable *rt; /* Route to the other host */
__be32 saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
@@ -984,7 +991,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT |
@@ -1042,7 +1049,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
- ip_local_out(skb);
+ ip_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
rcu_read_unlock();
@@ -1078,7 +1085,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1133,7 +1141,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
- ip6_local_out(skb);
+ ip6_local_out(cp->ipvs->net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);
rcu_read_unlock();
@@ -1165,7 +1173,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh);
@@ -1204,7 +1212,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
EnterFunction(10);
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6,
NULL, ipvsh, 0,
IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL |
@@ -1273,7 +1282,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt(cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
+ local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode,
NULL, iph);
if (local < 0)
goto tx_error;
@@ -1365,8 +1374,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL;
rcu_read_lock();
- local = __ip_vs_get_out_rt_v6(cp->af, skb, cp->dest, &cp->daddr.in6,
- NULL, ipvsh, 0, rt_mode);
+ local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
+ &cp->daddr.in6, NULL, ipvsh, 0, rt_mode);
if (local < 0)
goto tx_error;
rt = (struct rt6_info *) skb_dst(skb);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c09d6c7198f6..3cb3cb831591 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -168,6 +168,7 @@ nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
+ struct net *net,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto)
@@ -181,12 +182,13 @@ nf_ct_get_tuple(const struct sk_buff *skb,
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- return l4proto->pkt_to_tuple(skb, dataoff, tuple);
+ return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuple);
bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
- u_int16_t l3num, struct nf_conntrack_tuple *tuple)
+ u_int16_t l3num,
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
@@ -205,7 +207,7 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
l4proto = __nf_ct_l4proto_find(l3num, protonum);
- ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, tuple,
+ ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
l3proto, l4proto);
rcu_read_unlock();
@@ -938,10 +940,13 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
- if (timeout_ext)
- timeouts = NF_CT_TIMEOUT_EXT_DATA(timeout_ext);
- else
+ if (timeout_ext) {
+ timeouts = nf_ct_timeout_data(timeout_ext);
+ if (unlikely(!timeouts))
+ timeouts = l4proto->get_timeouts(net);
+ } else {
timeouts = l4proto->get_timeouts(net);
+ }
if (!l4proto->new(ct, skb, dataoff, timeouts)) {
nf_conntrack_free(ct);
@@ -950,7 +955,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
}
if (timeout_ext)
- nf_ct_timeout_ext_add(ct, timeout_ext->timeout, GFP_ATOMIC);
+ nf_ct_timeout_ext_add(ct, rcu_dereference(timeout_ext->timeout),
+ GFP_ATOMIC);
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
@@ -1029,7 +1035,7 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
- dataoff, l3num, protonum, &tuple, l3proto,
+ dataoff, l3num, protonum, net, &tuple, l3proto,
l4proto)) {
pr_debug("resolve_normal_ct: Can't get tuple\n");
return NULL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 94a66541e0b7..9f5272968abb 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2133,9 +2133,9 @@ ctnetlink_alloc_expect(const struct nlattr *const cda[], struct nf_conn *ct,
struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *mask);
-#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
static size_t
-ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
+ctnetlink_glue_build_size(const struct nf_conn *ct)
{
return 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+ 3 * nla_total_size(0) /* CTA_TUPLE_IP */
@@ -2162,8 +2162,19 @@ ctnetlink_nfqueue_build_size(const struct nf_conn *ct)
;
}
-static int
-ctnetlink_nfqueue_build(struct sk_buff *skb, struct nf_conn *ct)
+static struct nf_conn *ctnetlink_glue_get_ct(const struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo)
+{
+ struct nf_conn *ct;
+
+ ct = nf_ct_get(skb, ctinfo);
+ if (ct && nf_ct_is_untracked(ct))
+ ct = NULL;
+
+ return ct;
+}
+
+static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
{
const struct nf_conntrack_zone *zone;
struct nlattr *nest_parms;
@@ -2236,7 +2247,32 @@ nla_put_failure:
}
static int
-ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
+ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ u_int16_t ct_attr, u_int16_t ct_info_attr)
+{
+ struct nlattr *nest_parms;
+
+ nest_parms = nla_nest_start(skb, ct_attr | NLA_F_NESTED);
+ if (!nest_parms)
+ goto nla_put_failure;
+
+ if (__ctnetlink_glue_build(skb, ct) < 0)
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest_parms);
+
+ if (nla_put_be32(skb, ct_info_attr, htonl(ctinfo)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static int
+ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
{
int err;
@@ -2276,7 +2312,7 @@ ctnetlink_nfqueue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
}
static int
-ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
+ctnetlink_glue_parse(const struct nlattr *attr, struct nf_conn *ct)
{
struct nlattr *cda[CTA_MAX+1];
int ret;
@@ -2286,16 +2322,16 @@ ctnetlink_nfqueue_parse(const struct nlattr *attr, struct nf_conn *ct)
return ret;
spin_lock_bh(&nf_conntrack_expect_lock);
- ret = ctnetlink_nfqueue_parse_ct((const struct nlattr **)cda, ct);
+ ret = ctnetlink_glue_parse_ct((const struct nlattr **)cda, ct);
spin_unlock_bh(&nf_conntrack_expect_lock);
return ret;
}
-static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
- const struct nf_conn *ct,
- struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_tuple *mask)
+static int ctnetlink_glue_exp_parse(const struct nlattr * const *cda,
+ const struct nf_conn *ct,
+ struct nf_conntrack_tuple *tuple,
+ struct nf_conntrack_tuple *mask)
{
int err;
@@ -2309,8 +2345,8 @@ static int ctnetlink_nfqueue_exp_parse(const struct nlattr * const *cda,
}
static int
-ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
- u32 portid, u32 report)
+ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
+ u32 portid, u32 report)
{
struct nlattr *cda[CTA_EXPECT_MAX+1];
struct nf_conntrack_tuple tuple, mask;
@@ -2322,8 +2358,8 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
if (err < 0)
return err;
- err = ctnetlink_nfqueue_exp_parse((const struct nlattr * const *)cda,
- ct, &tuple, &mask);
+ err = ctnetlink_glue_exp_parse((const struct nlattr * const *)cda,
+ ct, &tuple, &mask);
if (err < 0)
return err;
@@ -2350,14 +2386,24 @@ ctnetlink_nfqueue_attach_expect(const struct nlattr *attr, struct nf_conn *ct,
return 0;
}
-static struct nfq_ct_hook ctnetlink_nfqueue_hook = {
- .build_size = ctnetlink_nfqueue_build_size,
- .build = ctnetlink_nfqueue_build,
- .parse = ctnetlink_nfqueue_parse,
- .attach_expect = ctnetlink_nfqueue_attach_expect,
- .seq_adjust = nf_ct_tcp_seqadj_set,
+/* seq_adjust callback of the conntrack glue hook.
+ *
+ * Hands @skb, @ct, @ctinfo and @diff on to nf_ct_tcp_seqadj_set(), but only
+ * when @ct has one of the IPS_NAT_MASK status bits set; for any other
+ * conntrack this is a no-op.
+ */
+static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct,
+				  enum ip_conntrack_info ctinfo, int diff)
+{
+	if (ct->status & IPS_NAT_MASK)
+		nf_ct_tcp_seqadj_set(skb, ct, ctinfo, diff);
+}
+
+/* Conntrack glue callbacks offered to other nfnetlink subsystems.
+ * The table is published through the nfnl_ct_hook RCU pointer in
+ * ctnetlink_init() and reset to NULL in ctnetlink_exit().
+ */
+static struct nfnl_ct_hook ctnetlink_glue_hook = {
+ .get_ct = ctnetlink_glue_get_ct,
+ .build_size = ctnetlink_glue_build_size,
+ .build = ctnetlink_glue_build,
+ .parse = ctnetlink_glue_parse,
+ .attach_expect = ctnetlink_glue_attach_expect,
+ .seq_adjust = ctnetlink_glue_seqadj,
};
-#endif /* CONFIG_NETFILTER_NETLINK_QUEUE_CT */
+#endif /* CONFIG_NETFILTER_NETLINK_GLUE_CT */
/***********************************************************************
* EXPECT
@@ -3341,9 +3387,9 @@ static int __init ctnetlink_init(void)
pr_err("ctnetlink_init: cannot register pernet operations\n");
goto err_unreg_exp_subsys;
}
-#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
/* setup interaction between nf_queue and nf_conntrack_netlink. */
- RCU_INIT_POINTER(nfq_ct_hook, &ctnetlink_nfqueue_hook);
+ RCU_INIT_POINTER(nfnl_ct_hook, &ctnetlink_glue_hook);
#endif
return 0;
@@ -3362,8 +3408,8 @@ static void __exit ctnetlink_exit(void)
unregister_pernet_subsys(&ctnetlink_net_ops);
nfnetlink_subsys_unregister(&ctnl_exp_subsys);
nfnetlink_subsys_unregister(&ctnl_subsys);
-#ifdef CONFIG_NETFILTER_NETLINK_QUEUE_CT
- RCU_INIT_POINTER(nfq_ct_hook, NULL);
+#ifdef CONFIG_NETFILTER_NETLINK_GLUE_CT
+ RCU_INIT_POINTER(nfnl_ct_hook, NULL);
#endif
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 6dd995c7c72b..fce1b1cca32d 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -398,7 +398,7 @@ static inline struct dccp_net *dccp_pernet(struct net *net)
}
static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
struct dccp_hdr _hdr, *dh;
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 2281be419a74..86dc752e5349 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -45,7 +45,7 @@ static inline struct nf_generic_net *generic_pernet(struct net *net)
static bool generic_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 7648674f29c3..a96451a7af20 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -190,9 +190,8 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
/* gre hdr info to tuple */
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev);
const struct gre_hdr_pptp *pgrehdr;
struct gre_hdr_pptp _pgrehdr;
__be16 srckey;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 67197731eb68..9578a7c371ef 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -156,7 +156,7 @@ static inline struct sctp_net *sctp_pernet(struct net *net)
}
static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct sctphdr *hp;
struct sctphdr _hdr;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 70383de72054..278f3b9356ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -277,7 +277,7 @@ static inline struct nf_tcp_net *tcp_pernet(struct net *net)
}
static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct tcphdr *hp;
struct tcphdr _hdr;
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 6957281ffee5..478f92f834b6 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -38,6 +38,7 @@ static inline struct nf_udp_net *udp_pernet(struct net *net)
static bool udp_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
+ struct net *net,
struct nf_conntrack_tuple *tuple)
{
const struct udphdr *hp;
diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c
index c5903d1649f9..1ac8ee13a873 100644
--- a/net/netfilter/nf_conntrack_proto_udplite.c
+++ b/net/netfilter/nf_conntrack_proto_udplite.c
@@ -48,6 +48,7 @@ static inline struct udplite_net *udplite_pernet(struct net *net)
static bool udplite_pkt_to_tuple(const struct sk_buff *skb,
unsigned int dataoff,
+ struct net *net,
struct nf_conntrack_tuple *tuple)
{
const struct udphdr *hp;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 5113dfd39df9..06a9f45771ab 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -83,7 +83,7 @@ out:
rcu_read_unlock();
}
-int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
+int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
{
struct flowi fl;
unsigned int hh_len;
@@ -99,7 +99,7 @@ int nf_xfrm_me_harder(struct sk_buff *skb, unsigned int family)
dst = ((struct xfrm_dst *)dst)->route;
dst_hold(dst);
- dst = xfrm_lookup(dev_net(dst->dev), dst, &fl, skb->sk, 0);
+ dst = xfrm_lookup(net, dst, &fl, skb->sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 96777f9a9350..5baa8e24e6ac 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -69,19 +69,14 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
dev_put(physdev);
}
#endif
- /* Drop reference to owner of hook which queued us. */
- module_put(entry->elem->owner);
}
EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
/* Bump dev refs so they don't vanish while packet is out */
-bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
+void nf_queue_entry_get_refs(struct nf_queue_entry *entry)
{
struct nf_hook_state *state = &entry->state;
- if (!try_module_get(entry->elem->owner))
- return false;
-
if (state->in)
dev_hold(state->in);
if (state->out)
@@ -100,8 +95,6 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(physdev);
}
#endif
-
- return true;
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
@@ -131,22 +124,20 @@ int nf_queue(struct sk_buff *skb,
const struct nf_queue_handler *qh;
/* QUEUE == DROP if no one is waiting, to be safe. */
- rcu_read_lock();
-
qh = rcu_dereference(queue_handler);
if (!qh) {
status = -ESRCH;
- goto err_unlock;
+ goto err;
}
afinfo = nf_get_afinfo(state->pf);
if (!afinfo)
- goto err_unlock;
+ goto err;
entry = kmalloc(sizeof(*entry) + afinfo->route_key_size, GFP_ATOMIC);
if (!entry) {
status = -ENOMEM;
- goto err_unlock;
+ goto err;
}
*entry = (struct nf_queue_entry) {
@@ -156,16 +147,11 @@ int nf_queue(struct sk_buff *skb,
.size = sizeof(*entry) + afinfo->route_key_size,
};
- if (!nf_queue_entry_get_refs(entry)) {
- status = -ECANCELED;
- goto err_unlock;
- }
+ nf_queue_entry_get_refs(entry);
skb_dst_force(skb);
afinfo->saveroute(skb, entry);
status = qh->outfn(entry, queuenum);
- rcu_read_unlock();
-
if (status < 0) {
nf_queue_entry_release_refs(entry);
goto err;
@@ -173,8 +159,6 @@ int nf_queue(struct sk_buff *skb,
return 0;
-err_unlock:
- rcu_read_unlock();
err:
kfree(entry);
return status;
@@ -187,19 +171,15 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
const struct nf_afinfo *afinfo;
int err;
- rcu_read_lock();
-
nf_queue_entry_release_refs(entry);
/* Continue traversal iff userspace said ok... */
- if (verdict == NF_REPEAT) {
- elem = list_entry(elem->list.prev, struct nf_hook_ops, list);
- verdict = NF_ACCEPT;
- }
+ if (verdict == NF_REPEAT)
+ verdict = elem->hook(elem->priv, skb, &entry->state);
if (verdict == NF_ACCEPT) {
afinfo = nf_get_afinfo(entry->state.pf);
- if (!afinfo || afinfo->reroute(skb, entry) < 0)
+ if (!afinfo || afinfo->reroute(entry->state.net, skb, entry) < 0)
verdict = NF_DROP;
}
@@ -215,15 +195,13 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
case NF_ACCEPT:
case NF_STOP:
local_bh_disable();
- entry->state.okfn(entry->state.sk, skb);
+ entry->state.okfn(entry->state.net, entry->state.sk, skb);
local_bh_enable();
break;
case NF_QUEUE:
err = nf_queue(skb, elem, &entry->state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
- if (err == -ECANCELED)
- goto next_hook;
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
goto next_hook;
@@ -235,7 +213,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
default:
kfree_skb(skb);
}
- rcu_read_unlock();
+
kfree(entry);
}
EXPORT_SYMBOL(nf_reinject);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4a41eb92bcc0..93cc4737018f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1433,7 +1433,6 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
for (i = 0; i < afi->nops; i++) {
ops = &basechain->ops[i];
ops->pf = family;
- ops->owner = afi->owner;
ops->hooknum = hooknum;
ops->priority = priority;
ops->priv = chain;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 05d0b03530f6..f3695a497408 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -48,9 +48,7 @@ static void __nft_trace_packet(const struct nft_pktinfo *pkt,
const struct nft_chain *chain,
int rulenum, enum nft_trace type)
{
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
-
- nf_log_trace(net, pkt->xt.family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
chain->table->name, chain->name, comments[type],
rulenum);
@@ -111,10 +109,10 @@ struct nft_jumpstack {
};
unsigned int
-nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops)
+nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{
- const struct nft_chain *chain = ops->priv, *basechain = chain;
- const struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
+ const struct nft_chain *chain = priv, *basechain = chain;
+ const struct net *net = pkt->net;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_regs regs;
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 2cae4d4a03b7..7b9c053ba750 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -17,13 +17,13 @@
static inline void
nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops, struct sk_buff *skb,
+ struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct iphdr *iph, _iph;
u32 len, thoff;
- nft_set_pktinfo(pkt, ops, skb, state);
+ nft_set_pktinfo(pkt, skb, state);
iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
&_iph);
@@ -48,7 +48,6 @@ nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
static inline void
__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
@@ -82,33 +81,32 @@ __nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
}
static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- nft_set_pktinfo(pkt, ops, skb, state);
- __nft_netdev_set_pktinfo_ipv6(pkt, ops, skb, state);
+ nft_set_pktinfo(pkt, skb, state);
+ __nft_netdev_set_pktinfo_ipv6(pkt, skb, state);
}
static unsigned int
-nft_do_chain_netdev(const struct nf_hook_ops *ops, struct sk_buff *skb,
+nft_do_chain_netdev(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nft_pktinfo pkt;
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
- nft_netdev_set_pktinfo_ipv4(&pkt, ops, skb, state);
+ nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
break;
case htons(ETH_P_IPV6):
- nft_netdev_set_pktinfo_ipv6(&pkt, ops, skb, state);
+ nft_netdev_set_pktinfo_ipv6(&pkt, skb, state);
break;
default:
- nft_set_pktinfo(&pkt, ops, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
break;
}
- return nft_do_chain(&pkt, ops);
+ return nft_do_chain(&pkt, priv);
}
static struct nft_af_info nft_af_netdev __read_mostly = {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 70277b11f742..f1d9e887f5b1 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -64,7 +64,7 @@ void nfnl_unlock(__u8 subsys_id)
EXPORT_SYMBOL_GPL(nfnl_unlock);
#ifdef CONFIG_PROVE_LOCKING
-int lockdep_nfnl_is_held(u8 subsys_id)
+bool lockdep_nfnl_is_held(u8 subsys_id)
{
return lockdep_is_held(&table[subsys_id].mutex);
}
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 476accd17145..c7a2d0e1c462 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -291,6 +291,34 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb,
return ret;
}
+/* Drop the custom timeout reference held by the conntrack owning tuple
+ * hash @i.
+ *
+ * The conntrack's timeout extension pointer is reset to NULL when it equals
+ * @timeout, or unconditionally when @timeout is NULL. Conntracks without a
+ * timeout extension are left alone.
+ */
+static void untimeout(struct nf_conntrack_tuple_hash *i,
+		      struct ctnl_timeout *timeout)
+{
+	struct nf_conn_timeout *ext;
+
+	ext = nf_ct_timeout_find(nf_ct_tuplehash_to_ctrack(i));
+	if (!ext)
+		return;
+
+	/* A specific timeout was requested and this entry uses another one. */
+	if (timeout && ext->timeout != timeout)
+		return;
+
+	RCU_INIT_POINTER(ext->timeout, NULL);
+}
+
+/* Walk the init_net conntrack hash table and run untimeout() on every entry.
+ *
+ * @timeout: timeout object whose references are to be cleared; the module
+ *           exit path passes NULL, which untimeout() treats as "any".
+ *
+ * NOTE(review): only init_net's table is visited here; conntracks hashed in
+ * other network namespaces are not reached by this loop - confirm intended.
+ */
+static void ctnl_untimeout(struct ctnl_timeout *timeout)
+{
+ struct nf_conntrack_tuple_hash *h;
+ const struct hlist_nulls_node *nn;
+ int i;
+
+ /* Bucket locks below are taken with plain spin_lock(), so bottom
+ * halves are disabled around the whole walk.
+ */
+ local_bh_disable();
+ for (i = 0; i < init_net.ct.htable_size; i++) {
+ spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ /* Bound is re-tested under the bucket lock - presumably in case
+ * the table size changed meanwhile; TODO confirm.
+ */
+ if (i < init_net.ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
+ untimeout(h, timeout);
+ }
+ spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ }
+ local_bh_enable();
+}
+}
+
/* try to delete object, fail if it is still in use. */
static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
{
@@ -301,6 +329,7 @@ static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
+ ctnl_untimeout(timeout);
kfree_rcu(timeout, rcu_head);
} else {
/* still in use, restore reference counter. */
@@ -567,6 +596,10 @@ static void __exit cttimeout_exit(void)
pr_info("cttimeout: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&cttimeout_subsys);
+
+ /* Make sure no conntrack objects refer to custom timeouts anymore. */
+ ctnl_untimeout(NULL);
+
list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
list_del_rcu(&cur->head);
/* We are sure that our objects have no clients at this point,
@@ -579,6 +612,7 @@ static void __exit cttimeout_exit(void)
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+ rcu_barrier();
}
module_init(cttimeout_init);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 4670821b569d..06eb48fceb42 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -27,6 +27,7 @@
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
+#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/spinlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
@@ -401,7 +402,9 @@ __build_packet_message(struct nfnl_log_net *log,
unsigned int hooknum,
const struct net_device *indev,
const struct net_device *outdev,
- const char *prefix, unsigned int plen)
+ const char *prefix, unsigned int plen,
+ const struct nfnl_ct_hook *nfnl_ct,
+ struct nf_conn *ct, enum ip_conntrack_info ctinfo)
{
struct nfulnl_msg_packet_hdr pmsg;
struct nlmsghdr *nlh;
@@ -538,9 +541,9 @@ __build_packet_message(struct nfnl_log_net *log,
if (skb->tstamp.tv64) {
struct nfulnl_msg_packet_timestamp ts;
- struct timeval tv = ktime_to_timeval(skb->tstamp);
- ts.sec = cpu_to_be64(tv.tv_sec);
- ts.usec = cpu_to_be64(tv.tv_usec);
+ struct timespec64 kts = ktime_to_timespec64(skb->tstamp);
+ ts.sec = cpu_to_be64(kts.tv_sec);
+ ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
if (nla_put(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
@@ -575,6 +578,10 @@ __build_packet_message(struct nfnl_log_net *log,
htonl(atomic_inc_return(&log->global_seq))))
goto nla_put_failure;
+ if (ct && nfnl_ct->build(inst->skb, ct, ctinfo,
+ NFULA_CT, NFULA_CT_INFO) < 0)
+ goto nla_put_failure;
+
if (data_len) {
struct nlattr *nla;
int size = nla_attr_size(data_len);
@@ -620,12 +627,16 @@ nfulnl_log_packet(struct net *net,
const struct nf_loginfo *li_user,
const char *prefix)
{
- unsigned int size, data_len;
+ size_t size;
+ unsigned int data_len;
struct nfulnl_instance *inst;
const struct nf_loginfo *li;
unsigned int qthreshold;
unsigned int plen;
struct nfnl_log_net *log = nfnl_log_pernet(net);
+ const struct nfnl_ct_hook *nfnl_ct = NULL;
+ struct nf_conn *ct = NULL;
+ enum ip_conntrack_info uninitialized_var(ctinfo);
if (li_user && li_user->type == NF_LOG_TYPE_ULOG)
li = li_user;
@@ -671,6 +682,14 @@ nfulnl_log_packet(struct net *net,
size += nla_total_size(sizeof(u_int32_t));
if (inst->flags & NFULNL_CFG_F_SEQ_GLOBAL)
size += nla_total_size(sizeof(u_int32_t));
+ if (inst->flags & NFULNL_CFG_F_CONNTRACK) {
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+ if (nfnl_ct != NULL) {
+ ct = nfnl_ct->get_ct(skb, &ctinfo);
+ if (ct != NULL)
+ size += nfnl_ct->build_size(ct);
+ }
+ }
qthreshold = inst->qthreshold;
/* per-rule qthreshold overrides per-instance */
@@ -715,7 +734,8 @@ nfulnl_log_packet(struct net *net,
inst->qlen++;
__build_packet_message(log, inst, skb, data_len, pf,
- hooknum, in, out, prefix, plen);
+ hooknum, in, out, prefix, plen,
+ nfnl_ct, ct, ctinfo);
if (inst->qlen >= qthreshold)
__nfulnl_flush(inst);
@@ -805,6 +825,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
struct net *net = sock_net(ctnl);
struct nfnl_log_net *log = nfnl_log_pernet(net);
int ret = 0;
+ u16 flags;
if (nfula[NFULA_CFG_CMD]) {
u_int8_t pf = nfmsg->nfgen_family;
@@ -826,6 +847,28 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
goto out_put;
}
+ /* Check up front that we support these flags; their dependencies must
+ * already be there too, so the configuration is applied atomically.
+ */
+ if (nfula[NFULA_CFG_FLAGS]) {
+ flags = ntohs(nla_get_be16(nfula[NFULA_CFG_FLAGS]));
+
+ if ((flags & NFULNL_CFG_F_CONNTRACK) &&
+ !rcu_access_pointer(nfnl_ct_hook)) {
+#ifdef CONFIG_MODULES
+ nfnl_unlock(NFNL_SUBSYS_ULOG);
+ request_module("ip_conntrack_netlink");
+ nfnl_lock(NFNL_SUBSYS_ULOG);
+ if (rcu_access_pointer(nfnl_ct_hook)) {
+ ret = -EAGAIN;
+ goto out_put;
+ }
+#endif
+ ret = -EOPNOTSUPP;
+ goto out_put;
+ }
+ }
+
if (cmd != NULL) {
switch (cmd->command) {
case NFULNL_CFG_CMD_BIND:
@@ -854,16 +897,15 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
ret = -ENOTSUPP;
break;
}
+ } else if (!inst) {
+ ret = -ENODEV;
+ goto out;
}
if (nfula[NFULA_CFG_MODE]) {
- struct nfulnl_msg_config_mode *params;
- params = nla_data(nfula[NFULA_CFG_MODE]);
+ struct nfulnl_msg_config_mode *params =
+ nla_data(nfula[NFULA_CFG_MODE]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_mode(inst, params->copy_mode,
ntohl(params->copy_range));
}
@@ -871,42 +913,23 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
if (nfula[NFULA_CFG_TIMEOUT]) {
__be32 timeout = nla_get_be32(nfula[NFULA_CFG_TIMEOUT]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_timeout(inst, ntohl(timeout));
}
if (nfula[NFULA_CFG_NLBUFSIZ]) {
__be32 nlbufsiz = nla_get_be32(nfula[NFULA_CFG_NLBUFSIZ]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_nlbufsiz(inst, ntohl(nlbufsiz));
}
if (nfula[NFULA_CFG_QTHRESH]) {
__be32 qthresh = nla_get_be32(nfula[NFULA_CFG_QTHRESH]);
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
nfulnl_set_qthresh(inst, ntohl(qthresh));
}
- if (nfula[NFULA_CFG_FLAGS]) {
- __be16 flags = nla_get_be16(nfula[NFULA_CFG_FLAGS]);
-
- if (!inst) {
- ret = -ENODEV;
- goto out;
- }
- nfulnl_set_flags(inst, ntohs(flags));
- }
+ if (nfula[NFULA_CFG_FLAGS])
+ nfulnl_set_flags(inst, flags);
out_put:
instance_put(inst);
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue.c
index a5cd6d90b78b..7d81d280cb4f 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -28,12 +28,12 @@
#include <linux/netfilter_bridge.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
+#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/netfilter/nf_queue.h>
#include <net/netns/generic.h>
-#include <net/netfilter/nfnetlink_queue.h>
#include <linux/atomic.h>
@@ -313,6 +313,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
struct net_device *outdev;
struct nf_conn *ct = NULL;
enum ip_conntrack_info uninitialized_var(ctinfo);
+ struct nfnl_ct_hook *nfnl_ct;
bool csum_verify;
char *secdata = NULL;
u32 seclen = 0;
@@ -364,8 +365,14 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
break;
}
- if (queue->flags & NFQA_CFG_F_CONNTRACK)
- ct = nfqnl_ct_get(entskb, &size, &ctinfo);
+ if (queue->flags & NFQA_CFG_F_CONNTRACK) {
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+ if (nfnl_ct != NULL) {
+ ct = nfnl_ct->get_ct(entskb, &ctinfo);
+ if (ct != NULL)
+ size += nfnl_ct->build_size(ct);
+ }
+ }
if (queue->flags & NFQA_CFG_F_UID_GID) {
size += (nla_total_size(sizeof(u_int32_t)) /* uid */
@@ -493,9 +500,10 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (entskb->tstamp.tv64) {
struct nfqnl_msg_packet_timestamp ts;
- struct timeval tv = ktime_to_timeval(entskb->tstamp);
- ts.sec = cpu_to_be64(tv.tv_sec);
- ts.usec = cpu_to_be64(tv.tv_usec);
+ /* Timestamp of the queued packet (entskb); skb is the netlink message being built. */
+ struct timespec64 kts = ktime_to_timespec64(entskb->tstamp);
+
+ ts.sec = cpu_to_be64(kts.tv_sec);
+ ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
@@ -508,7 +516,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
goto nla_put_failure;
- if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
+ if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
goto nla_put_failure;
if (cap_len > data_len &&
@@ -598,12 +606,9 @@ static struct nf_queue_entry *
nf_queue_entry_dup(struct nf_queue_entry *e)
{
struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
- if (entry) {
- if (nf_queue_entry_get_refs(entry))
- return entry;
- kfree(entry);
- }
- return NULL;
+ if (entry)
+ nf_queue_entry_get_refs(entry);
+ return entry;
}
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
@@ -670,8 +675,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
struct nfqnl_instance *queue;
struct sk_buff *skb, *segs;
int err = -ENOBUFS;
- struct net *net = dev_net(entry->state.in ?
- entry->state.in : entry->state.out);
+ struct net *net = entry->state.net;
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
/* rcu_read_lock()ed by nf_hook_slow() */
@@ -699,7 +703,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
nf_bridge_adjust_skb_data(skb);
segs = skb_gso_segment(skb, 0);
/* Does not use PTR_ERR to limit the number of error codes that can be
- * returned by nf_queue. For instance, callers rely on -ECANCELED to
+ * returned by nf_queue. For instance, callers rely on -ESRCH to
* mean 'ignore this hook'.
*/
if (IS_ERR_OR_NULL(segs))
@@ -1002,6 +1006,28 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb,
return 0;
}
+/* Apply the conntrack attributes of a verdict message through the glue hook.
+ *
+ * Looks up the conntrack of entry->skb with get_ct() (which also receives
+ * @ctinfo), feeds it the NFQA_CT attribute and, when NFQA_EXP is present as
+ * well, attaches the expectation. The attach_expect() result is not checked.
+ *
+ * Returns the conntrack, or NULL when get_ct() finds none or parsing
+ * NFQA_CT fails.
+ *
+ * NOTE(review): the portid is read from the control block of the queued
+ * packet (entry->skb), not from the netlink request - confirm this is the
+ * intended sender id.
+ */
+static struct nf_conn *nfqnl_ct_parse(struct nfnl_ct_hook *nfnl_ct,
+				      const struct nlmsghdr *nlh,
+				      const struct nlattr * const nfqa[],
+				      struct nf_queue_entry *entry,
+				      enum ip_conntrack_info *ctinfo)
+{
+	struct nf_conn *ct = nfnl_ct->get_ct(entry->skb, ctinfo);
+
+	if (!ct || nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
+		return NULL;
+
+	if (nfqa[NFQA_EXP] != NULL) {
+		u32 portid = NETLINK_CB(entry->skb).portid;
+
+		nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct, portid,
+				       nlmsg_report(nlh));
+	}
+
+	return ct;
+}
+
static int
nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -1015,6 +1041,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
unsigned int verdict;
struct nf_queue_entry *entry;
enum ip_conntrack_info uninitialized_var(ctinfo);
+ struct nfnl_ct_hook *nfnl_ct;
struct nf_conn *ct = NULL;
struct net *net = sock_net(ctnl);
@@ -1038,12 +1065,10 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
return -ENOENT;
if (nfqa[NFQA_CT]) {
- ct = nfqnl_ct_parse(entry->skb, nfqa[NFQA_CT], &ctinfo);
- if (ct && nfqa[NFQA_EXP]) {
- nfqnl_attach_expect(ct, nfqa[NFQA_EXP],
- NETLINK_CB(skb).portid,
- nlmsg_report(nlh));
- }
+ /* rcu lock already held from nfnl->call_rcu. */
+ nfnl_ct = rcu_dereference(nfnl_ct_hook);
+ if (nfnl_ct != NULL)
+ ct = nfqnl_ct_parse(nfnl_ct, nlh, nfqa, entry, &ctinfo);
}
if (nfqa[NFQA_PAYLOAD]) {
@@ -1054,8 +1079,8 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb,
payload_len, entry, diff) < 0)
verdict = NF_DROP;
- if (ct)
- nfqnl_ct_seq_adjust(entry->skb, ct, ctinfo, diff);
+ if (ct && diff)
+ nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
}
if (nfqa[NFQA_MARK])
diff --git a/net/netfilter/nfnetlink_queue_ct.c b/net/netfilter/nfnetlink_queue_ct.c
deleted file mode 100644
index 96cac50e0d12..000000000000
--- a/net/netfilter/nfnetlink_queue_ct.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * (C) 2012 by Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/skbuff.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_queue.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nfnetlink_queue.h>
-
-struct nf_conn *nfqnl_ct_get(struct sk_buff *entskb, size_t *size,
- enum ip_conntrack_info *ctinfo)
-{
- struct nfq_ct_hook *nfq_ct;
- struct nf_conn *ct;
-
- /* rcu_read_lock()ed by __nf_queue already. */
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return NULL;
-
- ct = nf_ct_get(entskb, ctinfo);
- if (ct) {
- if (!nf_ct_is_untracked(ct))
- *size += nfq_ct->build_size(ct);
- else
- ct = NULL;
- }
- return ct;
-}
-
-struct nf_conn *
-nfqnl_ct_parse(const struct sk_buff *skb, const struct nlattr *attr,
- enum ip_conntrack_info *ctinfo)
-{
- struct nfq_ct_hook *nfq_ct;
- struct nf_conn *ct;
-
- /* rcu_read_lock()ed by __nf_queue already. */
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return NULL;
-
- ct = nf_ct_get(skb, ctinfo);
- if (ct && !nf_ct_is_untracked(ct))
- nfq_ct->parse(attr, ct);
-
- return ct;
-}
-
-int nfqnl_ct_put(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct nfq_ct_hook *nfq_ct;
- struct nlattr *nest_parms;
- u_int32_t tmp;
-
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return 0;
-
- nest_parms = nla_nest_start(skb, NFQA_CT | NLA_F_NESTED);
- if (!nest_parms)
- goto nla_put_failure;
-
- if (nfq_ct->build(skb, ct) < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, nest_parms);
-
- tmp = ctinfo;
- if (nla_put_be32(skb, NFQA_CT_INFO, htonl(tmp)))
- goto nla_put_failure;
-
- return 0;
-
-nla_put_failure:
- return -1;
-}
-
-void nfqnl_ct_seq_adjust(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo, int diff)
-{
- struct nfq_ct_hook *nfq_ct;
-
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return;
-
- if ((ct->status & IPS_NAT_MASK) && diff)
- nfq_ct->seq_adjust(skb, ct, ctinfo, diff);
-}
-
-int nfqnl_attach_expect(struct nf_conn *ct, const struct nlattr *attr,
- u32 portid, u32 report)
-{
- struct nfq_ct_hook *nfq_ct;
-
- if (nf_ct_is_untracked(ct))
- return 0;
-
- nfq_ct = rcu_dereference(nfq_ct_hook);
- if (nfq_ct == NULL)
- return -EOPNOTSUPP;
-
- return nfq_ct->attach_expect(attr, ct, portid, report);
-}
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index a13d6a386d63..319c22b4bca2 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -31,9 +31,8 @@ static void nft_log_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
const struct nft_log *priv = nft_expr_priv(expr);
- struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
- nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in,
+ nf_log_packet(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &priv->loginfo, "%s", priv->prefix);
}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index cb2f13ebb5a6..e4ad2c24bc41 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -42,7 +42,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*(__be16 *)dest = skb->protocol;
break;
case NFT_META_NFPROTO:
- *dest = pkt->ops->pf;
+ *dest = pkt->pf;
break;
case NFT_META_L4PROTO:
*dest = pkt->tprot;
@@ -135,7 +135,7 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
}
- switch (pkt->ops->pf) {
+ switch (pkt->pf) {
case NFPROTO_IPV4:
if (ipv4_is_multicast(ip_hdr(skb)->daddr))
*dest = PACKET_MULTICAST;
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 96805d21d618..61d216eb7917 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -42,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
queue = priv->queuenum + cpu % priv->queues_total;
} else {
queue = nfqueue_hash(pkt->skb, queue,
- priv->queues_total, pkt->ops->pf,
+ priv->queues_total, pkt->pf,
jhash_initval);
}
}
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
index 635dbba93d01..759ca5248a3d 100644
--- a/net/netfilter/nft_reject_inet.c
+++ b/net/netfilter/nft_reject_inet.c
@@ -22,38 +22,37 @@ static void nft_reject_inet_eval(const struct nft_expr *expr,
const struct nft_pktinfo *pkt)
{
struct nft_reject *priv = nft_expr_priv(expr);
- struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
- switch (pkt->ops->pf) {
+ switch (pkt->pf) {
case NFPROTO_IPV4:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
nf_send_unreach(pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
+ pkt->hook);
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ nf_send_reset(pkt->net, pkt->skb, pkt->hook);
break;
case NFT_REJECT_ICMPX_UNREACH:
nf_send_unreach(pkt->skb,
nft_reject_icmp_code(priv->icmp_code),
- pkt->ops->hooknum);
+ pkt->hook);
break;
}
break;
case NFPROTO_IPV6:
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
- nf_send_unreach6(net, pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
+ nf_send_unreach6(pkt->net, pkt->skb, priv->icmp_code,
+ pkt->hook);
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ nf_send_reset6(pkt->net, pkt->skb, pkt->hook);
break;
case NFT_REJECT_ICMPX_UNREACH:
- nf_send_unreach6(net, pkt->skb,
+ nf_send_unreach6(pkt->net, pkt->skb,
nft_reject_icmpv6_code(priv->icmp_code),
- pkt->ops->hooknum);
+ pkt->hook);
break;
}
break;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 9b42b5ea6dcd..d4aaad747ea9 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -1193,7 +1193,6 @@ struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
if (!(hook_mask & 1))
continue;
ops[i].hook = fn;
- ops[i].owner = table->me;
ops[i].pf = table->af;
ops[i].hooknum = hooknum;
ops[i].priority = table->priority;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index faf32d888198..e7ac07e53b59 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -171,6 +171,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
if (timeout_ext == NULL)
ret = -ENOMEM;
+ rcu_read_unlock();
+ return ret;
+
err_put_timeout:
__xt_ct_tg_timeout_put(timeout);
out:
@@ -318,8 +321,10 @@ static void xt_ct_destroy_timeout(struct nf_conn *ct)
if (timeout_put) {
timeout_ext = nf_ct_timeout_find(ct);
- if (timeout_ext)
+ if (timeout_ext) {
timeout_put(timeout_ext->timeout);
+ RCU_INIT_POINTER(timeout_ext->timeout, NULL);
+ }
}
rcu_read_unlock();
#endif
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index c13b79440ede..1763ab82bcd7 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -33,7 +33,7 @@ log_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_log_info *loginfo = par->targinfo;
struct nf_loginfo li;
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
li.type = NF_LOG_TYPE_LOG;
li.u.log.level = loginfo->level;
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index fb7497c928a0..a1fa2c800cb9 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -26,7 +26,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_nflog_info *info = par->targinfo;
struct nf_loginfo li;
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
li.type = NF_LOG_TYPE_ULOG;
li.u.ulog.copy_len = info->len;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 8c02501a530f..b7c43def0dc6 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -108,7 +108,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
return -1;
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
if (dst_mtu(skb_dst(skb)) <= minlen) {
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index fd980aa7715d..899b06115fc5 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -32,7 +32,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
- nf_dup_ipv4(skb, par->hooknum, &info->gw.in, info->priv->oif);
+ nf_dup_ipv4(par->net, skb, par->hooknum, &info->gw.in, info->priv->oif);
return XT_CONTINUE;
}
@@ -43,7 +43,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tee_tginfo *info = par->targinfo;
- nf_dup_ipv6(skb, par->hooknum, &info->gw.in6, info->priv->oif);
+ nf_dup_ipv6(par->net, skb, par->hooknum, &info->gw.in6, info->priv->oif);
return XT_CONTINUE;
}
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index d0c96c5ae29a..3ab591e73ec0 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -250,8 +250,8 @@ nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
* no such listener is found, or NULL if the TCP header is incomplete.
*/
static struct sock *
-tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
- struct sock *sk)
+tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+ __be32 laddr, __be16 lport, struct sock *sk)
{
const struct iphdr *iph = ip_hdr(skb);
struct tcphdr _hdr, *hp;
@@ -267,7 +267,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+ sk2 = nf_tproxy_get_sock_v4(net, iph->protocol,
iph->saddr, laddr ? laddr : iph->daddr,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -290,7 +290,7 @@ nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
}
static unsigned int
-tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
+tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
u_int32_t mark_mask, u_int32_t mark_value)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -305,7 +305,7 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
skb->dev, NFT_LOOKUP_ESTABLISHED);
@@ -317,11 +317,11 @@ tproxy_tg4(struct sk_buff *skb, __be32 laddr, __be16 lport,
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT)
/* reopening a TIME_WAIT connection needs special handling */
- sk = tproxy_handle_time_wait4(skb, laddr, lport, sk);
+ sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol,
+ sk = nf_tproxy_get_sock_v4(net, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
skb->dev, NFT_LOOKUP_LISTENER);
@@ -351,7 +351,7 @@ tproxy_tg4_v0(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info *tgi = par->targinfo;
- return tproxy_tg4(skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
+ return tproxy_tg4(par->net, skb, tgi->laddr, tgi->lport, tgi->mark_mask, tgi->mark_value);
}
static unsigned int
@@ -359,7 +359,7 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
- return tproxy_tg4(skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
+ return tproxy_tg4(par->net, skb, tgi->laddr.ip, tgi->lport, tgi->mark_mask, tgi->mark_value);
}
#ifdef XT_TPROXY_HAVE_IPV6
@@ -429,7 +429,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
* to a listener socket if there's one */
struct sock *sk2;
- sk2 = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk2 = nf_tproxy_get_sock_v6(par->net, tproto,
&iph->saddr,
tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
hp->source,
@@ -472,7 +472,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
* addresses, this happens if the redirect already happened
* and the current packet belongs to an already established
* connection */
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
par->in, NFT_LOOKUP_ESTABLISHED);
@@ -487,7 +487,7 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
- sk = nf_tproxy_get_sock_v6(dev_net(skb->dev), tproto,
+ sk = nf_tproxy_get_sock_v6(par->net, tproto,
&iph->saddr, laddr,
hp->source, lport,
par->in, NFT_LOOKUP_LISTENER);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 5b4743cc0436..11d6091991a4 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -125,7 +125,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
static bool
addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
const struct xt_addrtype_info *info = par->matchinfo;
const struct iphdr *iph = ip_hdr(skb);
bool ret = true;
@@ -143,7 +143,7 @@ addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
static bool
addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
const struct xt_addrtype_info_v1 *info = par->matchinfo;
const struct iphdr *iph;
const struct net_device *dev = NULL;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 075d89d94d28..99bbc829868d 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -317,7 +317,7 @@ static int count_them(struct net *net,
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
const struct xt_connlimit_info *info = par->matchinfo;
union nf_inet_addr addr;
struct nf_conntrack_tuple tuple;
@@ -332,7 +332,7 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
zone = nf_ct_zone(ct);
} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- par->family, &tuple)) {
+ par->family, net, &tuple)) {
goto hotdrop;
}
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 8d47c3780fda..71a9d95e0a81 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -48,6 +48,7 @@ static bool
ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_ipvs_mtinfo *data = par->matchinfo;
+ struct netns_ipvs *ipvs = net_ipvs(par->net);
/* ipvs_mt_check ensures that family is only NFPROTO_IPV[46]. */
const u_int8_t family = par->family;
struct ip_vs_iphdr iph;
@@ -67,7 +68,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
goto out;
}
- ip_vs_fill_iph_skb(family, skb, &iph);
+ ip_vs_fill_iph_skb(family, skb, true, &iph);
if (data->bitmask & XT_IPVS_PROTO)
if ((iph.protocol == data->l4proto) ^
@@ -85,7 +86,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(family, skb, &iph, 1 /* inverse */);
+ cp = pp->conn_out_get(ipvs, family, skb, &iph);
if (unlikely(cp == NULL)) {
match = false;
goto out;
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 0778855ea5e7..df8801e02a32 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -200,7 +200,7 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
unsigned char opts[MAX_IPOPTLEN];
const struct xt_osf_finger *kf;
const struct xt_osf_user_finger *f;
- struct net *net = dev_net(p->in ? p->in : p->out);
+ struct net *net = p->net;
if (!info)
return false;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 45e1b30e4fb2..d725a27743a1 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -237,7 +237,7 @@ static void recent_table_flush(struct recent_table *t)
static bool
recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = dev_net(par->in ? par->in : par->out);
+ struct net *net = par->net;
struct recent_net *recent_net = recent_pernet(net);
const struct xt_recent_mtinfo_v1 *info = par->matchinfo;
struct recent_table *t;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 43e26c881100..2ec08f04b816 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -143,7 +143,8 @@ static bool xt_socket_sk_is_transparent(struct sock *sk)
}
}
-static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
+static struct sock *xt_socket_lookup_slow_v4(struct net *net,
+ const struct sk_buff *skb,
const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -197,7 +198,7 @@ static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
}
#endif
- return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr,
+ return xt_socket_get_sock_v4(net, protocol, saddr, daddr,
sport, dport, indev);
}
@@ -209,7 +210,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v4(skb, par->in);
+ sk = xt_socket_lookup_slow_v4(par->net, skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -335,7 +336,8 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
return NULL;
}
-static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
+static struct sock *xt_socket_lookup_slow_v6(struct net *net,
+ const struct sk_buff *skb,
const struct net_device *indev)
{
__be16 uninitialized_var(dport), uninitialized_var(sport);
@@ -371,7 +373,7 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
return NULL;
}
- return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr,
+ return xt_socket_get_sock_v6(net, tproto, saddr, daddr,
sport, dport, indev);
}
@@ -383,7 +385,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
struct sock *sk = skb->sk;
if (!sk)
- sk = xt_socket_lookup_slow_v6(skb, par->in);
+ sk = xt_socket_lookup_slow_v6(par->net, skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2ed5f964772e..bc0e504f33a6 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -39,7 +39,7 @@ void genl_unlock(void)
EXPORT_SYMBOL(genl_unlock);
#ifdef CONFIG_LOCKDEP
-int lockdep_genl_is_held(void)
+bool lockdep_genl_is_held(void)
{
return lockdep_is_held(&genl_mutex);
}
@@ -1136,19 +1136,19 @@ int genlmsg_multicast_allns(struct genl_family *family, struct sk_buff *skb,
}
EXPORT_SYMBOL(genlmsg_multicast_allns);
-void genl_notify(struct genl_family *family,
- struct sk_buff *skb, struct net *net, u32 portid, u32 group,
- struct nlmsghdr *nlh, gfp_t flags)
+void genl_notify(struct genl_family *family, struct sk_buff *skb,
+ struct genl_info *info, u32 group, gfp_t flags)
{
+ struct net *net = genl_info_net(info);
struct sock *sk = net->genl_sock;
int report = 0;
- if (nlh)
- report = nlmsg_report(nlh);
+ if (info->nlhdr)
+ report = nlmsg_report(info->nlhdr);
if (WARN_ON_ONCE(group >= family->n_mcgrps))
return;
group = family->mcgrp_offset + group;
- nlmsg_notify(sk, skb, portid, group, report, flags);
+ nlmsg_notify(sk, skb, info->snd_portid, group, report, flags);
}
EXPORT_SYMBOL(genl_notify);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c6a39bf2c3b9..c6087233d7fc 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -620,7 +620,7 @@ static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
return 0;
}
-static int ovs_vport_output(struct sock *sock, struct sk_buff *skb)
+static int ovs_vport_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct ovs_frag_data *data = this_cpu_ptr(&ovs_frag_data_storage);
struct vport *vport = data->vport;
@@ -679,8 +679,8 @@ static void prepare_frag(struct vport *vport, struct sk_buff *skb)
skb_pull(skb, hlen);
}
-static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
- __be16 ethertype)
+static void ovs_fragment(struct net *net, struct vport *vport,
+ struct sk_buff *skb, u16 mru, __be16 ethertype)
{
if (skb_network_offset(skb) > MAX_L2_LEN) {
OVS_NLERR(1, "L2 header too long to fragment");
@@ -700,7 +700,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
skb_dst_set_noref(skb, &ovs_dst);
IPCB(skb)->frag_max_size = mru;
- ip_do_fragment(skb->sk, skb, ovs_vport_output);
+ ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else if (ethertype == htons(ETH_P_IPV6)) {
const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
@@ -721,7 +721,7 @@ static void ovs_fragment(struct vport *vport, struct sk_buff *skb, u16 mru,
skb_dst_set_noref(skb, &ovs_rt.dst);
IP6CB(skb)->frag_max_size = mru;
- v6ops->fragment(skb->sk, skb, ovs_vport_output);
+ v6ops->fragment(net, skb->sk, skb, ovs_vport_output);
refdst_drop(orig_dst);
} else {
WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.",
@@ -746,6 +746,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
if (likely(!mru || (skb->len <= mru + ETH_HLEN))) {
ovs_vport_send(vport, skb);
} else if (mru <= vport->dev->mtu) {
+ struct net *net = read_pnet(&dp->net);
__be16 ethertype = key->eth.type;
if (!is_flow_key_valid(key)) {
@@ -755,7 +756,7 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port,
ethertype = vlan_get_protocol(skb);
}
- ovs_fragment(vport, skb, mru, ethertype);
+ ovs_fragment(net, vport, skb, mru, ethertype);
} else {
kfree_skb(skb);
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 80bf702715bb..9ed833e9bb7d 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -304,7 +304,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
int err;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(skb, user);
+ err = ip_defrag(net, skb, user);
if (err)
return err;
@@ -315,7 +315,7 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
struct sk_buff *reasm;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- reasm = nf_ct_frag6_gather(skb, user);
+ reasm = nf_ct_frag6_gather(net, skb, user);
if (!reasm)
return -EINPROGRESS;
@@ -347,7 +347,7 @@ ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
{
struct nf_conntrack_tuple tuple;
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, &tuple))
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
return NULL;
return __nf_ct_expect_find(net, zone, &tuple);
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index b816ff871528..a75828091e21 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -91,8 +91,7 @@ static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
static void ovs_notify(struct genl_family *family,
struct sk_buff *skb, struct genl_info *info)
{
- genl_notify(family, skb, genl_info_net(info), info->snd_portid,
- 0, info->nlhdr, GFP_KERNEL);
+ genl_notify(family, skb, info, 0, GFP_KERNEL);
}
/**
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index c8db44ab2ee7..0ea128eeeab2 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -698,8 +698,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
{
/* Extract metadata from packet. */
if (tun_info) {
- if (ip_tunnel_info_af(tun_info) != AF_INET)
- return -EINVAL;
+ key->tun_proto = ip_tunnel_info_af(tun_info);
memcpy(&key->tun_key, &tun_info->key, sizeof(key->tun_key));
if (tun_info->options_len) {
@@ -714,6 +713,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
key->tun_opts_len = 0;
}
} else {
+ key->tun_proto = 0;
key->tun_opts_len = 0;
memset(&key->tun_key, 0, sizeof(key->tun_key));
}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 8cfa15a08668..1d055c559eaf 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -63,6 +63,7 @@ struct sw_flow_key {
u32 skb_mark; /* SKB mark. */
u16 in_port; /* Input switch port (or DP_MAX_PORTS). */
} __packed phy; /* Safe when right after 'tun_key'. */
+ u8 tun_proto; /* Protocol of encapsulating tunnel. */
u32 ovs_flow_hash; /* Datapath computed hash value. */
u32 recirc_id; /* Recirculation ID. */
struct {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 171a691f1c32..6799c8d470c6 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -262,8 +262,8 @@ size_t ovs_tun_key_attr_size(void)
* updating this function.
*/
return nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */
- + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */
+ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
+ + nla_total_size(16) /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */
+ nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */
+ nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
@@ -323,6 +323,8 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1]
[OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_VARIABLE },
[OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED,
.next = ovs_vxlan_ext_key_lens },
+ [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) },
+ [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) },
};
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
@@ -542,14 +544,14 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
return 0;
}
-static int ipv4_tun_from_nlattr(const struct nlattr *attr,
- struct sw_flow_match *match, bool is_mask,
- bool log)
+static int ip_tun_from_nlattr(const struct nlattr *attr,
+ struct sw_flow_match *match, bool is_mask,
+ bool log)
{
struct nlattr *a;
int rem;
- bool ttl = false;
+ bool ttl = false, ipv4 = false, ipv6 = false;
__be16 tun_flags = 0;
int opts_type = 0;
nla_for_each_nested(a, attr, rem) {
@@ -578,10 +580,22 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
nla_get_in_addr(a), is_mask);
+ ipv4 = true;
break;
case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
nla_get_in_addr(a), is_mask);
+ ipv4 = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
+ nla_get_in6_addr(a), is_mask);
+ ipv6 = true;
+ break;
+ case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
+ SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
+ nla_get_in6_addr(a), is_mask);
+ ipv6 = true;
break;
case OVS_TUNNEL_KEY_ATTR_TOS:
SW_FLOW_KEY_PUT(match, tun_key.tos,
@@ -636,28 +650,46 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
opts_type = type;
break;
default:
- OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d",
+ OVS_NLERR(log, "Unknown IP tunnel attribute %d",
type);
return -EINVAL;
}
}
SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
+ if (is_mask)
+ SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
+ else
+ SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
+ false);
if (rem > 0) {
- OVS_NLERR(log, "IPv4 tunnel attribute has %d unknown bytes.",
+ OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
rem);
return -EINVAL;
}
+ if (ipv4 && ipv6) {
+ OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
+ return -EINVAL;
+ }
+
if (!is_mask) {
- if (!match->key->tun_key.u.ipv4.dst) {
+ if (!ipv4 && !ipv6) {
+ OVS_NLERR(log, "IP tunnel dst address not specified");
+ return -EINVAL;
+ }
+ if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
OVS_NLERR(log, "IPv4 tunnel dst address is zero");
return -EINVAL;
}
+ if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
+ OVS_NLERR(log, "IPv6 tunnel dst address is zero");
+ return -EINVAL;
+ }
if (!ttl) {
- OVS_NLERR(log, "IPv4 tunnel TTL not specified.");
+ OVS_NLERR(log, "IP tunnel TTL not specified.");
return -EINVAL;
}
}
@@ -682,21 +714,36 @@ static int vxlan_opt_to_nlattr(struct sk_buff *skb,
return 0;
}
-static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len)
+static int __ip_tun_to_nlattr(struct sk_buff *skb,
+ const struct ip_tunnel_key *output,
+ const void *tun_opts, int swkey_tun_opts_len,
+ unsigned short tun_proto)
{
if (output->tun_flags & TUNNEL_KEY &&
nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id))
return -EMSGSIZE;
- if (output->u.ipv4.src &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
- output->u.ipv4.src))
- return -EMSGSIZE;
- if (output->u.ipv4.dst &&
- nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
- output->u.ipv4.dst))
- return -EMSGSIZE;
+ switch (tun_proto) {
+ case AF_INET:
+ if (output->u.ipv4.src &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
+ output->u.ipv4.src))
+ return -EMSGSIZE;
+ if (output->u.ipv4.dst &&
+ nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
+ output->u.ipv4.dst))
+ return -EMSGSIZE;
+ break;
+ case AF_INET6:
+ if (!ipv6_addr_any(&output->u.ipv6.src) &&
+ nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
+ &output->u.ipv6.src))
+ return -EMSGSIZE;
+ if (!ipv6_addr_any(&output->u.ipv6.dst) &&
+ nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
+ &output->u.ipv6.dst))
+ return -EMSGSIZE;
+ break;
+ }
if (output->tos &&
nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
return -EMSGSIZE;
@@ -730,9 +777,10 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
return 0;
}
-static int ipv4_tun_to_nlattr(struct sk_buff *skb,
- const struct ip_tunnel_key *output,
- const void *tun_opts, int swkey_tun_opts_len)
+static int ip_tun_to_nlattr(struct sk_buff *skb,
+ const struct ip_tunnel_key *output,
+ const void *tun_opts, int swkey_tun_opts_len,
+ unsigned short tun_proto)
{
struct nlattr *nla;
int err;
@@ -741,7 +789,8 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
if (!nla)
return -EMSGSIZE;
- err = __ipv4_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len);
+ err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
+ tun_proto);
if (err)
return err;
@@ -753,9 +802,10 @@ int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
const struct ip_tunnel_info *egress_tun_info,
const void *egress_tun_opts)
{
- return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key,
- egress_tun_opts,
- egress_tun_info->options_len);
+ return __ip_tun_to_nlattr(skb, &egress_tun_info->key,
+ egress_tun_opts,
+ egress_tun_info->options_len,
+ ip_tunnel_info_af(egress_tun_info));
}
static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
@@ -806,8 +856,8 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
*attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
}
if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
- if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
- is_mask, log) < 0)
+ if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
+ is_mask, log) < 0)
return -EINVAL;
*attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
}
@@ -1200,7 +1250,7 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
/* The userspace does not send tunnel attributes that
* are 0, but we should not wildcard them nonetheless.
*/
- if (match->key->tun_key.u.ipv4.dst)
+ if (match->key->tun_proto)
SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
0xff, true);
@@ -1373,14 +1423,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
goto nla_put_failure;
- if ((swkey->tun_key.u.ipv4.dst || is_mask)) {
+ if ((swkey->tun_proto || is_mask)) {
const void *opts = NULL;
if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
- if (ipv4_tun_to_nlattr(skb, &output->tun_key, opts,
- swkey->tun_opts_len))
+ if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
+ swkey->tun_opts_len, swkey->tun_proto))
goto nla_put_failure;
}
@@ -1883,7 +1933,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
int err = 0, start, opts_type;
ovs_match_init(&match, &key, NULL);
- opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log);
+ opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
if (opts_type < 0)
return opts_type;
@@ -1919,6 +1969,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
tun_info = &tun_dst->u.tun_info;
tun_info->mode = IP_TUNNEL_INFO_TX;
+ if (key.tun_proto == AF_INET6)
+ tun_info->mode |= IP_TUNNEL_INFO_IPV6;
tun_info->key = key.tun_key;
/* We need to store the options in the action itself since
@@ -2380,10 +2432,11 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
if (!start)
return -EMSGSIZE;
- err = ipv4_tun_to_nlattr(skb, &tun_info->key,
- tun_info->options_len ?
+ err = ip_tun_to_nlattr(skb, &tun_info->key,
+ tun_info->options_len ?
ip_tunnel_info_opts(tun_info) : NULL,
- tun_info->options_len);
+ tun_info->options_len,
+ ip_tunnel_info_af(tun_info));
if (err)
return err;
nla_nest_end(skb, start);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c7f74aab34b9..d073fff82fdb 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -428,7 +428,7 @@ static u32 flow_hash(const struct sw_flow_key *key,
static int flow_key_start(const struct sw_flow_key *key)
{
- if (key->tun_key.u.ipv4.dst)
+ if (key->tun_proto)
return 0;
else
return rounddown(offsetof(struct sw_flow_key, phy),
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index c11413d5075f..fb3cdb85905d 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -151,7 +151,8 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
{
struct vxlan_dev *vxlan = netdev_priv(vport->dev);
struct net *net = ovs_dp_get_net(vport->dp);
- __be16 dst_port = vxlan_dev_dst_port(vxlan);
+ unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
+ __be16 dst_port = vxlan_dev_dst_port(vxlan, family);
__be16 src_port;
int port_min;
int port_max;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index aa4b15c35884..691660b9b7ef 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1423,7 +1423,7 @@ static unsigned int fanout_demux_bpf(struct packet_fanout *f,
rcu_read_lock();
prog = rcu_dereference(f->bpf_prog);
if (prog)
- ret = BPF_PROG_RUN(prog, skb) % num;
+ ret = bpf_prog_run_clear_cb(prog, skb) % num;
rcu_read_unlock();
return ret;
@@ -1439,17 +1439,17 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
{
struct packet_fanout *f = pt->af_packet_priv;
unsigned int num = READ_ONCE(f->num_members);
+ struct net *net = read_pnet(&f->net);
struct packet_sock *po;
unsigned int idx;
- if (!net_eq(dev_net(dev), read_pnet(&f->net)) ||
- !num) {
+ if (!net_eq(dev_net(dev), net) || !num) {
kfree_skb(skb);
return 0;
}
if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
- skb = ip_check_defrag(skb, IP_DEFRAG_AF_PACKET);
+ skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
if (!skb)
return 0;
}
@@ -1519,10 +1519,10 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
{
- if (ptype->af_packet_priv == (void *)((struct packet_sock *)sk)->fanout)
- return true;
+ if (sk->sk_family != PF_PACKET)
+ return false;
- return false;
+ return ptype->af_packet_priv == pkt_sk(sk)->fanout;
}
static void fanout_init_data(struct packet_fanout *f)
@@ -1567,7 +1567,7 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
if (copy_from_user(&fprog, data, len))
return -EFAULT;
- ret = bpf_prog_create_from_user(&new, &fprog, NULL);
+ ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
if (ret)
return ret;
@@ -1939,16 +1939,16 @@ out_free:
return err;
}
-static unsigned int run_filter(const struct sk_buff *skb,
- const struct sock *sk,
- unsigned int res)
+static unsigned int run_filter(struct sk_buff *skb,
+ const struct sock *sk,
+ unsigned int res)
{
struct sk_filter *filter;
rcu_read_lock();
filter = rcu_dereference(sk->sk_filter);
if (filter != NULL)
- res = SK_RUN_FILTER(filter, skb);
+ res = bpf_prog_run_clear_cb(filter->prog, skb);
rcu_read_unlock();
return res;
@@ -2630,6 +2630,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
__be16 proto;
unsigned char *addr;
int err, reserve = 0;
+ struct sockcm_cookie sockc;
struct virtio_net_hdr vnet_hdr = { 0 };
int offset = 0;
int vnet_hdr_len;
@@ -2665,6 +2666,13 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_unlock;
+ sockc.mark = sk->sk_mark;
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(sk, msg, &sockc);
+ if (unlikely(err))
+ goto out_unlock;
+ }
+
if (sock->type == SOCK_RAW)
reserve = dev->hard_header_len;
if (po->has_vnet_hdr) {
@@ -2774,7 +2782,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = sockc.mark;
packet_pick_tx_queue(dev, skb);
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index a2f28a6d4dc5..384ea1e3cd69 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -72,13 +72,7 @@ static int rds_release(struct socket *sock)
rds_clear_recv_queue(rs);
rds_cong_remove_socket(rs);
- /*
- * the binding lookup hash uses rcu, we need to
- * make sure we synchronize_rcu before we free our
- * entry
- */
rds_remove_bound(rs);
- synchronize_rcu();
rds_send_drop_to(rs, NULL);
rds_rdma_drop_keys(rs);
@@ -588,6 +582,8 @@ static int rds_init(void)
{
int ret;
+ rds_bind_lock_init();
+
ret = rds_conn_init();
if (ret)
goto out;
diff --git a/net/rds/bind.c b/net/rds/bind.c
index dd666fb9b4e1..61925667b7a4 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -38,48 +38,50 @@
#include <linux/ratelimit.h>
#include "rds.h"
+struct bind_bucket {
+ rwlock_t lock;
+ struct hlist_head head;
+};
+
#define BIND_HASH_SIZE 1024
-static struct hlist_head bind_hash_table[BIND_HASH_SIZE];
-static DEFINE_SPINLOCK(rds_bind_lock);
+static struct bind_bucket bind_hash_table[BIND_HASH_SIZE];
-static struct hlist_head *hash_to_bucket(__be32 addr, __be16 port)
+static struct bind_bucket *hash_to_bucket(__be32 addr, __be16 port)
{
return bind_hash_table + (jhash_2words((u32)addr, (u32)port, 0) &
(BIND_HASH_SIZE - 1));
}
-static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
+/* must hold either read or write lock (write lock for insert != NULL) */
+static struct rds_sock *rds_bind_lookup(struct bind_bucket *bucket,
+ __be32 addr, __be16 port,
struct rds_sock *insert)
{
struct rds_sock *rs;
- struct hlist_head *head = hash_to_bucket(addr, port);
+ struct hlist_head *head = &bucket->head;
u64 cmp;
u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port);
- rcu_read_lock();
- hlist_for_each_entry_rcu(rs, head, rs_bound_node) {
+ hlist_for_each_entry(rs, head, rs_bound_node) {
cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) |
be16_to_cpu(rs->rs_bound_port);
if (cmp == needle) {
- rcu_read_unlock();
+ rds_sock_addref(rs);
return rs;
}
}
- rcu_read_unlock();
if (insert) {
/*
* make sure our addr and port are set before
- * we are added to the list, other people
- * in rcu will find us as soon as the
- * hlist_add_head_rcu is done
+ * we are added to the list.
*/
insert->rs_bound_addr = addr;
insert->rs_bound_port = port;
rds_sock_addref(insert);
- hlist_add_head_rcu(&insert->rs_bound_node, head);
+ hlist_add_head(&insert->rs_bound_node, head);
}
return NULL;
}
@@ -93,16 +95,21 @@ static struct rds_sock *rds_bind_lookup(__be32 addr, __be16 port,
struct rds_sock *rds_find_bound(__be32 addr, __be16 port)
{
struct rds_sock *rs;
+ unsigned long flags;
+ struct bind_bucket *bucket = hash_to_bucket(addr, port);
- rs = rds_bind_lookup(addr, port, NULL);
+ read_lock_irqsave(&bucket->lock, flags);
+ rs = rds_bind_lookup(bucket, addr, port, NULL);
+ read_unlock_irqrestore(&bucket->lock, flags);
- if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD))
- rds_sock_addref(rs);
- else
+ if (rs && sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) {
+ rds_sock_put(rs);
rs = NULL;
+ }
rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr,
ntohs(port));
+
return rs;
}
@@ -112,6 +119,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
unsigned long flags;
int ret = -EADDRINUSE;
u16 rover, last;
+ struct bind_bucket *bucket;
if (*port != 0) {
rover = be16_to_cpu(*port);
@@ -121,42 +129,48 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
last = rover - 1;
}
- spin_lock_irqsave(&rds_bind_lock, flags);
-
do {
+ struct rds_sock *rrs;
if (rover == 0)
rover++;
- if (!rds_bind_lookup(addr, cpu_to_be16(rover), rs)) {
+
+ bucket = hash_to_bucket(addr, cpu_to_be16(rover));
+ write_lock_irqsave(&bucket->lock, flags);
+ rrs = rds_bind_lookup(bucket, addr, cpu_to_be16(rover), rs);
+ write_unlock_irqrestore(&bucket->lock, flags);
+ if (!rrs) {
*port = rs->rs_bound_port;
ret = 0;
rdsdebug("rs %p binding to %pI4:%d\n",
rs, &addr, (int)ntohs(*port));
break;
+ } else {
+ rds_sock_put(rrs);
}
} while (rover++ != last);
- spin_unlock_irqrestore(&rds_bind_lock, flags);
-
return ret;
}
void rds_remove_bound(struct rds_sock *rs)
{
unsigned long flags;
+ struct bind_bucket *bucket =
+ hash_to_bucket(rs->rs_bound_addr, rs->rs_bound_port);
- spin_lock_irqsave(&rds_bind_lock, flags);
+ write_lock_irqsave(&bucket->lock, flags);
if (rs->rs_bound_addr) {
rdsdebug("rs %p unbinding from %pI4:%d\n",
rs, &rs->rs_bound_addr,
ntohs(rs->rs_bound_port));
- hlist_del_init_rcu(&rs->rs_bound_node);
+ hlist_del_init(&rs->rs_bound_node);
rds_sock_put(rs);
rs->rs_bound_addr = 0;
}
- spin_unlock_irqrestore(&rds_bind_lock, flags);
+ write_unlock_irqrestore(&bucket->lock, flags);
}
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
@@ -182,7 +196,14 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
if (rs->rs_transport) { /* previously bound */
- ret = 0;
+ trans = rs->rs_transport;
+ if (trans->laddr_check(sock_net(sock->sk),
+ sin->sin_addr.s_addr) != 0) {
+ ret = -ENOPROTOOPT;
+ rds_remove_bound(rs);
+ } else {
+ ret = 0;
+ }
goto out;
}
trans = rds_trans_get_preferred(sock_net(sock->sk),
@@ -200,9 +221,13 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
out:
release_sock(sk);
-
- /* we might have called rds_remove_bound on error */
- if (ret)
- synchronize_rcu();
return ret;
}
+
+void rds_bind_lock_init(void)
+{
+ int i;
+
+ for (i = 0; i < BIND_HASH_SIZE; i++)
+ rwlock_init(&bind_hash_table[i].lock);
+}
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 49adeef8090c..d4564036a339 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -128,10 +128,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
struct rds_transport *loop_trans;
unsigned long flags;
int ret;
- struct rds_transport *otrans = trans;
- if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
- goto new_conn;
rcu_read_lock();
conn = rds_conn_lookup(net, head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
@@ -147,7 +144,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
if (conn)
goto out;
-new_conn:
conn = kmem_cache_zalloc(rds_conn_slab, gfp);
if (!conn) {
conn = ERR_PTR(-ENOMEM);
@@ -207,6 +203,7 @@ new_conn:
atomic_set(&conn->c_state, RDS_CONN_DOWN);
conn->c_send_gen = 0;
+ conn->c_outgoing = (is_outgoing ? 1 : 0);
conn->c_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
@@ -243,22 +240,13 @@ new_conn:
/* Creating normal conn */
struct rds_connection *found;
- if (!is_outgoing && otrans->t_type == RDS_TRANS_TCP)
- found = NULL;
- else
- found = rds_conn_lookup(net, head, laddr, faddr, trans);
+ found = rds_conn_lookup(net, head, laddr, faddr, trans);
if (found) {
trans->conn_free(conn->c_transport_data);
kmem_cache_free(rds_conn_slab, conn);
conn = found;
} else {
- if ((is_outgoing && otrans->t_type == RDS_TRANS_TCP) ||
- (otrans->t_type != RDS_TRANS_TCP)) {
- /* Only the active side should be added to
- * reconnect list for TCP.
- */
- hlist_add_head_rcu(&conn->c_hash_node, head);
- }
+ hlist_add_head_rcu(&conn->c_hash_node, head);
rds_cong_add_conn(conn);
rds_conn_count++;
}
@@ -337,7 +325,9 @@ void rds_conn_shutdown(struct rds_connection *conn)
rcu_read_lock();
if (!hlist_unhashed(&conn->c_hash_node)) {
rcu_read_unlock();
- rds_queue_reconnect(conn);
+ if (conn->c_trans->t_type != RDS_TRANS_TCP ||
+ conn->c_outgoing == 1)
+ rds_queue_reconnect(conn);
} else {
rcu_read_unlock();
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 2d3f2ab475df..a833ab7898fe 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -43,14 +43,14 @@
#include "rds.h"
#include "ib.h"
-static unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE;
-unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */
+unsigned int rds_ib_fmr_1m_pool_size = RDS_FMR_1M_POOL_SIZE;
+unsigned int rds_ib_fmr_8k_pool_size = RDS_FMR_8K_POOL_SIZE;
unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
-module_param(fmr_pool_size, int, 0444);
-MODULE_PARM_DESC(fmr_pool_size, " Max number of fmr per HCA");
-module_param(fmr_message_size, int, 0444);
-MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer");
+module_param(rds_ib_fmr_1m_pool_size, int, 0444);
+MODULE_PARM_DESC(rds_ib_fmr_1m_pool_size, " Max number of 1M fmr per HCA");
+module_param(rds_ib_fmr_8k_pool_size, int, 0444);
+MODULE_PARM_DESC(rds_ib_fmr_8k_pool_size, " Max number of 8K fmr per HCA");
module_param(rds_ib_retry_count, int, 0444);
MODULE_PARM_DESC(rds_ib_retry_count, " Number of hw retries before reporting an error");
@@ -97,8 +97,10 @@ static void rds_ib_dev_free(struct work_struct *work)
struct rds_ib_device *rds_ibdev = container_of(work,
struct rds_ib_device, free_work);
- if (rds_ibdev->mr_pool)
- rds_ib_destroy_mr_pool(rds_ibdev->mr_pool);
+ if (rds_ibdev->mr_8k_pool)
+ rds_ib_destroy_mr_pool(rds_ibdev->mr_8k_pool);
+ if (rds_ibdev->mr_1m_pool)
+ rds_ib_destroy_mr_pool(rds_ibdev->mr_1m_pool);
if (rds_ibdev->pd)
ib_dealloc_pd(rds_ibdev->pd);
@@ -148,9 +150,13 @@ static void rds_ib_add_one(struct ib_device *device)
rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE);
rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32;
- rds_ibdev->max_fmrs = dev_attr->max_fmr ?
- min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) :
- fmr_pool_size;
+ rds_ibdev->max_1m_fmrs = dev_attr->max_mr ?
+ min_t(unsigned int, (dev_attr->max_mr / 2),
+ rds_ib_fmr_1m_pool_size) : rds_ib_fmr_1m_pool_size;
+
+ rds_ibdev->max_8k_fmrs = dev_attr->max_mr ?
+ min_t(unsigned int, ((dev_attr->max_mr / 2) * RDS_MR_8K_SCALE),
+ rds_ib_fmr_8k_pool_size) : rds_ib_fmr_8k_pool_size;
rds_ibdev->max_initiator_depth = dev_attr->max_qp_init_rd_atom;
rds_ibdev->max_responder_resources = dev_attr->max_qp_rd_atom;
@@ -162,12 +168,25 @@ static void rds_ib_add_one(struct ib_device *device)
goto put_dev;
}
- rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev);
- if (IS_ERR(rds_ibdev->mr_pool)) {
- rds_ibdev->mr_pool = NULL;
+ rds_ibdev->mr_1m_pool =
+ rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_1M_POOL);
+ if (IS_ERR(rds_ibdev->mr_1m_pool)) {
+ rds_ibdev->mr_1m_pool = NULL;
goto put_dev;
}
+ rds_ibdev->mr_8k_pool =
+ rds_ib_create_mr_pool(rds_ibdev, RDS_IB_MR_8K_POOL);
+ if (IS_ERR(rds_ibdev->mr_8k_pool)) {
+ rds_ibdev->mr_8k_pool = NULL;
+ goto put_dev;
+ }
+
+ rdsdebug("RDS/IB: max_mr = %d, max_wrs = %d, max_sge = %d, fmr_max_remaps = %d, max_1m_fmrs = %d, max_8k_fmrs = %d\n",
+ dev_attr->max_fmr, rds_ibdev->max_wrs, rds_ibdev->max_sge,
+ rds_ibdev->fmr_max_remaps, rds_ibdev->max_1m_fmrs,
+ rds_ibdev->max_8k_fmrs);
+
INIT_LIST_HEAD(&rds_ibdev->ipaddr_list);
INIT_LIST_HEAD(&rds_ibdev->conn_list);
diff --git a/net/rds/ib.h b/net/rds/ib.h
index aae60fda77f6..f17d09567890 100644
--- a/net/rds/ib.h
+++ b/net/rds/ib.h
@@ -9,8 +9,11 @@
#include "rds.h"
#include "rdma_transport.h"
-#define RDS_FMR_SIZE 256
-#define RDS_FMR_POOL_SIZE 8192
+#define RDS_FMR_1M_POOL_SIZE (8192 / 2)
+#define RDS_FMR_1M_MSG_SIZE 256
+#define RDS_FMR_8K_MSG_SIZE 2
+#define RDS_MR_8K_SCALE (256 / (RDS_FMR_8K_MSG_SIZE + 1))
+#define RDS_FMR_8K_POOL_SIZE (RDS_MR_8K_SCALE * (8192 / 2))
#define RDS_IB_MAX_SGE 8
#define RDS_IB_RECV_SGE 2
@@ -24,6 +27,9 @@
#define RDS_IB_RECYCLE_BATCH_COUNT 32
+#define RDS_IB_WC_MAX 32
+#define RDS_IB_SEND_OP BIT_ULL(63)
+
extern struct rw_semaphore rds_ib_devices_lock;
extern struct list_head rds_ib_devices;
@@ -89,6 +95,20 @@ struct rds_ib_work_ring {
atomic_t w_free_ctr;
};
+/* Rings are posted with all the allocations they'll need to queue the
+ * incoming message to the receiving socket so this can't fail.
+ * All fragments start with a header, so we can make sure we're not receiving
+ * garbage, and we can tell a small 8 byte fragment from an ACK frame.
+ */
+struct rds_ib_ack_state {
+ u64 ack_next;
+ u64 ack_recv;
+ unsigned int ack_required:1;
+ unsigned int ack_next_valid:1;
+ unsigned int ack_recv_valid:1;
+};
+
+
struct rds_ib_device;
struct rds_ib_connection {
@@ -102,6 +122,12 @@ struct rds_ib_connection {
struct ib_pd *i_pd;
struct ib_cq *i_send_cq;
struct ib_cq *i_recv_cq;
+ struct ib_wc i_send_wc[RDS_IB_WC_MAX];
+ struct ib_wc i_recv_wc[RDS_IB_WC_MAX];
+
+ /* interrupt handling */
+ struct tasklet_struct i_send_tasklet;
+ struct tasklet_struct i_recv_tasklet;
/* tx */
struct rds_ib_work_ring i_send_ring;
@@ -112,7 +138,6 @@ struct rds_ib_connection {
atomic_t i_signaled_sends;
/* rx */
- struct tasklet_struct i_recv_tasklet;
struct mutex i_recv_mutex;
struct rds_ib_work_ring i_recv_ring;
struct rds_ib_incoming *i_ibinc;
@@ -164,6 +189,12 @@ struct rds_ib_connection {
struct rds_ib_ipaddr {
struct list_head list;
__be32 ipaddr;
+ struct rcu_head rcu;
+};
+
+enum {
+ RDS_IB_MR_8K_POOL,
+ RDS_IB_MR_1M_POOL,
};
struct rds_ib_device {
@@ -172,9 +203,12 @@ struct rds_ib_device {
struct list_head conn_list;
struct ib_device *dev;
struct ib_pd *pd;
- struct rds_ib_mr_pool *mr_pool;
- unsigned int fmr_max_remaps;
unsigned int max_fmrs;
+ struct rds_ib_mr_pool *mr_1m_pool;
+ struct rds_ib_mr_pool *mr_8k_pool;
+ unsigned int fmr_max_remaps;
+ unsigned int max_8k_fmrs;
+ unsigned int max_1m_fmrs;
int max_sge;
unsigned int max_wrs;
unsigned int max_initiator_depth;
@@ -197,14 +231,14 @@ struct rds_ib_device {
struct rds_ib_statistics {
uint64_t s_ib_connect_raced;
uint64_t s_ib_listen_closed_stale;
- uint64_t s_ib_tx_cq_call;
+ uint64_t s_ib_evt_handler_call;
+ uint64_t s_ib_tasklet_call;
uint64_t s_ib_tx_cq_event;
uint64_t s_ib_tx_ring_full;
uint64_t s_ib_tx_throttle;
uint64_t s_ib_tx_sg_mapping_failure;
uint64_t s_ib_tx_stalled;
uint64_t s_ib_tx_credit_updates;
- uint64_t s_ib_rx_cq_call;
uint64_t s_ib_rx_cq_event;
uint64_t s_ib_rx_ring_empty;
uint64_t s_ib_rx_refill_from_cq;
@@ -216,12 +250,18 @@ struct rds_ib_statistics {
uint64_t s_ib_ack_send_delayed;
uint64_t s_ib_ack_send_piggybacked;
uint64_t s_ib_ack_received;
- uint64_t s_ib_rdma_mr_alloc;
- uint64_t s_ib_rdma_mr_free;
- uint64_t s_ib_rdma_mr_used;
- uint64_t s_ib_rdma_mr_pool_flush;
- uint64_t s_ib_rdma_mr_pool_wait;
- uint64_t s_ib_rdma_mr_pool_depleted;
+ uint64_t s_ib_rdma_mr_8k_alloc;
+ uint64_t s_ib_rdma_mr_8k_free;
+ uint64_t s_ib_rdma_mr_8k_used;
+ uint64_t s_ib_rdma_mr_8k_pool_flush;
+ uint64_t s_ib_rdma_mr_8k_pool_wait;
+ uint64_t s_ib_rdma_mr_8k_pool_depleted;
+ uint64_t s_ib_rdma_mr_1m_alloc;
+ uint64_t s_ib_rdma_mr_1m_free;
+ uint64_t s_ib_rdma_mr_1m_used;
+ uint64_t s_ib_rdma_mr_1m_pool_flush;
+ uint64_t s_ib_rdma_mr_1m_pool_wait;
+ uint64_t s_ib_rdma_mr_1m_pool_depleted;
uint64_t s_ib_atomic_cswp;
uint64_t s_ib_atomic_fadd;
};
@@ -273,7 +313,8 @@ struct rds_ib_device *rds_ib_get_client_data(struct ib_device *device);
void rds_ib_dev_put(struct rds_ib_device *rds_ibdev);
extern struct ib_client rds_ib_client;
-extern unsigned int fmr_message_size;
+extern unsigned int rds_ib_fmr_1m_pool_size;
+extern unsigned int rds_ib_fmr_8k_pool_size;
extern unsigned int rds_ib_retry_count;
extern spinlock_t ib_nodev_conns_lock;
@@ -303,7 +344,8 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr);
void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn);
void rds_ib_destroy_nodev_conns(void);
-struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *);
+struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
+ int npages);
void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo);
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *);
void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
@@ -323,7 +365,8 @@ void rds_ib_recv_free_caches(struct rds_ib_connection *ic);
void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp);
void rds_ib_inc_free(struct rds_incoming *inc);
int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to);
-void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc,
+ struct rds_ib_ack_state *state);
void rds_ib_recv_tasklet_fn(unsigned long data);
void rds_ib_recv_init_ring(struct rds_ib_connection *ic);
void rds_ib_recv_clear_ring(struct rds_ib_connection *ic);
@@ -331,6 +374,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic);
void rds_ib_attempt_ack(struct rds_ib_connection *ic);
void rds_ib_ack_send_complete(struct rds_ib_connection *ic);
u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic);
+void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required);
/* ib_ring.c */
void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr);
@@ -348,7 +392,7 @@ extern wait_queue_head_t rds_ib_ring_empty_wait;
void rds_ib_xmit_complete(struct rds_connection *conn);
int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
unsigned int hdr_off, unsigned int sg, unsigned int off);
-void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context);
+void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc);
void rds_ib_send_init_ring(struct rds_ib_connection *ic);
void rds_ib_send_clear_ring(struct rds_ib_connection *ic);
int rds_ib_xmit_rdma(struct rds_connection *conn, struct rm_rdma_op *op);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 9043f5c04787..2b2370e7f356 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -216,6 +216,96 @@ static void rds_ib_cq_event_handler(struct ib_event *event, void *data)
event->event, ib_event_msg(event->event), data);
}
+/* Plucking the oldest entry from the ring can be done concurrently with
+ * the thread refilling the ring. Each ring operation is protected by
+ * spinlocks and the transient state of refilling doesn't change the
+ * recording of which entry is oldest.
+ *
+ * This relies on IB only calling one cq comp_handler for each cq so that
+ * there will only be one caller of rds_recv_incoming() per RDS connection.
+ */
+static void rds_ib_cq_comp_handler_recv(struct ib_cq *cq, void *context)
+{
+ struct rds_connection *conn = context;
+ struct rds_ib_connection *ic = conn->c_transport_data;
+
+ rdsdebug("conn %p cq %p\n", conn, cq);
+
+ rds_ib_stats_inc(s_ib_evt_handler_call);
+
+ tasklet_schedule(&ic->i_recv_tasklet);
+}
+
+static void poll_cq(struct rds_ib_connection *ic, struct ib_cq *cq,
+ struct ib_wc *wcs,
+ struct rds_ib_ack_state *ack_state)
+{
+ int nr;
+ int i;
+ struct ib_wc *wc;
+
+ while ((nr = ib_poll_cq(cq, RDS_IB_WC_MAX, wcs)) > 0) {
+ for (i = 0; i < nr; i++) {
+ wc = wcs + i;
+ rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n",
+ (unsigned long long)wc->wr_id, wc->status,
+ wc->byte_len, be32_to_cpu(wc->ex.imm_data));
+
+ if (wc->wr_id & RDS_IB_SEND_OP)
+ rds_ib_send_cqe_handler(ic, wc);
+ else
+ rds_ib_recv_cqe_handler(ic, wc, ack_state);
+ }
+ }
+}
+
+static void rds_ib_tasklet_fn_send(unsigned long data)
+{
+ struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
+ struct rds_connection *conn = ic->conn;
+ struct rds_ib_ack_state state;
+
+ rds_ib_stats_inc(s_ib_tasklet_call);
+
+ memset(&state, 0, sizeof(state));
+ poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state);
+ ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP);
+ poll_cq(ic, ic->i_send_cq, ic->i_send_wc, &state);
+
+ if (rds_conn_up(conn) &&
+ (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
+ test_bit(0, &conn->c_map_queued)))
+ rds_send_xmit(ic->conn);
+}
+
+static void rds_ib_tasklet_fn_recv(unsigned long data)
+{
+ struct rds_ib_connection *ic = (struct rds_ib_connection *)data;
+ struct rds_connection *conn = ic->conn;
+ struct rds_ib_device *rds_ibdev = ic->rds_ibdev;
+ struct rds_ib_ack_state state;
+
+ if (!rds_ibdev)
+ rds_conn_drop(conn);
+
+ rds_ib_stats_inc(s_ib_tasklet_call);
+
+ memset(&state, 0, sizeof(state));
+ poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
+ ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
+ poll_cq(ic, ic->i_recv_cq, ic->i_recv_wc, &state);
+
+ if (state.ack_next_valid)
+ rds_ib_set_ack(ic, state.ack_next, state.ack_required);
+ if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
+ rds_send_drop_acked(conn, state.ack_recv, NULL);
+ ic->i_ack_recv = state.ack_recv;
+ }
+
+ if (rds_conn_up(conn))
+ rds_ib_attempt_ack(ic);
+}
+
static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
{
struct rds_connection *conn = data;
@@ -238,6 +328,18 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
}
}
+static void rds_ib_cq_comp_handler_send(struct ib_cq *cq, void *context)
+{
+ struct rds_connection *conn = context;
+ struct rds_ib_connection *ic = conn->c_transport_data;
+
+ rdsdebug("conn %p cq %p\n", conn, cq);
+
+ rds_ib_stats_inc(s_ib_evt_handler_call);
+
+ tasklet_schedule(&ic->i_send_tasklet);
+}
+
/*
* This needs to be very careful to not leave IS_ERR pointers around for
* cleanup to trip over.
@@ -271,7 +373,8 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
ic->i_pd = rds_ibdev->pd;
cq_attr.cqe = ic->i_send_ring.w_nr + 1;
- ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler,
+
+ ic->i_send_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_send,
rds_ib_cq_event_handler, conn,
&cq_attr);
if (IS_ERR(ic->i_send_cq)) {
@@ -282,7 +385,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn)
}
cq_attr.cqe = ic->i_recv_ring.w_nr;
- ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler,
+ ic->i_recv_cq = ib_create_cq(dev, rds_ib_cq_comp_handler_recv,
rds_ib_cq_event_handler, conn,
&cq_attr);
if (IS_ERR(ic->i_recv_cq)) {
@@ -637,6 +740,7 @@ void rds_ib_conn_shutdown(struct rds_connection *conn)
wait_event(rds_ib_ring_empty_wait,
rds_ib_ring_empty(&ic->i_recv_ring) &&
(atomic_read(&ic->i_signaled_sends) == 0));
+ tasklet_kill(&ic->i_send_tasklet);
tasklet_kill(&ic->i_recv_tasklet);
/* first destroy the ib state that generates callbacks */
@@ -743,8 +847,10 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp)
}
INIT_LIST_HEAD(&ic->ib_node);
- tasklet_init(&ic->i_recv_tasklet, rds_ib_recv_tasklet_fn,
- (unsigned long) ic);
+ tasklet_init(&ic->i_send_tasklet, rds_ib_tasklet_fn_send,
+ (unsigned long)ic);
+ tasklet_init(&ic->i_recv_tasklet, rds_ib_tasklet_fn_recv,
+ (unsigned long)ic);
mutex_init(&ic->i_recv_mutex);
#ifndef KERNEL_HAS_ATOMIC64
spin_lock_init(&ic->i_ack_lock);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 251d1ce0b7c7..a2340748ec86 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -65,6 +65,7 @@ struct rds_ib_mr {
* Our own little FMR pool
*/
struct rds_ib_mr_pool {
+ unsigned int pool_type;
struct mutex flush_lock; /* serialize fmr invalidate */
struct delayed_work flush_worker; /* flush worker */
@@ -83,7 +84,7 @@ struct rds_ib_mr_pool {
struct ib_fmr_attr fmr_attr;
};
-struct workqueue_struct *rds_ib_fmr_wq;
+static struct workqueue_struct *rds_ib_fmr_wq;
int rds_ib_fmr_init(void)
{
@@ -159,10 +160,8 @@ static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
}
spin_unlock_irq(&rds_ibdev->spinlock);
- if (to_free) {
- synchronize_rcu();
- kfree(to_free);
- }
+ if (to_free)
+ kfree_rcu(to_free, rcu);
}
int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
@@ -236,7 +235,8 @@ void rds_ib_destroy_nodev_conns(void)
rds_conn_destroy(ic->conn);
}
-struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
+struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
+ int pool_type)
{
struct rds_ib_mr_pool *pool;
@@ -244,6 +244,7 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
if (!pool)
return ERR_PTR(-ENOMEM);
+ pool->pool_type = pool_type;
init_llist_head(&pool->free_list);
init_llist_head(&pool->drop_list);
init_llist_head(&pool->clean_list);
@@ -251,28 +252,30 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev)
init_waitqueue_head(&pool->flush_wait);
INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
- pool->fmr_attr.max_pages = fmr_message_size;
+ if (pool_type == RDS_IB_MR_1M_POOL) {
+ /* +1 allows for unaligned MRs */
+ pool->fmr_attr.max_pages = RDS_FMR_1M_MSG_SIZE + 1;
+ pool->max_items = RDS_FMR_1M_POOL_SIZE;
+ } else {
+ /* pool_type == RDS_IB_MR_8K_POOL */
+ pool->fmr_attr.max_pages = RDS_FMR_8K_MSG_SIZE + 1;
+ pool->max_items = RDS_FMR_8K_POOL_SIZE;
+ }
+
+ pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4;
pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps;
pool->fmr_attr.page_shift = PAGE_SHIFT;
- pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4;
-
- /* We never allow more than max_items MRs to be allocated.
- * When we exceed more than max_items_soft, we start freeing
- * items more aggressively.
- * Make sure that max_items > max_items_soft > max_items / 2
- */
pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4;
- pool->max_items = rds_ibdev->max_fmrs;
return pool;
}
void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
{
- struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
+ struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
- iinfo->rdma_mr_max = pool->max_items;
- iinfo->rdma_mr_size = pool->fmr_attr.max_pages;
+ iinfo->rdma_mr_max = pool_1m->max_items;
+ iinfo->rdma_mr_size = pool_1m->fmr_attr.max_pages;
}
void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
@@ -314,14 +317,28 @@ static inline void wait_clean_list_grace(void)
}
}
-static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
+static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev,
+ int npages)
{
- struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
+ struct rds_ib_mr_pool *pool;
struct rds_ib_mr *ibmr = NULL;
int err = 0, iter = 0;
+ if (npages <= RDS_FMR_8K_MSG_SIZE)
+ pool = rds_ibdev->mr_8k_pool;
+ else
+ pool = rds_ibdev->mr_1m_pool;
+
if (atomic_read(&pool->dirty_count) >= pool->max_items / 10)
- schedule_delayed_work(&pool->flush_worker, 10);
+ queue_delayed_work(rds_ib_fmr_wq, &pool->flush_worker, 10);
+
+ /* Switch pools if one of the pools is reaching its upper limit */
+ if (atomic_read(&pool->dirty_count) >= pool->max_items * 9 / 10) {
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ pool = rds_ibdev->mr_1m_pool;
+ else
+ pool = rds_ibdev->mr_8k_pool;
+ }
while (1) {
ibmr = rds_ib_reuse_fmr(pool);
@@ -343,12 +360,18 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
atomic_dec(&pool->item_count);
if (++iter > 2) {
- rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted);
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
+ else
+ rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
return ERR_PTR(-EAGAIN);
}
/* We do have some empty MRs. Flush them out. */
- rds_ib_stats_inc(s_ib_rdma_mr_pool_wait);
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
+ else
+ rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
rds_ib_flush_mr_pool(pool, 0, &ibmr);
if (ibmr)
return ibmr;
@@ -373,7 +396,12 @@ static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev)
goto out_no_cigar;
}
- rds_ib_stats_inc(s_ib_rdma_mr_alloc);
+ ibmr->pool = pool;
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
+ else
+ rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
+
return ibmr;
out_no_cigar:
@@ -429,7 +457,7 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
}
page_cnt += len >> PAGE_SHIFT;
- if (page_cnt > fmr_message_size)
+ if (page_cnt > ibmr->pool->fmr_attr.max_pages)
return -EINVAL;
dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC,
@@ -461,7 +489,10 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibm
ibmr->sg_dma_len = sg_dma_len;
ibmr->remap_count++;
- rds_ib_stats_inc(s_ib_rdma_mr_used);
+ if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
+ rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
+ else
+ rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
ret = 0;
out:
@@ -524,8 +555,7 @@ static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
__rds_ib_teardown_mr(ibmr);
if (pinned) {
- struct rds_ib_device *rds_ibdev = ibmr->device;
- struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
+ struct rds_ib_mr_pool *pool = ibmr->pool;
atomic_sub(pinned, &pool->free_pinned);
}
@@ -594,7 +624,7 @@ static void list_to_llist_nodes(struct rds_ib_mr_pool *pool,
* to free as many MRs as needed to get back to this limit.
*/
static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
- int free_all, struct rds_ib_mr **ibmr_ret)
+ int free_all, struct rds_ib_mr **ibmr_ret)
{
struct rds_ib_mr *ibmr, *next;
struct llist_node *clean_nodes;
@@ -605,11 +635,14 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
int ret = 0;
- rds_ib_stats_inc(s_ib_rdma_mr_pool_flush);
+ if (pool->pool_type == RDS_IB_MR_8K_POOL)
+ rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
+ else
+ rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush);
if (ibmr_ret) {
DEFINE_WAIT(wait);
- while(!mutex_trylock(&pool->flush_lock)) {
+ while (!mutex_trylock(&pool->flush_lock)) {
ibmr = rds_ib_reuse_fmr(pool);
if (ibmr) {
*ibmr_ret = ibmr;
@@ -666,8 +699,12 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
unpinned += ibmr->sg_len;
__rds_ib_teardown_mr(ibmr);
- if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) {
- rds_ib_stats_inc(s_ib_rdma_mr_free);
+ if (nfreed < free_goal ||
+ ibmr->remap_count >= pool->fmr_attr.max_maps) {
+ if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
+ rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
+ else
+ rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
list_del(&ibmr->unmap_list);
ib_dealloc_fmr(ibmr->fmr);
kfree(ibmr);
@@ -719,8 +756,8 @@ static void rds_ib_mr_pool_flush_worker(struct work_struct *work)
void rds_ib_free_mr(void *trans_private, int invalidate)
{
struct rds_ib_mr *ibmr = trans_private;
+ struct rds_ib_mr_pool *pool = ibmr->pool;
struct rds_ib_device *rds_ibdev = ibmr->device;
- struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
@@ -759,10 +796,11 @@ void rds_ib_flush_mrs(void)
down_read(&rds_ib_devices_lock);
list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
- struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool;
+ if (rds_ibdev->mr_8k_pool)
+ rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL);
- if (pool)
- rds_ib_flush_mr_pool(pool, 0, NULL);
+ if (rds_ibdev->mr_1m_pool)
+ rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL);
}
up_read(&rds_ib_devices_lock);
}
@@ -780,12 +818,12 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
goto out;
}
- if (!rds_ibdev->mr_pool) {
+ if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
ret = -ENODEV;
goto out;
}
- ibmr = rds_ib_alloc_fmr(rds_ibdev);
+ ibmr = rds_ib_alloc_fmr(rds_ibdev, nents);
if (IS_ERR(ibmr)) {
rds_ib_dev_put(rds_ibdev);
return ibmr;
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index f43831e4186a..96744b75db93 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -596,8 +596,7 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic)
* wr_id and avoids working with the ring in that case.
*/
#ifndef KERNEL_HAS_ATOMIC64
-static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
- int ack_required)
+void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
{
unsigned long flags;
@@ -622,8 +621,7 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic)
return seq;
}
#else
-static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq,
- int ack_required)
+void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required)
{
atomic64_set(&ic->i_ack_next, seq);
if (ack_required) {
@@ -830,20 +828,6 @@ static void rds_ib_cong_recv(struct rds_connection *conn,
rds_cong_map_updated(map, uncongested);
}
-/*
- * Rings are posted with all the allocations they'll need to queue the
- * incoming message to the receiving socket so this can't fail.
- * All fragments start with a header, so we can make sure we're not receiving
- * garbage, and we can tell a small 8 byte fragment from an ACK frame.
- */
-struct rds_ib_ack_state {
- u64 ack_next;
- u64 ack_recv;
- unsigned int ack_required:1;
- unsigned int ack_next_valid:1;
- unsigned int ack_recv_valid:1;
-};
-
static void rds_ib_process_recv(struct rds_connection *conn,
struct rds_ib_recv_work *recv, u32 data_len,
struct rds_ib_ack_state *state)
@@ -969,96 +953,50 @@ static void rds_ib_process_recv(struct rds_connection *conn,
}
}
-/*
- * Plucking the oldest entry from the ring can be done concurrently with
- * the thread refilling the ring. Each ring operation is protected by
- * spinlocks and the transient state of refilling doesn't change the
- * recording of which entry is oldest.
- *
- * This relies on IB only calling one cq comp_handler for each cq so that
- * there will only be one caller of rds_recv_incoming() per RDS connection.
- */
-void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context)
-{
- struct rds_connection *conn = context;
- struct rds_ib_connection *ic = conn->c_transport_data;
-
- rdsdebug("conn %p cq %p\n", conn, cq);
-
- rds_ib_stats_inc(s_ib_rx_cq_call);
-
- tasklet_schedule(&ic->i_recv_tasklet);
-}
-
-static inline void rds_poll_cq(struct rds_ib_connection *ic,
- struct rds_ib_ack_state *state)
+void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
+ struct ib_wc *wc,
+ struct rds_ib_ack_state *state)
{
struct rds_connection *conn = ic->conn;
- struct ib_wc wc;
struct rds_ib_recv_work *recv;
- while (ib_poll_cq(ic->i_recv_cq, 1, &wc) > 0) {
- rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
- (unsigned long long)wc.wr_id, wc.status,
- ib_wc_status_msg(wc.status), wc.byte_len,
- be32_to_cpu(wc.ex.imm_data));
- rds_ib_stats_inc(s_ib_rx_cq_event);
+ rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
+ (unsigned long long)wc->wr_id, wc->status,
+ ib_wc_status_msg(wc->status), wc->byte_len,
+ be32_to_cpu(wc->ex.imm_data));
- recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
-
- ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE);
-
- /*
- * Also process recvs in connecting state because it is possible
- * to get a recv completion _before_ the rdmacm ESTABLISHED
- * event is processed.
- */
- if (wc.status == IB_WC_SUCCESS) {
- rds_ib_process_recv(conn, recv, wc.byte_len, state);
- } else {
- /* We expect errors as the qp is drained during shutdown */
- if (rds_conn_up(conn) || rds_conn_connecting(conn))
- rds_ib_conn_error(conn, "recv completion on %pI4 had "
- "status %u (%s), disconnecting and "
- "reconnecting\n", &conn->c_faddr,
- wc.status,
- ib_wc_status_msg(wc.status));
- }
+ rds_ib_stats_inc(s_ib_rx_cq_event);
+ recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)];
+ ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1,
+ DMA_FROM_DEVICE);
- /*
- * rds_ib_process_recv() doesn't always consume the frag, and
- * we might not have called it at all if the wc didn't indicate
- * success. We already unmapped the frag's pages, though, and
- * the following rds_ib_ring_free() call tells the refill path
- * that it will not find an allocated frag here. Make sure we
- * keep that promise by freeing a frag that's still on the ring.
- */
- if (recv->r_frag) {
- rds_ib_frag_free(ic, recv->r_frag);
- recv->r_frag = NULL;
- }
- rds_ib_ring_free(&ic->i_recv_ring, 1);
+ /* Also process recvs in connecting state because it is possible
+ * to get a recv completion _before_ the rdmacm ESTABLISHED
+ * event is processed.
+ */
+ if (wc->status == IB_WC_SUCCESS) {
+ rds_ib_process_recv(conn, recv, wc->byte_len, state);
+ } else {
+ /* We expect errors as the qp is drained during shutdown */
+ if (rds_conn_up(conn) || rds_conn_connecting(conn))
+ rds_ib_conn_error(conn, "recv completion on %pI4 had status %u (%s), disconnecting and reconnecting\n",
+ &conn->c_faddr,
+ wc->status,
+ ib_wc_status_msg(wc->status));
}
-}
-void rds_ib_recv_tasklet_fn(unsigned long data)
-{
- struct rds_ib_connection *ic = (struct rds_ib_connection *) data;
- struct rds_connection *conn = ic->conn;
- struct rds_ib_ack_state state = { 0, };
-
- rds_poll_cq(ic, &state);
- ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED);
- rds_poll_cq(ic, &state);
-
- if (state.ack_next_valid)
- rds_ib_set_ack(ic, state.ack_next, state.ack_required);
- if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) {
- rds_send_drop_acked(conn, state.ack_recv, NULL);
- ic->i_ack_recv = state.ack_recv;
+ /* rds_ib_process_recv() doesn't always consume the frag, and
+ * we might not have called it at all if the wc didn't indicate
+ * success. We already unmapped the frag's pages, though, and
+ * the following rds_ib_ring_free() call tells the refill path
+ * that it will not find an allocated frag here. Make sure we
+ * keep that promise by freeing a frag that's still on the ring.
+ */
+ if (recv->r_frag) {
+ rds_ib_frag_free(ic, recv->r_frag);
+ recv->r_frag = NULL;
}
- if (rds_conn_up(conn))
- rds_ib_attempt_ack(ic);
+ rds_ib_ring_free(&ic->i_recv_ring, 1);
/* If we ever end up with a really empty receive ring, we're
* in deep trouble, as the sender will definitely see RNR
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index 4e88047086b6..670882c752e9 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -195,7 +195,7 @@ void rds_ib_send_init_ring(struct rds_ib_connection *ic)
send->s_op = NULL;
- send->s_wr.wr_id = i;
+ send->s_wr.wr_id = i | RDS_IB_SEND_OP;
send->s_wr.sg_list = send->s_sge;
send->s_wr.ex.imm_data = 0;
@@ -237,81 +237,73 @@ static void rds_ib_sub_signaled(struct rds_ib_connection *ic, int nr)
* unallocs the next free entry in the ring it doesn't alter which is
* the next to be freed, which is what this is concerned with.
*/
-void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
+void rds_ib_send_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
{
- struct rds_connection *conn = context;
- struct rds_ib_connection *ic = conn->c_transport_data;
struct rds_message *rm = NULL;
- struct ib_wc wc;
+ struct rds_connection *conn = ic->conn;
struct rds_ib_send_work *send;
u32 completed;
u32 oldest;
u32 i = 0;
- int ret;
int nr_sig = 0;
- rdsdebug("cq %p conn %p\n", cq, conn);
- rds_ib_stats_inc(s_ib_tx_cq_call);
- ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- if (ret)
- rdsdebug("ib_req_notify_cq send failed: %d\n", ret);
-
- while (ib_poll_cq(cq, 1, &wc) > 0) {
- rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
- (unsigned long long)wc.wr_id, wc.status,
- ib_wc_status_msg(wc.status), wc.byte_len,
- be32_to_cpu(wc.ex.imm_data));
- rds_ib_stats_inc(s_ib_tx_cq_event);
-
- if (wc.wr_id == RDS_IB_ACK_WR_ID) {
- if (time_after(jiffies, ic->i_ack_queued + HZ/2))
- rds_ib_stats_inc(s_ib_tx_stalled);
- rds_ib_ack_send_complete(ic);
- continue;
- }
- oldest = rds_ib_ring_oldest(&ic->i_send_ring);
+ rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n",
+ (unsigned long long)wc->wr_id, wc->status,
+ ib_wc_status_msg(wc->status), wc->byte_len,
+ be32_to_cpu(wc->ex.imm_data));
+ rds_ib_stats_inc(s_ib_tx_cq_event);
- completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest);
+ if (wc->wr_id == RDS_IB_ACK_WR_ID) {
+ if (time_after(jiffies, ic->i_ack_queued + HZ / 2))
+ rds_ib_stats_inc(s_ib_tx_stalled);
+ rds_ib_ack_send_complete(ic);
+ return;
+ }
- for (i = 0; i < completed; i++) {
- send = &ic->i_sends[oldest];
- if (send->s_wr.send_flags & IB_SEND_SIGNALED)
- nr_sig++;
+ oldest = rds_ib_ring_oldest(&ic->i_send_ring);
- rm = rds_ib_send_unmap_op(ic, send, wc.status);
+ completed = rds_ib_ring_completed(&ic->i_send_ring,
+ (wc->wr_id & ~RDS_IB_SEND_OP),
+ oldest);
- if (time_after(jiffies, send->s_queued + HZ/2))
- rds_ib_stats_inc(s_ib_tx_stalled);
+ for (i = 0; i < completed; i++) {
+ send = &ic->i_sends[oldest];
+ if (send->s_wr.send_flags & IB_SEND_SIGNALED)
+ nr_sig++;
- if (send->s_op) {
- if (send->s_op == rm->m_final_op) {
- /* If anyone waited for this message to get flushed out, wake
- * them up now */
- rds_message_unmapped(rm);
- }
- rds_message_put(rm);
- send->s_op = NULL;
- }
+ rm = rds_ib_send_unmap_op(ic, send, wc->status);
- oldest = (oldest + 1) % ic->i_send_ring.w_nr;
- }
+ if (time_after(jiffies, send->s_queued + HZ / 2))
+ rds_ib_stats_inc(s_ib_tx_stalled);
- rds_ib_ring_free(&ic->i_send_ring, completed);
- rds_ib_sub_signaled(ic, nr_sig);
- nr_sig = 0;
-
- if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
- test_bit(0, &conn->c_map_queued))
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
-
- /* We expect errors as the qp is drained during shutdown */
- if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) {
- rds_ib_conn_error(conn, "send completion on %pI4 had status "
- "%u (%s), disconnecting and reconnecting\n",
- &conn->c_faddr, wc.status,
- ib_wc_status_msg(wc.status));
+ if (send->s_op) {
+ if (send->s_op == rm->m_final_op) {
+ /* If anyone waited for this message to get
+ * flushed out, wake them up now
+ */
+ rds_message_unmapped(rm);
+ }
+ rds_message_put(rm);
+ send->s_op = NULL;
}
+
+ oldest = (oldest + 1) % ic->i_send_ring.w_nr;
+ }
+
+ rds_ib_ring_free(&ic->i_send_ring, completed);
+ rds_ib_sub_signaled(ic, nr_sig);
+ nr_sig = 0;
+
+ if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) ||
+ test_bit(0, &conn->c_map_queued))
+ queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+
+ /* We expect errors as the qp is drained during shutdown */
+ if (wc->status != IB_WC_SUCCESS && rds_conn_up(conn)) {
+ rds_ib_conn_error(conn, "send completion on %pI4 had status %u (%s), disconnecting and reconnecting\n",
+ &conn->c_faddr, wc->status,
+ ib_wc_status_msg(wc->status));
}
}
diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c
index 2d5965d6e97c..d77e04473056 100644
--- a/net/rds/ib_stats.c
+++ b/net/rds/ib_stats.c
@@ -42,14 +42,14 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats);
static const char *const rds_ib_stat_names[] = {
"ib_connect_raced",
"ib_listen_closed_stale",
- "ib_tx_cq_call",
+ "s_ib_evt_handler_call",
+ "ib_tasklet_call",
"ib_tx_cq_event",
"ib_tx_ring_full",
"ib_tx_throttle",
"ib_tx_sg_mapping_failure",
"ib_tx_stalled",
"ib_tx_credit_updates",
- "ib_rx_cq_call",
"ib_rx_cq_event",
"ib_rx_ring_empty",
"ib_rx_refill_from_cq",
@@ -61,12 +61,18 @@ static const char *const rds_ib_stat_names[] = {
"ib_ack_send_delayed",
"ib_ack_send_piggybacked",
"ib_ack_received",
- "ib_rdma_mr_alloc",
- "ib_rdma_mr_free",
- "ib_rdma_mr_used",
- "ib_rdma_mr_pool_flush",
- "ib_rdma_mr_pool_wait",
- "ib_rdma_mr_pool_depleted",
+ "ib_rdma_mr_8k_alloc",
+ "ib_rdma_mr_8k_free",
+ "ib_rdma_mr_8k_used",
+ "ib_rdma_mr_8k_pool_flush",
+ "ib_rdma_mr_8k_pool_wait",
+ "ib_rdma_mr_8k_pool_depleted",
+ "ib_rdma_mr_1m_alloc",
+ "ib_rdma_mr_1m_free",
+ "ib_rdma_mr_1m_used",
+ "ib_rdma_mr_1m_pool_flush",
+ "ib_rdma_mr_1m_pool_wait",
+ "ib_rdma_mr_1m_pool_depleted",
"ib_atomic_cswp",
"ib_atomic_fadd",
};
diff --git a/net/rds/rds.h b/net/rds/rds.h
index afb4048d0cfd..543c308fcc2a 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -86,7 +86,9 @@ struct rds_connection {
struct hlist_node c_hash_node;
__be32 c_laddr;
__be32 c_faddr;
- unsigned int c_loopback:1;
+ unsigned int c_loopback:1,
+ c_outgoing:1,
+ c_pad_to_32:30;
struct rds_connection *c_passive;
struct rds_cong_map *c_lcong;
@@ -603,6 +605,7 @@ extern wait_queue_head_t rds_poll_waitq;
int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
void rds_remove_bound(struct rds_sock *rs);
struct rds_sock *rds_find_bound(__be32 addr, __be16 port);
+void rds_bind_lock_init(void);
/* cong.c */
int rds_cong_get_maps(struct rds_connection *conn);
diff --git a/net/rds/send.c b/net/rds/send.c
index 4df61a515b83..827155c2ead1 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -38,6 +38,7 @@
#include <linux/list.h>
#include <linux/ratelimit.h>
#include <linux/export.h>
+#include <linux/sizes.h>
#include "rds.h"
@@ -51,7 +52,7 @@
* it to 0 will restore the old behavior (where we looped until we had
* drained the queue).
*/
-static int send_batch_count = 64;
+static int send_batch_count = SZ_1K;
module_param(send_batch_count, int, 0444);
MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue");
@@ -223,7 +224,7 @@ restart:
* through a lot of messages, lets back off and see
* if anyone else jumps in
*/
- if (batch_count >= 1024)
+ if (batch_count >= send_batch_count)
goto over_batch;
spin_lock_irqsave(&conn->c_lock, flags);
@@ -423,12 +424,15 @@ over_batch:
!list_empty(&conn->c_send_queue)) &&
send_gen == conn->c_send_gen) {
rds_stats_inc(s_send_lock_queue_raced);
- goto restart;
+ if (batch_count < send_batch_count)
+ goto restart;
+ queue_delayed_work(rds_wq, &conn->c_send_w, 1);
}
}
out:
return ret;
}
+EXPORT_SYMBOL_GPL(rds_send_xmit);
static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm)
{
@@ -1120,8 +1124,9 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
*/
rds_stats_inc(s_send_queued);
- if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
- rds_send_xmit(conn);
+ ret = rds_send_xmit(conn);
+ if (ret == -ENOMEM || ret == -EAGAIN)
+ queue_delayed_work(rds_wq, &conn->c_send_w, 1);
rds_message_put(rm);
return payload_len;
@@ -1177,8 +1182,8 @@ rds_send_pong(struct rds_connection *conn, __be16 dport)
rds_stats_inc(s_send_queued);
rds_stats_inc(s_send_pong);
- if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags))
- queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+ /* schedule the send work on rds_wq */
+ queue_delayed_work(rds_wq, &conn->c_send_w, 1);
rds_message_put(rm);
return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index c42b60bf4c68..9d6ddbacd875 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -67,21 +67,13 @@ void rds_tcp_nonagle(struct socket *sock)
set_fs(oldfs);
}
+/* All module specific customizations to the RDS-TCP socket should be done in
+ * rds_tcp_tune() and applied after socket creation. In general these
+ * customizations should be tunable via module_param()
+ */
void rds_tcp_tune(struct socket *sock)
{
- struct sock *sk = sock->sk;
-
rds_tcp_nonagle(sock);
-
- /*
- * We're trying to saturate gigabit with the default,
- * see svc_sock_setbufsize().
- */
- lock_sock(sk);
- sk->sk_sndbuf = RDS_TCP_DEFAULT_BUFSIZE;
- sk->sk_rcvbuf = RDS_TCP_DEFAULT_BUFSIZE;
- sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
- release_sock(sk);
}
u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 444d78d0bd77..0936a4a32b47 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -110,28 +110,27 @@ int rds_tcp_accept_one(struct socket *sock)
goto out;
}
/* An incoming SYN request came in, and TCP just accepted it.
- * We always create a new conn for listen side of TCP, and do not
- * add it to the c_hash_list.
*
* If the client reboots, this conn will need to be cleaned up.
* rds_tcp_state_change() will do that cleanup
*/
rs_tcp = (struct rds_tcp_connection *)conn->c_transport_data;
- WARN_ON(!rs_tcp || rs_tcp->t_sock);
-
- /*
- * see the comment above rds_queue_delayed_reconnect()
- */
- if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) {
- if (rds_conn_state(conn) == RDS_CONN_UP)
- rds_tcp_stats_inc(s_tcp_listen_closed_stale);
- else
- rds_tcp_stats_inc(s_tcp_connect_raced);
- rds_conn_drop(conn);
+ if (rs_tcp->t_sock &&
+ ntohl(inet->inet_saddr) < ntohl(inet->inet_daddr)) {
+ struct sock *nsk = new_sock->sk;
+
+ nsk->sk_user_data = NULL;
+ nsk->sk_prot->disconnect(nsk, 0);
+ tcp_done(nsk);
+ new_sock = NULL;
ret = 0;
goto out;
+ } else if (rs_tcp->t_sock) {
+ rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp);
+ conn->c_outgoing = 0;
}
+ rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING);
rds_tcp_set_callbacks(new_sock, conn);
rds_connect_complete(conn);
new_sock = NULL;
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 53b17ca0dff5..2894e6095e3b 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -83,6 +83,7 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
struct rds_tcp_connection *tc = conn->c_transport_data;
int done = 0;
int ret = 0;
+ int more;
if (hdr_off == 0) {
/*
@@ -116,12 +117,15 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
goto out;
}
+ more = rm->data.op_nents > 1 ? (MSG_MORE | MSG_SENDPAGE_NOTLAST) : 0;
while (sg < rm->data.op_nents) {
+ int flags = MSG_DONTWAIT | MSG_NOSIGNAL | more;
+
ret = tc->t_sock->ops->sendpage(tc->t_sock,
sg_page(&rm->data.op_sg[sg]),
rm->data.op_sg[sg].offset + off,
rm->data.op_sg[sg].length - off,
- MSG_DONTWAIT|MSG_NOSIGNAL);
+ flags);
rdsdebug("tcp sendpage %p:%u:%u ret %d\n", (void *)sg_page(&rm->data.op_sg[sg]),
rm->data.op_sg[sg].offset + off, rm->data.op_sg[sg].length - off,
ret);
@@ -134,6 +138,8 @@ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm,
off = 0;
sg++;
}
+ if (sg == rm->data.op_nents - 1)
+ more = 0;
}
out:
diff --git a/net/rds/threads.c b/net/rds/threads.c
index dc2402e871fd..454aa6d23327 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -162,7 +162,9 @@ void rds_send_worker(struct work_struct *work)
int ret;
if (rds_conn_state(conn) == RDS_CONN_UP) {
+ clear_bit(RDS_LL_SEND_FULL, &conn->c_flags);
ret = rds_send_xmit(conn);
+ cond_resched();
rdsdebug("conn %p ret %d\n", conn, ret);
switch (ret) {
case -EAGAIN:
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 6631f4f1e39b..692b3e67fb54 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -808,7 +808,7 @@ void rxrpc_put_connection(struct rxrpc_connection *conn)
ASSERTCMP(atomic_read(&conn->usage), >, 0);
- conn->put_time = get_seconds();
+ conn->put_time = ktime_get_seconds();
if (atomic_dec_and_test(&conn->usage)) {
_debug("zombie");
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
@@ -852,7 +852,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
_enter("");
- now = get_seconds();
+ now = ktime_get_seconds();
earliest = ULONG_MAX;
write_lock_bh(&rxrpc_connection_lock);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index aef1bd294e17..2934a73a5981 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -208,7 +208,7 @@ struct rxrpc_transport {
struct rb_root server_conns; /* server connections on this transport */
struct list_head link; /* link in master session list */
struct sk_buff_head error_queue; /* error packets awaiting processing */
- time_t put_time; /* time at which to reap */
+ unsigned long put_time; /* time at which to reap */
spinlock_t client_lock; /* client connection allocation lock */
rwlock_t conn_lock; /* lock for active/dead connections */
atomic_t usage;
@@ -256,7 +256,7 @@ struct rxrpc_connection {
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long events;
#define RXRPC_CONN_CHALLENGE 0 /* send challenge packet */
- time_t put_time; /* time at which to reap */
+ unsigned long put_time; /* time at which to reap */
rwlock_t lock; /* access lock */
spinlock_t state_lock; /* state-change lock */
atomic_t usage;
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 1976dec84f29..9946467f16b4 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -189,7 +189,7 @@ void rxrpc_put_transport(struct rxrpc_transport *trans)
ASSERTCMP(atomic_read(&trans->usage), >, 0);
- trans->put_time = get_seconds();
+ trans->put_time = ktime_get_seconds();
if (unlikely(atomic_dec_and_test(&trans->usage))) {
_debug("zombie");
/* let the reaper determine the timeout to avoid a race with
@@ -226,7 +226,7 @@ static void rxrpc_transport_reaper(struct work_struct *work)
_enter("");
- now = get_seconds();
+ now = ktime_get_seconds();
earliest = ULONG_MAX;
/* extract all the transports that have been dead too long */
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 559bfa011bda..0bc6f912f870 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -72,6 +72,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
case TC_ACT_PIPE:
case TC_ACT_RECLASSIFY:
case TC_ACT_OK:
+ case TC_ACT_REDIRECT:
action = filter_res;
break;
case TC_ACT_SHOT:
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 5019a47b9270..bb41699c6c49 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -68,13 +68,13 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
}
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- proto, &tuple))
+ proto, ca->net, &tuple))
goto out;
zone.id = ca->zone;
zone.dir = NF_CT_DEFAULT_ZONE_DIR;
- thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple);
+ thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
if (!thash)
goto out;
@@ -119,6 +119,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
ci = to_connmark(a);
ci->tcf_action = parm->action;
+ ci->net = net;
ci->zone = parm->zone;
tcf_hash_insert(a);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 99c9cc1c7af9..d05869646515 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -189,6 +189,7 @@ static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
* worry later - danger - this API seems to have changed
* from earlier kernels
*/
+ par.net = dev_net(skb->dev);
par.in = skb->dev;
par.out = NULL;
par.hooknum = ipt->tcfi_hook;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index e5168f8b9640..5faaa5425f7b 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -38,6 +38,7 @@ struct cls_bpf_prog {
struct bpf_prog *filter;
struct list_head link;
struct tcf_result res;
+ bool exts_integrated;
struct tcf_exts exts;
u32 handle;
union {
@@ -52,6 +53,7 @@ struct cls_bpf_prog {
static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
[TCA_BPF_CLASSID] = { .type = NLA_U32 },
+ [TCA_BPF_FLAGS] = { .type = NLA_U32 },
[TCA_BPF_FD] = { .type = NLA_U32 },
[TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
[TCA_BPF_OPS_LEN] = { .type = NLA_U16 },
@@ -59,6 +61,20 @@ static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
.len = sizeof(struct sock_filter) * BPF_MAXINSNS },
};
+static int cls_bpf_exec_opcode(int code)
+{
+ switch (code) {
+ case TC_ACT_OK:
+ case TC_ACT_SHOT:
+ case TC_ACT_STOLEN:
+ case TC_ACT_REDIRECT:
+ case TC_ACT_UNSPEC:
+ return code;
+ default:
+ return TC_ACT_UNSPEC;
+ }
+}
+
static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
@@ -79,6 +95,8 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
list_for_each_entry_rcu(prog, &head->plist, link) {
int filter_res;
+ qdisc_skb_cb(skb)->tc_classid = prog->res.classid;
+
if (at_ingress) {
/* It is safe to push/pull even if skb_shared() */
__skb_push(skb, skb->mac_len);
@@ -88,6 +106,16 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
filter_res = BPF_PROG_RUN(prog->filter, skb);
}
+ if (prog->exts_integrated) {
+ res->class = prog->res.class;
+ res->classid = qdisc_skb_cb(skb)->tc_classid;
+
+ ret = cls_bpf_exec_opcode(filter_res);
+ if (ret == TC_ACT_UNSPEC)
+ continue;
+ break;
+ }
+
if (filter_res == 0)
continue;
@@ -195,8 +223,7 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
return ret;
}
-static int cls_bpf_prog_from_ops(struct nlattr **tb,
- struct cls_bpf_prog *prog, u32 classid)
+static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog)
{
struct sock_filter *bpf_ops;
struct sock_fprog_kern fprog_tmp;
@@ -230,15 +257,13 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb,
prog->bpf_ops = bpf_ops;
prog->bpf_num_ops = bpf_num_ops;
prog->bpf_name = NULL;
-
prog->filter = fp;
- prog->res.classid = classid;
return 0;
}
-static int cls_bpf_prog_from_efd(struct nlattr **tb,
- struct cls_bpf_prog *prog, u32 classid)
+static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
+ const struct tcf_proto *tp)
{
struct bpf_prog *fp;
char *name = NULL;
@@ -268,9 +293,10 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb,
prog->bpf_ops = NULL;
prog->bpf_fd = bpf_fd;
prog->bpf_name = name;
-
prog->filter = fp;
- prog->res.classid = classid;
+
+ if (fp->dst_needed)
+ netif_keep_dst(qdisc_dev(tp->q));
return 0;
}
@@ -280,16 +306,13 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
unsigned long base, struct nlattr **tb,
struct nlattr *est, bool ovr)
{
+ bool is_bpf, is_ebpf, have_exts = false;
struct tcf_exts exts;
- bool is_bpf, is_ebpf;
- u32 classid;
int ret;
is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
is_ebpf = tb[TCA_BPF_FD];
-
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
- !tb[TCA_BPF_CLASSID])
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
@@ -297,18 +320,32 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
if (ret < 0)
return ret;
- classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ if (tb[TCA_BPF_FLAGS]) {
+ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+
+ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
+ tcf_exts_destroy(&exts);
+ return -EINVAL;
+ }
+
+ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+ }
+
+ prog->exts_integrated = have_exts;
- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
- cls_bpf_prog_from_efd(tb, prog, classid);
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+ cls_bpf_prog_from_efd(tb, prog, tp);
if (ret < 0) {
tcf_exts_destroy(&exts);
return ret;
}
- tcf_bind_filter(tp, &prog->res, base);
- tcf_exts_change(tp, &prog->exts, &exts);
+ if (tb[TCA_BPF_CLASSID]) {
+ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ tcf_bind_filter(tp, &prog->res, base);
+ }
+ tcf_exts_change(tp, &prog->exts, &exts);
return 0;
}
@@ -429,6 +466,7 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
{
struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
struct nlattr *nest;
+ u32 bpf_flags = 0;
int ret;
if (prog == NULL)
@@ -440,7 +478,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (nest == NULL)
goto nla_put_failure;
- if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
+ if (prog->res.classid &&
+ nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
goto nla_put_failure;
if (cls_bpf_is_ebpf(prog))
@@ -453,6 +492,11 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
if (tcf_exts_dump(skb, &prog->exts) < 0)
goto nla_put_failure;
+ if (prog->exts_integrated)
+ bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT;
+ if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags))
+ goto nla_put_failure;
+
nla_nest_end(skb, nest);
if (tcf_exts_dump_stats(skb, &prog->exts) < 0)
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index df0328ba6a48..c66ca9400ab4 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -95,6 +95,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
if (skb->skb_iif)
indev = dev_get_by_index_rcu(em->net, skb->skb_iif);
+ acpar.net = em->net;
acpar.in = indev ? indev : dev;
acpar.out = dev;
diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c
index 094a874b48bc..3fee70d9814f 100644
--- a/net/sched/sch_blackhole.c
+++ b/net/sched/sch_blackhole.c
@@ -11,7 +11,7 @@
* Note: Quantum tunneling is not supported.
*/
-#include <linux/module.h>
+#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
@@ -37,17 +37,8 @@ static struct Qdisc_ops blackhole_qdisc_ops __read_mostly = {
.owner = THIS_MODULE,
};
-static int __init blackhole_module_init(void)
+static int __init blackhole_init(void)
{
return register_qdisc(&blackhole_qdisc_ops);
}
-
-static void __exit blackhole_module_exit(void)
-{
- unregister_qdisc(&blackhole_qdisc_ops);
-}
-
-module_init(blackhole_module_init)
-module_exit(blackhole_module_exit)
-
-MODULE_LICENSE("GPL");
+device_initcall(blackhole_init)
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index c4d45fd8c551..f357f34d02d2 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -35,14 +35,20 @@
#define NO_DEFAULT_INDEX (1 << 16)
+struct mask_value {
+ u8 mask;
+ u8 value;
+};
+
struct dsmark_qdisc_data {
struct Qdisc *q;
struct tcf_proto __rcu *filter_list;
- u8 *mask; /* "owns" the array */
- u8 *value;
+ struct mask_value *mv;
u16 indices;
+ u8 set_tc_index;
u32 default_index; /* index range is 0...0xffff */
- int set_tc_index;
+#define DSMARK_EMBEDDED_SZ 16
+ struct mask_value embedded[DSMARK_EMBEDDED_SZ];
};
static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
@@ -116,7 +122,6 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_DSMARK_MAX + 1];
int err = -EINVAL;
- u8 mask = 0;
pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
__func__, sch, p, classid, parent, *arg);
@@ -133,14 +138,11 @@ static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
if (err < 0)
goto errout;
- if (tb[TCA_DSMARK_MASK])
- mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
-
if (tb[TCA_DSMARK_VALUE])
- p->value[*arg - 1] = nla_get_u8(tb[TCA_DSMARK_VALUE]);
+ p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]);
if (tb[TCA_DSMARK_MASK])
- p->mask[*arg - 1] = mask;
+ p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
err = 0;
@@ -155,8 +157,8 @@ static int dsmark_delete(struct Qdisc *sch, unsigned long arg)
if (!dsmark_valid_index(p, arg))
return -EINVAL;
- p->mask[arg - 1] = 0xff;
- p->value[arg - 1] = 0;
+ p->mv[arg - 1].mask = 0xff;
+ p->mv[arg - 1].value = 0;
return 0;
}
@@ -173,7 +175,7 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
return;
for (i = 0; i < p->indices; i++) {
- if (p->mask[i] == 0xff && !p->value[i])
+ if (p->mv[i].mask == 0xff && !p->mv[i].value)
goto ignore;
if (walker->count >= walker->skip) {
if (walker->fn(sch, i + 1, walker) < 0) {
@@ -291,12 +293,12 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
- p->value[index]);
+ ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
+ p->mv[index].value);
break;
case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
- p->value[index]);
+ ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask,
+ p->mv[index].value);
break;
default:
/*
@@ -304,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
* This way, we can send non-IP traffic through dsmark
* and don't need yet another qdisc as a bypass.
*/
- if (p->mask[index] != 0xff || p->value[index])
+ if (p->mv[index].mask != 0xff || p->mv[index].value)
pr_warn("%s: unsupported protocol %d\n",
__func__, ntohs(tc_skb_protocol(skb)));
break;
@@ -346,7 +348,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
int err = -EINVAL;
u32 default_index = NO_DEFAULT_INDEX;
u16 indices;
- u8 *mask;
+ int i;
pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
@@ -366,18 +368,18 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_DSMARK_DEFAULT_INDEX])
default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
- mask = kmalloc(indices * 2, GFP_KERNEL);
- if (mask == NULL) {
+ if (indices <= DSMARK_EMBEDDED_SZ)
+ p->mv = p->embedded;
+ else
+ p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL);
+ if (!p->mv) {
err = -ENOMEM;
goto errout;
}
-
- p->mask = mask;
- memset(p->mask, 0xff, indices);
-
- p->value = p->mask + indices;
- memset(p->value, 0, indices);
-
+ for (i = 0; i < indices; i++) {
+ p->mv[i].mask = 0xff;
+ p->mv[i].value = 0;
+ }
p->indices = indices;
p->default_index = default_index;
p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
@@ -410,7 +412,8 @@ static void dsmark_destroy(struct Qdisc *sch)
tcf_destroy_chain(&p->filter_list);
qdisc_destroy(p->q);
- kfree(p->mask);
+ if (p->mv != p->embedded)
+ kfree(p->mv);
}
static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
@@ -430,8 +433,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
opts = nla_nest_start(skb, TCA_OPTIONS);
if (opts == NULL)
goto nla_put_failure;
- if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mask[cl - 1]) ||
- nla_put_u8(skb, TCA_DSMARK_VALUE, p->value[cl - 1]))
+ if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
+ nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value))
goto nla_put_failure;
return nla_nest_end(skb, opts);
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index f377702d4b91..109b2322778f 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -224,13 +224,16 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL))
return &q->internal;
- /* SYNACK messages are attached to a listener socket.
- * 1) They are not part of a 'flow' yet
- * 2) We do not want to rate limit them (eg SYNFLOOD attack),
+ /* SYNACK messages are attached to a TCP_NEW_SYN_RECV request socket
+ * or a listener (SYNCOOKIE mode)
+ * 1) request sockets are not full blown,
+ * they do not contain sk_pacing_rate
+ * 2) They are not part of a 'flow' yet
+ * 3) We do not want to rate limit them (eg SYNFLOOD attack),
* especially if the listener set SO_MAX_PACING_RATE
- * 3) We pretend they are orphaned
+ * 4) We pretend they are orphaned
*/
- if (!sk || sk->sk_state == TCP_LISTEN) {
+ if (!sk || sk_listener(sk)) {
unsigned long hash = skb_get_hash(skb) & q->orphan_mask;
/* By forcing low order bit to 1, we make sure to not
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 7954c52e1794..763e06a55155 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2494,7 +2494,7 @@ static int sctp_process_param(struct sctp_association *asoc,
__u16 sat;
int retval = 1;
sctp_scope_t scope;
- time_t stale;
+ u32 stale;
struct sctp_af *af;
union sctp_addr_param *addr_param;
struct sctp_transport *t;
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index d7eaa7354cf7..6f46aa16cb76 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -2306,7 +2306,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net,
sctp_cmd_seq_t *commands)
{
struct sctp_chunk *chunk = arg;
- time_t stale;
+ u32 stale;
sctp_cookie_preserve_param_t bht;
sctp_errhdr_t *err;
struct sctp_chunk *reply;
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 77f5d17e2612..1eb76956b439 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1,6 +1,6 @@
/*
* net/switchdev/switchdev.c - Switch device API
- * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
+ * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us>
* Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
@@ -15,12 +15,166 @@
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
#include <linux/if_bridge.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
#include <linux/if_vlan.h>
#include <net/ip_fib.h>
#include <net/switchdev.h>
/**
+ * switchdev_trans_item_enqueue - Enqueue data item to transaction queue
+ *
+ * @trans: transaction
+ * @data: pointer to data being queued
+ * @destructor: data destructor
+ * @tritem: transaction item being queued
+ *
+ * Enqueue data item to transaction queue. tritem is typically placed in
+ * container pointed at by data pointer. Destructor is called on
+ * transaction abort and after successful commit phase in case
+ * the caller did not dequeue the item before.
+ */
+void switchdev_trans_item_enqueue(struct switchdev_trans *trans,
+ void *data, void (*destructor)(void const *),
+ struct switchdev_trans_item *tritem)
+{
+ tritem->data = data;
+ tritem->destructor = destructor;
+ list_add_tail(&tritem->list, &trans->item_list);
+}
+EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue);
+
+static struct switchdev_trans_item *
+__switchdev_trans_item_dequeue(struct switchdev_trans *trans)
+{
+ struct switchdev_trans_item *tritem;
+
+ if (list_empty(&trans->item_list))
+ return NULL;
+ tritem = list_first_entry(&trans->item_list,
+ struct switchdev_trans_item, list);
+ list_del(&tritem->list);
+ return tritem;
+}
+
+/**
+ * switchdev_trans_item_dequeue - Dequeue data item from transaction queue
+ *
+ * @trans: transaction
+ */
+void *switchdev_trans_item_dequeue(struct switchdev_trans *trans)
+{
+ struct switchdev_trans_item *tritem;
+
+ tritem = __switchdev_trans_item_dequeue(trans);
+ BUG_ON(!tritem);
+ return tritem->data;
+}
+EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue);
+
+static void switchdev_trans_init(struct switchdev_trans *trans)
+{
+ INIT_LIST_HEAD(&trans->item_list);
+}
+
+static void switchdev_trans_items_destroy(struct switchdev_trans *trans)
+{
+ struct switchdev_trans_item *tritem;
+
+ while ((tritem = __switchdev_trans_item_dequeue(trans)))
+ tritem->destructor(tritem->data);
+}
+
+static void switchdev_trans_items_warn_destroy(struct net_device *dev,
+ struct switchdev_trans *trans)
+{
+ WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n",
+ dev->name);
+ switchdev_trans_items_destroy(trans);
+}
+
+static LIST_HEAD(deferred);
+static DEFINE_SPINLOCK(deferred_lock);
+
+typedef void switchdev_deferred_func_t(struct net_device *dev,
+ const void *data);
+
+struct switchdev_deferred_item {
+ struct list_head list;
+ struct net_device *dev;
+ switchdev_deferred_func_t *func;
+ unsigned long data[0];
+};
+
+static struct switchdev_deferred_item *switchdev_deferred_dequeue(void)
+{
+ struct switchdev_deferred_item *dfitem;
+
+ spin_lock_bh(&deferred_lock);
+ if (list_empty(&deferred)) {
+ dfitem = NULL;
+ goto unlock;
+ }
+ dfitem = list_first_entry(&deferred,
+ struct switchdev_deferred_item, list);
+ list_del(&dfitem->list);
+unlock:
+ spin_unlock_bh(&deferred_lock);
+ return dfitem;
+}
+
+/**
+ * switchdev_deferred_process - Process ops in deferred queue
+ *
+ * Called to flush the ops currently queued in deferred ops queue.
+ * rtnl_lock must be held.
+ */
+void switchdev_deferred_process(void)
+{
+ struct switchdev_deferred_item *dfitem;
+
+ ASSERT_RTNL();
+
+ while ((dfitem = switchdev_deferred_dequeue())) {
+ dfitem->func(dfitem->dev, dfitem->data);
+ dev_put(dfitem->dev);
+ kfree(dfitem);
+ }
+}
+EXPORT_SYMBOL_GPL(switchdev_deferred_process);
+
+static void switchdev_deferred_process_work(struct work_struct *work)
+{
+ rtnl_lock();
+ switchdev_deferred_process();
+ rtnl_unlock();
+}
+
+static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
+
+static int switchdev_deferred_enqueue(struct net_device *dev,
+ const void *data, size_t data_len,
+ switchdev_deferred_func_t *func)
+{
+ struct switchdev_deferred_item *dfitem;
+
+ dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC);
+ if (!dfitem)
+ return -ENOMEM;
+ dfitem->dev = dev;
+ dfitem->func = func;
+ memcpy(dfitem->data, data, data_len);
+ dev_hold(dev);
+ spin_lock_bh(&deferred_lock);
+ list_add_tail(&dfitem->list, &deferred);
+ spin_unlock_bh(&deferred_lock);
+ schedule_work(&deferred_process_work);
+ return 0;
+}
+
+/**
* switchdev_port_attr_get - Get port attribute
*
* @dev: port device
@@ -32,7 +186,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
struct net_device *lower_dev;
struct list_head *iter;
struct switchdev_attr first = {
- .id = SWITCHDEV_ATTR_UNDEFINED
+ .id = SWITCHDEV_ATTR_ID_UNDEFINED
};
int err = -EOPNOTSUPP;
@@ -52,7 +206,7 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
err = switchdev_port_attr_get(lower_dev, attr);
if (err)
break;
- if (first.id == SWITCHDEV_ATTR_UNDEFINED)
+ if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED)
first = *attr;
else if (memcmp(&first, attr, sizeof(*attr)))
return -ENODATA;
@@ -63,7 +217,8 @@ int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr)
EXPORT_SYMBOL_GPL(switchdev_port_attr_get);
static int __switchdev_port_attr_set(struct net_device *dev,
- struct switchdev_attr *attr)
+ const struct switchdev_attr *attr,
+ struct switchdev_trans *trans)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
@@ -71,10 +226,10 @@ static int __switchdev_port_attr_set(struct net_device *dev,
int err = -EOPNOTSUPP;
if (ops && ops->switchdev_port_attr_set)
- return ops->switchdev_port_attr_set(dev, attr);
+ return ops->switchdev_port_attr_set(dev, attr, trans);
if (attr->flags & SWITCHDEV_F_NO_RECURSE)
- return err;
+ goto done;
/* Switch device port(s) may be stacked under
* bond/team/vlan dev, so recurse down to set attr on
@@ -82,80 +237,28 @@ static int __switchdev_port_attr_set(struct net_device *dev,
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = __switchdev_port_attr_set(lower_dev, attr);
+ err = __switchdev_port_attr_set(lower_dev, attr, trans);
+ if (err == -EOPNOTSUPP &&
+ attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+ continue;
if (err)
break;
}
- return err;
-}
-
-struct switchdev_attr_set_work {
- struct work_struct work;
- struct net_device *dev;
- struct switchdev_attr attr;
-};
-
-static void switchdev_port_attr_set_work(struct work_struct *work)
-{
- struct switchdev_attr_set_work *asw =
- container_of(work, struct switchdev_attr_set_work, work);
- int err;
-
- rtnl_lock();
- err = switchdev_port_attr_set(asw->dev, &asw->attr);
- if (err && err != -EOPNOTSUPP)
- netdev_err(asw->dev, "failed (err=%d) to set attribute (id=%d)\n",
- err, asw->attr.id);
- rtnl_unlock();
+done:
+ if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP)
+ err = 0;
- dev_put(asw->dev);
- kfree(work);
-}
-
-static int switchdev_port_attr_set_defer(struct net_device *dev,
- struct switchdev_attr *attr)
-{
- struct switchdev_attr_set_work *asw;
-
- asw = kmalloc(sizeof(*asw), GFP_ATOMIC);
- if (!asw)
- return -ENOMEM;
-
- INIT_WORK(&asw->work, switchdev_port_attr_set_work);
-
- dev_hold(dev);
- asw->dev = dev;
- memcpy(&asw->attr, attr, sizeof(asw->attr));
-
- schedule_work(&asw->work);
-
- return 0;
+ return err;
}
-/**
- * switchdev_port_attr_set - Set port attribute
- *
- * @dev: port device
- * @attr: attribute to set
- *
- * Use a 2-phase prepare-commit transaction model to ensure
- * system is not left in a partially updated state due to
- * failure from driver/device.
- */
-int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
+static int switchdev_port_attr_set_now(struct net_device *dev,
+ const struct switchdev_attr *attr)
{
+ struct switchdev_trans trans;
int err;
- if (!rtnl_is_locked()) {
- /* Running prepare-commit transaction across stacked
- * devices requires nothing moves, so if rtnl_lock is
- * not held, schedule a worker thread to hold rtnl_lock
- * while setting attr.
- */
-
- return switchdev_port_attr_set_defer(dev, attr);
- }
+ switchdev_trans_init(&trans);
/* Phase I: prepare for attr set. Driver/device should fail
* here if there are going to be issues in the commit phase,
@@ -164,18 +267,16 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
* but should not commit the attr.
*/
- attr->trans = SWITCHDEV_TRANS_PREPARE;
- err = __switchdev_port_attr_set(dev, attr);
+ trans.ph_prepare = true;
+ err = __switchdev_port_attr_set(dev, attr, &trans);
if (err) {
/* Prepare phase failed: abort the transaction. Any
* resources reserved in the prepare phase are
* released.
*/
- if (err != -EOPNOTSUPP) {
- attr->trans = SWITCHDEV_TRANS_ABORT;
- __switchdev_port_attr_set(dev, attr);
- }
+ if (err != -EOPNOTSUPP)
+ switchdev_trans_items_destroy(&trans);
return err;
}
@@ -185,17 +286,60 @@ int switchdev_port_attr_set(struct net_device *dev, struct switchdev_attr *attr)
* because the driver said everythings was OK in phase I.
*/
- attr->trans = SWITCHDEV_TRANS_COMMIT;
- err = __switchdev_port_attr_set(dev, attr);
+ trans.ph_prepare = false;
+ err = __switchdev_port_attr_set(dev, attr, &trans);
WARN(err, "%s: Commit of attribute (id=%d) failed.\n",
dev->name, attr->id);
+ switchdev_trans_items_warn_destroy(dev, &trans);
return err;
}
+
+static void switchdev_port_attr_set_deferred(struct net_device *dev,
+ const void *data)
+{
+ const struct switchdev_attr *attr = data;
+ int err;
+
+ err = switchdev_port_attr_set_now(dev, attr);
+ if (err && err != -EOPNOTSUPP)
+ netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n",
+ err, attr->id);
+}
+
+static int switchdev_port_attr_set_defer(struct net_device *dev,
+ const struct switchdev_attr *attr)
+{
+ return switchdev_deferred_enqueue(dev, attr, sizeof(*attr),
+ switchdev_port_attr_set_deferred);
+}
+
+/**
+ * switchdev_port_attr_set - Set port attribute
+ *
+ * @dev: port device
+ * @attr: attribute to set
+ *
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
+ *
+ * rtnl_lock must be held and must not be in atomic section,
+ * in case SWITCHDEV_F_DEFER flag is not set.
+ */
+int switchdev_port_attr_set(struct net_device *dev,
+ const struct switchdev_attr *attr)
+{
+ if (attr->flags & SWITCHDEV_F_DEFER)
+ return switchdev_port_attr_set_defer(dev, attr);
+ ASSERT_RTNL();
+ return switchdev_port_attr_set_now(dev, attr);
+}
EXPORT_SYMBOL_GPL(switchdev_port_attr_set);
static int __switchdev_port_obj_add(struct net_device *dev,
- struct switchdev_obj *obj)
+ const struct switchdev_obj *obj,
+ struct switchdev_trans *trans)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
@@ -203,7 +347,7 @@ static int __switchdev_port_obj_add(struct net_device *dev,
int err = -EOPNOTSUPP;
if (ops && ops->switchdev_port_obj_add)
- return ops->switchdev_port_obj_add(dev, obj);
+ return ops->switchdev_port_obj_add(dev, obj, trans);
/* Switch device port(s) may be stacked under
* bond/team/vlan dev, so recurse down to add object on
@@ -211,7 +355,7 @@ static int __switchdev_port_obj_add(struct net_device *dev,
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = __switchdev_port_obj_add(lower_dev, obj);
+ err = __switchdev_port_obj_add(lower_dev, obj, trans);
if (err)
break;
}
@@ -219,24 +363,16 @@ static int __switchdev_port_obj_add(struct net_device *dev,
return err;
}
-/**
- * switchdev_port_obj_add - Add port object
- *
- * @dev: port device
- * @obj: object to add
- *
- * Use a 2-phase prepare-commit transaction model to ensure
- * system is not left in a partially updated state due to
- * failure from driver/device.
- *
- * rtnl_lock must be held.
- */
-int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
+static int switchdev_port_obj_add_now(struct net_device *dev,
+ const struct switchdev_obj *obj)
{
+ struct switchdev_trans trans;
int err;
ASSERT_RTNL();
+ switchdev_trans_init(&trans);
+
/* Phase I: prepare for obj add. Driver/device should fail
* here if there are going to be issues in the commit phase,
* such as lack of resources or support. The driver/device
@@ -244,18 +380,16 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
* but should not commit the obj.
*/
- obj->trans = SWITCHDEV_TRANS_PREPARE;
- err = __switchdev_port_obj_add(dev, obj);
+ trans.ph_prepare = true;
+ err = __switchdev_port_obj_add(dev, obj, &trans);
if (err) {
/* Prepare phase failed: abort the transaction. Any
* resources reserved in the prepare phase are
* released.
*/
- if (err != -EOPNOTSUPP) {
- obj->trans = SWITCHDEV_TRANS_ABORT;
- __switchdev_port_obj_add(dev, obj);
- }
+ if (err != -EOPNOTSUPP)
+ switchdev_trans_items_destroy(&trans);
return err;
}
@@ -265,21 +399,59 @@ int switchdev_port_obj_add(struct net_device *dev, struct switchdev_obj *obj)
* because the driver said everythings was OK in phase I.
*/
- obj->trans = SWITCHDEV_TRANS_COMMIT;
- err = __switchdev_port_obj_add(dev, obj);
+ trans.ph_prepare = false;
+ err = __switchdev_port_obj_add(dev, obj, &trans);
WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id);
+ switchdev_trans_items_warn_destroy(dev, &trans);
return err;
}
-EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
+
+static void switchdev_port_obj_add_deferred(struct net_device *dev,
+ const void *data)
+{
+ const struct switchdev_obj *obj = data;
+ int err;
+
+ err = switchdev_port_obj_add_now(dev, obj);
+ if (err && err != -EOPNOTSUPP)
+ netdev_err(dev, "failed (err=%d) to add object (id=%d)\n",
+ err, obj->id);
+}
+
+static int switchdev_port_obj_add_defer(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+ switchdev_port_obj_add_deferred);
+}
/**
- * switchdev_port_obj_del - Delete port object
+ * switchdev_port_obj_add - Add port object
*
* @dev: port device
- * @obj: object to delete
+ * @id: object ID
+ * @obj: object to add
+ *
+ * Use a 2-phase prepare-commit transaction model to ensure
+ * system is not left in a partially updated state due to
+ * failure from driver/device.
+ *
+ * rtnl_lock must be held and must not be in atomic section,
+ * in case SWITCHDEV_F_DEFER flag is not set.
*/
-int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
+int switchdev_port_obj_add(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ if (obj->flags & SWITCHDEV_F_DEFER)
+ return switchdev_port_obj_add_defer(dev, obj);
+ ASSERT_RTNL();
+ return switchdev_port_obj_add_now(dev, obj);
+}
+EXPORT_SYMBOL_GPL(switchdev_port_obj_add);
+
+static int switchdev_port_obj_del_now(struct net_device *dev,
+ const struct switchdev_obj *obj)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
@@ -295,30 +467,75 @@ int switchdev_port_obj_del(struct net_device *dev, struct switchdev_obj *obj)
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_obj_del(lower_dev, obj);
+ err = switchdev_port_obj_del_now(lower_dev, obj);
if (err)
break;
}
return err;
}
+
+static void switchdev_port_obj_del_deferred(struct net_device *dev,
+ const void *data)
+{
+ const struct switchdev_obj *obj = data;
+ int err;
+
+ err = switchdev_port_obj_del_now(dev, obj);
+ if (err && err != -EOPNOTSUPP)
+ netdev_err(dev, "failed (err=%d) to del object (id=%d)\n",
+ err, obj->id);
+}
+
+static int switchdev_port_obj_del_defer(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ return switchdev_deferred_enqueue(dev, obj, sizeof(*obj),
+ switchdev_port_obj_del_deferred);
+}
+
+/**
+ * switchdev_port_obj_del - Delete port object
+ *
+ * @dev: port device
+ * @id: object ID
+ * @obj: object to delete
+ *
+ * rtnl_lock must be held and must not be in atomic section,
+ * in case SWITCHDEV_F_DEFER flag is not set.
+ */
+int switchdev_port_obj_del(struct net_device *dev,
+ const struct switchdev_obj *obj)
+{
+ if (obj->flags & SWITCHDEV_F_DEFER)
+ return switchdev_port_obj_del_defer(dev, obj);
+ ASSERT_RTNL();
+ return switchdev_port_obj_del_now(dev, obj);
+}
EXPORT_SYMBOL_GPL(switchdev_port_obj_del);
/**
* switchdev_port_obj_dump - Dump port objects
*
* @dev: port device
+ * @id: object ID
* @obj: object to dump
+ * @cb: function to call with a filled object
+ *
+ * rtnl_lock must be held.
*/
-int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
+int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj,
+ switchdev_obj_dump_cb_t *cb)
{
const struct switchdev_ops *ops = dev->switchdev_ops;
struct net_device *lower_dev;
struct list_head *iter;
int err = -EOPNOTSUPP;
+ ASSERT_RTNL();
+
if (ops && ops->switchdev_port_obj_dump)
- return ops->switchdev_port_obj_dump(dev, obj);
+ return ops->switchdev_port_obj_dump(dev, obj, cb);
/* Switch device port(s) may be stacked under
* bond/team/vlan dev, so recurse down to dump objects on
@@ -326,7 +543,7 @@ int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj)
*/
netdev_for_each_lower_dev(dev, lower_dev, iter) {
- err = switchdev_port_obj_dump(lower_dev, obj);
+ err = switchdev_port_obj_dump(lower_dev, obj, cb);
break;
}
@@ -398,7 +615,7 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev,
EXPORT_SYMBOL_GPL(call_switchdev_notifiers);
struct switchdev_vlan_dump {
- struct switchdev_obj obj;
+ struct switchdev_obj_port_vlan vlan;
struct sk_buff *skb;
u32 filter_mask;
u16 flags;
@@ -406,8 +623,7 @@ struct switchdev_vlan_dump {
u16 end;
};
-static int switchdev_port_vlan_dump_put(struct net_device *dev,
- struct switchdev_vlan_dump *dump)
+static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump)
{
struct bridge_vlan_info vinfo;
@@ -437,12 +653,11 @@ static int switchdev_port_vlan_dump_put(struct net_device *dev,
return 0;
}
-static int switchdev_port_vlan_dump_cb(struct net_device *dev,
- struct switchdev_obj *obj)
+static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj)
{
+ struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj);
struct switchdev_vlan_dump *dump =
- container_of(obj, struct switchdev_vlan_dump, obj);
- struct switchdev_obj_vlan *vlan = &dump->obj.u.vlan;
+ container_of(vlan, struct switchdev_vlan_dump, vlan);
int err = 0;
if (vlan->vid_begin > vlan->vid_end)
@@ -453,7 +668,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
for (dump->begin = dump->end = vlan->vid_begin;
dump->begin <= vlan->vid_end;
dump->begin++, dump->end++) {
- err = switchdev_port_vlan_dump_put(dev, dump);
+ err = switchdev_port_vlan_dump_put(dump);
if (err)
return err;
}
@@ -465,7 +680,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
/* prepend */
dump->begin = vlan->vid_begin;
} else {
- err = switchdev_port_vlan_dump_put(dev, dump);
+ err = switchdev_port_vlan_dump_put(dump);
dump->flags = vlan->flags;
dump->begin = vlan->vid_begin;
dump->end = vlan->vid_end;
@@ -477,7 +692,7 @@ static int switchdev_port_vlan_dump_cb(struct net_device *dev,
/* append */
dump->end = vlan->vid_end;
} else {
- err = switchdev_port_vlan_dump_put(dev, dump);
+ err = switchdev_port_vlan_dump_put(dump);
dump->flags = vlan->flags;
dump->begin = vlan->vid_begin;
dump->end = vlan->vid_end;
@@ -494,10 +709,7 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
u32 filter_mask)
{
struct switchdev_vlan_dump dump = {
- .obj = {
- .id = SWITCHDEV_OBJ_PORT_VLAN,
- .cb = switchdev_port_vlan_dump_cb,
- },
+ .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
.skb = skb,
.filter_mask = filter_mask,
};
@@ -505,12 +717,13 @@ static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev,
if ((filter_mask & RTEXT_FILTER_BRVLAN) ||
(filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) {
- err = switchdev_port_obj_dump(dev, &dump.obj);
+ err = switchdev_port_obj_dump(dev, &dump.vlan.obj,
+ switchdev_port_vlan_dump_cb);
if (err)
goto err_out;
if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)
/* last one */
- err = switchdev_port_vlan_dump_put(dev, &dump);
+ err = switchdev_port_vlan_dump_put(&dump);
}
err_out:
@@ -530,7 +743,7 @@ int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
int nlflags)
{
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
};
u16 mode = BRIDGE_MODE_UNDEF;
u32 mask = BR_LEARNING | BR_LEARNING_SYNC;
@@ -551,7 +764,7 @@ static int switchdev_port_br_setflag(struct net_device *dev,
unsigned long brport_flag)
{
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_BRIDGE_FLAGS,
+ .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS,
};
u8 flag = nla_get_u8(nlattr);
int err;
@@ -618,14 +831,13 @@ static int switchdev_port_br_setlink_protinfo(struct net_device *dev,
static int switchdev_port_br_afspec(struct net_device *dev,
struct nlattr *afspec,
int (*f)(struct net_device *dev,
- struct switchdev_obj *obj))
+ const struct switchdev_obj *obj))
{
struct nlattr *attr;
struct bridge_vlan_info *vinfo;
- struct switchdev_obj obj = {
- .id = SWITCHDEV_OBJ_PORT_VLAN,
+ struct switchdev_obj_port_vlan vlan = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
};
- struct switchdev_obj_vlan *vlan = &obj.u.vlan;
int rem;
int err;
@@ -637,30 +849,33 @@ static int switchdev_port_br_afspec(struct net_device *dev,
vinfo = nla_data(attr);
if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK)
return -EINVAL;
- vlan->flags = vinfo->flags;
+ vlan.flags = vinfo->flags;
if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) {
- if (vlan->vid_begin)
+ if (vlan.vid_begin)
+ return -EINVAL;
+ vlan.vid_begin = vinfo->vid;
+ /* don't allow range of pvids */
+ if (vlan.flags & BRIDGE_VLAN_INFO_PVID)
return -EINVAL;
- vlan->vid_begin = vinfo->vid;
} else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) {
- if (!vlan->vid_begin)
+ if (!vlan.vid_begin)
return -EINVAL;
- vlan->vid_end = vinfo->vid;
- if (vlan->vid_end <= vlan->vid_begin)
+ vlan.vid_end = vinfo->vid;
+ if (vlan.vid_end <= vlan.vid_begin)
return -EINVAL;
- err = f(dev, &obj);
+ err = f(dev, &vlan.obj);
if (err)
return err;
- memset(vlan, 0, sizeof(*vlan));
+ vlan.vid_begin = 0; /* reset range only; memset would also wipe vlan.obj.id */
} else {
- if (vlan->vid_begin)
+ if (vlan.vid_begin)
return -EINVAL;
- vlan->vid_begin = vinfo->vid;
- vlan->vid_end = vinfo->vid;
- err = f(dev, &obj);
+ vlan.vid_begin = vinfo->vid;
+ vlan.vid_end = vinfo->vid;
+ err = f(dev, &vlan.obj);
if (err)
return err;
- memset(vlan, 0, sizeof(*vlan));
+ vlan.vid_begin = 0; /* reset range only; memset would also wipe vlan.obj.id */
}
}
@@ -742,15 +957,13 @@ int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr,
u16 vid, u16 nlm_flags)
{
- struct switchdev_obj obj = {
- .id = SWITCHDEV_OBJ_PORT_FDB,
- .u.fdb = {
- .addr = addr,
- .vid = vid,
- },
+ struct switchdev_obj_port_fdb fdb = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
+ .vid = vid,
};
- return switchdev_port_obj_add(dev, &obj);
+ ether_addr_copy(fdb.addr, addr);
+ return switchdev_port_obj_add(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_add);
@@ -769,30 +982,29 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr,
u16 vid)
{
- struct switchdev_obj obj = {
- .id = SWITCHDEV_OBJ_PORT_FDB,
- .u.fdb = {
- .addr = addr,
- .vid = vid,
- },
+ struct switchdev_obj_port_fdb fdb = {
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
+ .vid = vid,
};
- return switchdev_port_obj_del(dev, &obj);
+ ether_addr_copy(fdb.addr, addr);
+ return switchdev_port_obj_del(dev, &fdb.obj);
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_del);
struct switchdev_fdb_dump {
- struct switchdev_obj obj;
+ struct switchdev_obj_port_fdb fdb;
+ struct net_device *dev;
struct sk_buff *skb;
struct netlink_callback *cb;
int idx;
};
-static int switchdev_port_fdb_dump_cb(struct net_device *dev,
- struct switchdev_obj *obj)
+static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj)
{
+ struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj);
struct switchdev_fdb_dump *dump =
- container_of(obj, struct switchdev_fdb_dump, obj);
+ container_of(fdb, struct switchdev_fdb_dump, fdb);
u32 portid = NETLINK_CB(dump->cb->skb).portid;
u32 seq = dump->cb->nlh->nlmsg_seq;
struct nlmsghdr *nlh;
@@ -812,13 +1024,13 @@ static int switchdev_port_fdb_dump_cb(struct net_device *dev,
ndm->ndm_pad2 = 0;
ndm->ndm_flags = NTF_SELF;
ndm->ndm_type = 0;
- ndm->ndm_ifindex = dev->ifindex;
- ndm->ndm_state = obj->u.fdb.ndm_state;
+ ndm->ndm_ifindex = dump->dev->ifindex;
+ ndm->ndm_state = fdb->ndm_state;
- if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, obj->u.fdb.addr))
+ if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr))
goto nla_put_failure;
- if (obj->u.fdb.vid && nla_put_u16(dump->skb, NDA_VLAN, obj->u.fdb.vid))
+ if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid))
goto nla_put_failure;
nlmsg_end(dump->skb, nlh);
@@ -848,16 +1060,14 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *filter_dev, int idx)
{
struct switchdev_fdb_dump dump = {
- .obj = {
- .id = SWITCHDEV_OBJ_PORT_FDB,
- .cb = switchdev_port_fdb_dump_cb,
- },
+ .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB,
+ .dev = dev,
.skb = skb,
.cb = cb,
.idx = idx,
};
- switchdev_port_obj_dump(dev, &dump.obj);
+ switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb);
return dump.idx;
}
EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump);
@@ -888,12 +1098,14 @@ static struct net_device *switchdev_get_lowest_dev(struct net_device *dev)
static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
{
struct switchdev_attr attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
};
struct switchdev_attr prev_attr;
struct net_device *dev = NULL;
int nhsel;
+ ASSERT_RTNL();
+
/* For this route, all nexthop devs must be on the same switch. */
for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
@@ -935,21 +1147,20 @@ static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
u8 tos, u8 type, u32 nlflags, u32 tb_id)
{
- struct switchdev_obj fib_obj = {
- .id = SWITCHDEV_OBJ_IPV4_FIB,
- .u.ipv4_fib = {
- .dst = dst,
- .dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .nlflags = nlflags,
- .tb_id = tb_id,
- },
+ struct switchdev_obj_ipv4_fib ipv4_fib = {
+ .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
+ .dst = dst,
+ .dst_len = dst_len,
+ .tos = tos,
+ .type = type,
+ .nlflags = nlflags,
+ .tb_id = tb_id,
};
struct net_device *dev;
int err = 0;
+ memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
+
/* Don't offload route if using custom ip rules or if
* IPv4 FIB offloading has been disabled completely.
*/
@@ -966,7 +1177,7 @@ int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
if (!dev)
return 0;
- err = switchdev_port_obj_add(dev, &fib_obj);
+ err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
if (!err)
fi->fib_flags |= RTNH_F_OFFLOAD;
@@ -989,21 +1200,20 @@ EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add);
int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
u8 tos, u8 type, u32 tb_id)
{
- struct switchdev_obj fib_obj = {
- .id = SWITCHDEV_OBJ_IPV4_FIB,
- .u.ipv4_fib = {
- .dst = dst,
- .dst_len = dst_len,
- .fi = fi,
- .tos = tos,
- .type = type,
- .nlflags = 0,
- .tb_id = tb_id,
- },
+ struct switchdev_obj_ipv4_fib ipv4_fib = {
+ .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
+ .dst = dst,
+ .dst_len = dst_len,
+ .tos = tos,
+ .type = type,
+ .nlflags = 0,
+ .tb_id = tb_id,
};
struct net_device *dev;
int err = 0;
+ memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi));
+
if (!(fi->fib_flags & RTNH_F_OFFLOAD))
return 0;
@@ -1011,7 +1221,7 @@ int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
if (!dev)
return 0;
- err = switchdev_port_obj_del(dev, &fib_obj);
+ err = switchdev_port_obj_del(dev, &ipv4_fib.obj);
if (!err)
fi->fib_flags &= ~RTNH_F_OFFLOAD;
@@ -1043,11 +1253,11 @@ static bool switchdev_port_same_parent_id(struct net_device *a,
struct net_device *b)
{
struct switchdev_attr a_attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
struct switchdev_attr b_attr = {
- .id = SWITCHDEV_ATTR_PORT_PARENT_ID,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
.flags = SWITCHDEV_F_NO_RECURSE,
};
@@ -1126,10 +1336,11 @@ void switchdev_port_fwd_mark_set(struct net_device *dev,
u32 mark = dev->ifindex;
u32 reset_mark = 0;
- if (group_dev && joining) {
- mark = switchdev_port_fwd_mark_get(dev, group_dev);
- } else if (group_dev && !joining) {
- if (dev->offload_fwd_mark == mark)
+ if (group_dev) {
+ ASSERT_RTNL();
+ if (joining)
+ mark = switchdev_port_fwd_mark_get(dev, group_dev);
+ else if (dev->offload_fwd_mark == mark)
/* Ohoh, this port was the mark reference port,
* but it's leaving the group, so reset the
* mark for the remaining ports in the group.
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index ce9f7bfc0b92..82b278668ab7 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -362,6 +362,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr)
b_ptr->media->disable_media(b_ptr);
tipc_node_delete_links(net, b_ptr->identity);
+ RCU_INIT_POINTER(b_ptr->media_ptr, NULL);
if (b_ptr->link_req)
tipc_disc_delete(b_ptr->link_req);
@@ -399,16 +400,13 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
/* tipc_disable_l2_media - detach TIPC bearer from an L2 interface
*
- * Mark L2 bearer as inactive so that incoming buffers are thrown away,
- * then get worker thread to complete bearer cleanup. (Can't do cleanup
- * here because cleanup code needs to sleep and caller holds spinlocks.)
+ * Mark L2 bearer as inactive so that incoming buffers are thrown away
*/
void tipc_disable_l2_media(struct tipc_bearer *b)
{
struct net_device *dev;
dev = (struct net_device *)rtnl_dereference(b->media_ptr);
- RCU_INIT_POINTER(b->media_ptr, NULL);
RCU_INIT_POINTER(dev->tipc_ptr, NULL);
synchronize_net();
dev_put(dev);
@@ -554,7 +552,7 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
case NETDEV_CHANGE:
if (netif_carrier_ok(dev))
break;
- case NETDEV_DOWN:
+ case NETDEV_GOING_DOWN:
case NETDEV_CHANGEMTU:
tipc_reset_bearer(net, b_ptr);
break;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 75db07c78a69..ff9b0b92e62e 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -120,11 +120,21 @@ bool tipc_link_is_up(struct tipc_link *l)
return link_is_up(l);
}
+/* tipc_link_peer_is_down - test whether the link is in state LINK_PEER_RESET
+ * @l: link to examine
+ */
+bool tipc_link_peer_is_down(struct tipc_link *l)
+{
+	if (l->state != LINK_PEER_RESET)
+		return false;
+
+	return true;
+}
+
bool tipc_link_is_reset(struct tipc_link *l)
{
return l->state & (LINK_RESET | LINK_FAILINGOVER | LINK_ESTABLISHING);
}
+/* tipc_link_is_establishing - test whether the link is in state
+ * LINK_ESTABLISHING
+ * @l: link to examine
+ */
+bool tipc_link_is_establishing(struct tipc_link *l)
+{
+	if (l->state != LINK_ESTABLISHING)
+		return false;
+
+	return true;
+}
+
bool tipc_link_is_synching(struct tipc_link *l)
{
return l->state == LINK_SYNCHING;
@@ -321,14 +331,15 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
switch (evt) {
case LINK_ESTABLISH_EVT:
l->state = LINK_ESTABLISHED;
- rc |= TIPC_LINK_UP_EVT;
break;
case LINK_FAILOVER_BEGIN_EVT:
l->state = LINK_FAILINGOVER;
break;
- case LINK_PEER_RESET_EVT:
case LINK_RESET_EVT:
+ l->state = LINK_RESET;
+ break;
case LINK_FAILURE_EVT:
+ case LINK_PEER_RESET_EVT:
case LINK_SYNCH_BEGIN_EVT:
case LINK_FAILOVER_END_EVT:
break;
@@ -578,8 +589,6 @@ void tipc_link_purge_queues(struct tipc_link *l_ptr)
void tipc_link_reset(struct tipc_link *l)
{
- tipc_link_fsm_evt(l, LINK_RESET_EVT);
-
/* Link is down, accept any session */
l->peer_session = WILDCARD_SESSION;
@@ -953,7 +962,7 @@ static bool tipc_data_input(struct tipc_link *link, struct sk_buff *skb,
case TIPC_HIGH_IMPORTANCE:
case TIPC_CRITICAL_IMPORTANCE:
case CONN_MANAGER:
- __skb_queue_tail(inputq, skb);
+ skb_queue_tail(inputq, skb);
return true;
case NAME_DISTRIBUTOR:
node->bclink.recv_permitted = true;
@@ -982,6 +991,7 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
struct tipc_msg *hdr = buf_msg(skb);
struct sk_buff **reasm_skb = &l->reasm_buf;
struct sk_buff *iskb;
+ struct sk_buff_head tmpq;
int usr = msg_user(hdr);
int rc = 0;
int pos = 0;
@@ -1006,10 +1016,12 @@ static int tipc_link_input(struct tipc_link *l, struct sk_buff *skb,
}
if (usr == MSG_BUNDLER) {
+ skb_queue_head_init(&tmpq);
l->stats.recv_bundles++;
l->stats.recv_bundled += msg_msgcnt(hdr);
while (tipc_msg_extract(skb, &iskb, &pos))
- tipc_data_input(l, iskb, inputq);
+ tipc_data_input(l, iskb, &tmpq);
+ tipc_skb_queue_splice_tail(&tmpq, inputq);
return 0;
} else if (usr == MSG_FRAGMENTER) {
l->stats.recv_fragments++;
@@ -1044,49 +1056,76 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
return released;
}
+/* tipc_link_build_ack_msg: build a STATE message acknowledging reception
+ * @l: link whose unacked-packet counter is cleared and whose sent_acks
+ *     statistic is bumped
+ * @xmitq: queue handed on to tipc_link_build_proto_msg()
+ */
+void tipc_link_build_ack_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
+{
+	l->stats.sent_acks++;
+	l->rcv_unacked = 0;
+	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+}
+
+/* tipc_link_build_reset_msg: build the link RESET or ACTIVATE message
+ * @l: link the message is built for
+ * @xmitq: queue handed on to tipc_link_build_proto_msg()
+ *
+ * ACTIVATE_MSG is chosen when the link is in state LINK_ESTABLISHING,
+ * RESET_MSG in every other state.
+ */
+void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
+{
+	int mtyp = (l->state == LINK_ESTABLISHING) ? ACTIVATE_MSG : RESET_MSG;
+
+	tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq);
+}
+
+/* tipc_link_build_nack_msg: account for a deferred packet, maybe send a NACK
+ * @l: link whose deferred_recv statistic is incremented
+ * @xmitq: queue handed on to tipc_link_build_proto_msg()
+ *
+ * A STATE message is built when the deferred queue holds exactly one
+ * packet, or when the deferral counter is a multiple of TIPC_NACK_INTV.
+ */
+static void tipc_link_build_nack_msg(struct tipc_link *l,
+				     struct sk_buff_head *xmitq)
+{
+	u32 deferred;
+
+	l->stats.deferred_recv++;
+	deferred = l->stats.deferred_recv;
+
+	if (skb_queue_len(&l->deferdq) != 1 && (deferred % TIPC_NACK_INTV))
+		return;
+
+	tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq);
+}
+
/* tipc_link_rcv - process TIPC packets/messages arriving from off-node
- * @link: the link that should handle the message
+ * @l: the link that should handle the message
* @skb: TIPC packet
* @xmitq: queue to place packets to be sent after this call
*/
int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq)
{
- struct sk_buff_head *arrvq = &l->deferdq;
- struct sk_buff_head tmpq;
+ struct sk_buff_head *defq = &l->deferdq;
struct tipc_msg *hdr;
- u16 seqno, rcv_nxt;
+ u16 seqno, rcv_nxt, win_lim;
int rc = 0;
- __skb_queue_head_init(&tmpq);
-
- if (unlikely(!__tipc_skb_queue_sorted(arrvq, skb))) {
- if (!(skb_queue_len(arrvq) % TIPC_NACK_INTV))
- tipc_link_build_proto_msg(l, STATE_MSG, 0,
- 0, 0, 0, xmitq);
- return rc;
- }
-
- while ((skb = skb_peek(arrvq))) {
+ do {
hdr = buf_msg(skb);
+ seqno = msg_seqno(hdr);
+ rcv_nxt = l->rcv_nxt;
+ win_lim = rcv_nxt + TIPC_MAX_LINK_WIN;
/* Verify and update link state */
- if (unlikely(msg_user(hdr) == LINK_PROTOCOL)) {
- __skb_dequeue(arrvq);
- rc = tipc_link_proto_rcv(l, skb, xmitq);
- continue;
- }
+ if (unlikely(msg_user(hdr) == LINK_PROTOCOL))
+ return tipc_link_proto_rcv(l, skb, xmitq);
if (unlikely(!link_is_up(l))) {
- rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
- if (!link_is_up(l)) {
- kfree_skb(__skb_dequeue(arrvq));
- goto exit;
- }
+ if (l->state == LINK_ESTABLISHING)
+ rc = TIPC_LINK_UP_EVT;
+ goto drop;
}
+ /* Don't send probe at next timeout expiration */
l->silent_intv_cnt = 0;
+ /* Drop if outside receive window */
+ if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
+ l->stats.duplicates++;
+ goto drop;
+ }
+
/* Forward queues and wake up waiting users */
if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
tipc_link_advance_backlog(l, xmitq);
@@ -1094,39 +1133,28 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
link_prepare_wakeup(l);
}
- /* Defer reception if there is a gap in the sequence */
- seqno = msg_seqno(hdr);
- rcv_nxt = l->rcv_nxt;
- if (unlikely(less(rcv_nxt, seqno))) {
- l->stats.deferred_recv++;
- goto exit;
- }
-
- __skb_dequeue(arrvq);
-
- /* Drop if packet already received */
- if (unlikely(more(rcv_nxt, seqno))) {
- l->stats.duplicates++;
- kfree_skb(skb);
- goto exit;
+ /* Defer delivery if sequence gap */
+ if (unlikely(seqno != rcv_nxt)) {
+ __tipc_skb_queue_sorted(defq, seqno, skb);
+ tipc_link_build_nack_msg(l, xmitq);
+ break;
}
- /* Packet can be delivered */
+ /* Deliver packet */
l->rcv_nxt++;
l->stats.recv_info++;
- if (unlikely(!tipc_data_input(l, skb, &tmpq)))
- rc = tipc_link_input(l, skb, &tmpq);
+ if (!tipc_data_input(l, skb, l->inputq))
+ rc = tipc_link_input(l, skb, l->inputq);
+ if (unlikely(rc))
+ break;
+ if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN))
+ tipc_link_build_ack_msg(l, xmitq);
+
+ } while ((skb = __skb_dequeue(defq)));
- /* Ack at regular intervals */
- if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) {
- l->rcv_unacked = 0;
- l->stats.sent_acks++;
- tipc_link_build_proto_msg(l, STATE_MSG,
- 0, 0, 0, 0, xmitq);
- }
- }
-exit:
- tipc_skb_queue_splice_tail(&tmpq, l->inputq);
+ return rc;
+drop:
+ kfree_skb(skb);
return rc;
}
@@ -1250,7 +1278,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
}
/* tipc_link_tnl_prepare(): prepare and return a list of tunnel packets
- * with contents of the link's tranmsit and backlog queues.
+ * with contents of the link's transmit and backlog queues.
*/
void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq)
@@ -1331,6 +1359,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
u16 peers_tol = msg_link_tolerance(hdr);
u16 peers_prio = msg_linkprio(hdr);
u16 rcv_nxt = l->rcv_nxt;
+ int mtyp = msg_type(hdr);
char *if_name;
int rc = 0;
@@ -1340,7 +1369,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (link_own_addr(l) > msg_prevnode(hdr))
l->net_plane = msg_net_plane(hdr);
- switch (msg_type(hdr)) {
+ switch (mtyp) {
case RESET_MSG:
/* Ignore duplicate RESET with old session number */
@@ -1367,12 +1396,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
if (in_range(peers_prio, l->priority + 1, TIPC_MAX_LINK_PRI))
l->priority = peers_prio;
- if (msg_type(hdr) == RESET_MSG) {
- rc |= tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
- } else if (!link_is_up(l)) {
- tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
- rc |= tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
- }
+ /* ACTIVATE_MSG serves as PEER_RESET if link is already down */
+ if ((mtyp == RESET_MSG) || !link_is_up(l))
+ rc = tipc_link_fsm_evt(l, LINK_PEER_RESET_EVT);
+
+ /* ACTIVATE_MSG takes up link if it was already locally reset */
+ if ((mtyp == ACTIVATE_MSG) && (l->state == LINK_ESTABLISHING))
+ rc = TIPC_LINK_UP_EVT;
+
l->peer_session = msg_session(hdr);
l->peer_bearer_id = msg_bearer_id(hdr);
if (l->mtu > msg_max_pkt(hdr))
@@ -1389,9 +1420,12 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
l->stats.recv_states++;
if (msg_probe(hdr))
l->stats.recv_probes++;
- rc = tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
- if (!link_is_up(l))
+
+ if (!link_is_up(l)) {
+ if (l->state == LINK_ESTABLISHING)
+ rc = TIPC_LINK_UP_EVT;
break;
+ }
/* Send NACK if peer has sent pkts we haven't received yet */
if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 39ff8b6919a4..0201212cb49a 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -185,7 +185,7 @@ struct tipc_link {
} backlog[5];
u16 snd_nxt;
u16 last_retransm;
- u32 window;
+ u16 window;
u32 stale_count;
/* Reception */
@@ -213,10 +213,13 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl,
int mtyp, struct sk_buff_head *xmitq);
void tipc_link_build_bcast_sync_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
+void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq);
int tipc_link_fsm_evt(struct tipc_link *l, int evt);
void tipc_link_reset_fragments(struct tipc_link *l_ptr);
bool tipc_link_is_up(struct tipc_link *l);
+bool tipc_link_peer_is_down(struct tipc_link *l);
bool tipc_link_is_reset(struct tipc_link *l);
+bool tipc_link_is_establishing(struct tipc_link *l);
bool tipc_link_is_synching(struct tipc_link *l);
bool tipc_link_is_failingover(struct tipc_link *l);
bool tipc_link_is_blocked(struct tipc_link *l);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index c5ac436235e0..454f5ec275c8 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -590,3 +590,34 @@ error:
kfree_skb(head);
return NULL;
}
+
+/* __tipc_skb_queue_sorted(): insert pkt into list ordered by sequence number
+ * @list: list to insert into
+ * @seqno: sequence number of buffer to add
+ * @skb: buffer to add
+ *
+ * If a buffer with the same sequence number is found while walking the
+ * list, @skb is freed instead of being queued.
+ */
+void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+			     struct sk_buff *skb)
+{
+	struct sk_buff *cur, *nxt;
+
+	/* Fast path: empty list, or new buffer sorts before the head */
+	if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
+		__skb_queue_head(list, skb);
+		return;
+	}
+
+	/* Fast path: new buffer sorts after the tail */
+	if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
+		__skb_queue_tail(list, skb);
+		return;
+	}
+
+	skb_queue_walk_safe(list, cur, nxt) {
+		u16 cur_seqno = buf_seqno(cur);
+
+		if (more(seqno, cur_seqno))
+			continue;
+		if (seqno != cur_seqno) {
+			/* first queued buffer with a higher seqno */
+			__skb_queue_before(list, cur, skb);
+			return;
+		}
+		/* same seqno already queued */
+		break;
+	}
+	kfree_skb(skb);
+}
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 5351a3f97e8e..9f0ef54be612 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -790,6 +790,8 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m,
int offset, int dsz, int mtu, struct sk_buff_head *list);
bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err);
struct sk_buff *tipc_msg_reassemble(struct sk_buff_head *list);
+void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+ struct sk_buff *skb);
static inline u16 buf_seqno(struct sk_buff *skb)
{
@@ -862,38 +864,6 @@ static inline struct sk_buff *tipc_skb_dequeue(struct sk_buff_head *list,
return skb;
}
-/* tipc_skb_queue_sorted(); sort pkt into list according to sequence number
- * @list: list to be appended to
- * @skb: buffer to add
- * Returns true if queue should treated further, otherwise false
- */
-static inline bool __tipc_skb_queue_sorted(struct sk_buff_head *list,
- struct sk_buff *skb)
-{
- struct sk_buff *_skb, *tmp;
- struct tipc_msg *hdr = buf_msg(skb);
- u16 seqno = msg_seqno(hdr);
-
- if (skb_queue_empty(list) || (msg_user(hdr) == LINK_PROTOCOL)) {
- __skb_queue_head(list, skb);
- return true;
- }
- if (likely(less(seqno, buf_seqno(skb_peek(list))))) {
- __skb_queue_head(list, skb);
- return true;
- }
- if (!more(seqno, buf_seqno(skb_peek_tail(list)))) {
- skb_queue_walk_safe(list, _skb, tmp) {
- if (likely(less(seqno, buf_seqno(_skb)))) {
- __skb_queue_before(list, _skb, skb);
- return true;
- }
- }
- }
- __skb_queue_tail(list, skb);
- return false;
-}
-
/* tipc_skb_queue_splice_tail - append an skb list to lock protected list
* @list: the new list to append. Not lock protected
* @head: target list. Lock protected.
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2c32a83037a3..2670751d0e2e 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -317,7 +317,11 @@ static void __tipc_node_link_up(struct tipc_node *n, int bearer_id,
struct tipc_link *ol = node_active_link(n, 0);
struct tipc_link *nl = n->links[bearer_id].link;
- if (!nl || !tipc_link_is_up(nl))
+ if (!nl)
+ return;
+
+ tipc_link_fsm_evt(nl, LINK_ESTABLISH_EVT);
+ if (!tipc_link_is_up(nl))
return;
n->working_links++;
@@ -416,7 +420,13 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
}
if (!tipc_node_is_up(n)) {
+ if (tipc_link_peer_is_down(l))
+ tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
+ tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
tipc_link_reset(l);
+ tipc_link_build_reset_msg(l, xmitq);
+ *maddr = &n->links[*bearer_id].maddr;
node_lost_contact(n, &le->inputq);
return;
}
@@ -428,6 +438,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
n->sync_point = tnl->rcv_nxt + (U16_MAX / 2 - 1);
tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
tipc_link_reset(l);
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
tipc_node_fsm_evt(n, NODE_FAILOVER_BEGIN_EVT);
*maddr = &n->links[tnl->bearer_id].maddr;
@@ -437,20 +448,28 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete)
{
struct tipc_link_entry *le = &n->links[bearer_id];
+ struct tipc_link *l = le->link;
struct tipc_media_addr *maddr;
struct sk_buff_head xmitq;
+ if (!l)
+ return;
+
__skb_queue_head_init(&xmitq);
tipc_node_lock(n);
- __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
- if (delete && le->link) {
- kfree(le->link);
- le->link = NULL;
- n->link_cnt--;
+ if (!tipc_link_is_establishing(l)) {
+ __tipc_node_link_down(n, &bearer_id, &xmitq, &maddr);
+ if (delete) {
+ kfree(l);
+ le->link = NULL;
+ n->link_cnt--;
+ }
+ } else {
+ /* Defuse pending tipc_node_link_up() */
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
}
tipc_node_unlock(n);
-
tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr);
tipc_sk_rcv(n->net, &le->inputq);
}
@@ -567,6 +586,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
goto exit;
}
tipc_link_reset(l);
+ tipc_link_fsm_evt(l, LINK_RESET_EVT);
if (n->state == NODE_FAILINGOVER)
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
le->link = l;
@@ -579,7 +599,7 @@ void tipc_node_check_dest(struct net *net, u32 onode,
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
tipc_node_unlock(n);
- if (reset)
+ if (reset && !tipc_link_is_reset(l))
tipc_node_link_down(n, b->identity, false);
tipc_node_put(n);
}
@@ -686,10 +706,10 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt)
break;
case SELF_ESTABL_CONTACT_EVT:
case PEER_LOST_CONTACT_EVT:
- break;
case NODE_SYNCH_END_EVT:
- case NODE_SYNCH_BEGIN_EVT:
case NODE_FAILOVER_BEGIN_EVT:
+ break;
+ case NODE_SYNCH_BEGIN_EVT:
case NODE_FAILOVER_END_EVT:
default:
goto illegal_evt;
@@ -849,9 +869,6 @@ static void node_lost_contact(struct tipc_node *n_ptr,
tipc_link_fsm_evt(l, LINK_FAILOVER_END_EVT);
}
- /* Prevent re-contact with node until cleanup is done */
- tipc_node_fsm_evt(n_ptr, SELF_LOST_CONTACT_EVT);
-
/* Notify publications from this node */
n_ptr->action_flags |= TIPC_NOTIFY_NODE_DOWN;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index c170d3138953..9bc0b1e515fa 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -425,7 +425,6 @@ static void tipc_udp_disable(struct tipc_bearer *b)
}
if (ub->ubsock)
sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
- RCU_INIT_POINTER(b->media_ptr, NULL);
RCU_INIT_POINTER(ub->bearer, NULL);
/* sock_release need to be done outside of rtnl lock */
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 3893409dee95..f223026ddb03 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -419,6 +419,7 @@ use_default_name:
device_initialize(&rdev->wiphy.dev);
rdev->wiphy.dev.class = &ieee80211_class;
rdev->wiphy.dev.platform_data = rdev;
+ device_enable_async_suspend(&rdev->wiphy.dev);
INIT_LIST_HEAD(&rdev->destroy_list);
spin_lock_init(&rdev->destroy_list_lock);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5d8748b4c8a2..f05ba8b7af61 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3,6 +3,7 @@
*
* Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
+ * Copyright 2015 Intel Deutschland GmbH
*/
#include <linux/if.h>
@@ -2403,6 +2404,16 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag
}
}
+ if (rdev->ops->get_tx_power) {
+ int dbm, ret;
+
+ ret = rdev_get_tx_power(rdev, wdev, &dbm);
+ if (ret == 0 &&
+ nla_put_u32(msg, NL80211_ATTR_WIPHY_TX_POWER_LEVEL,
+ DBM_TO_MBM(dbm)))
+ goto nla_put_failure;
+ }
+
if (wdev->ssid_len) {
if (nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid))
goto nla_put_failure;
@@ -3998,7 +4009,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
params->sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
}
- if (statype != CFG80211_STA_TDLS_PEER_SETUP) {
+ if (statype != CFG80211_STA_TDLS_PEER_SETUP &&
+ statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
/* reject other things that can't change */
if (params->sta_modify_mask & STATION_PARAM_APPLY_UAPSD)
return -EINVAL;
@@ -4010,7 +4022,8 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
return -EINVAL;
}
- if (statype != CFG80211_STA_AP_CLIENT) {
+ if (statype != CFG80211_STA_AP_CLIENT &&
+ statype != CFG80211_STA_AP_CLIENT_UNASSOC) {
if (params->vlan)
return -EINVAL;
}
@@ -4022,6 +4035,7 @@ int cfg80211_check_station_change(struct wiphy *wiphy,
return -EOPNOTSUPP;
break;
case CFG80211_STA_AP_CLIENT:
+ case CFG80211_STA_AP_CLIENT_UNASSOC:
/* accept only the listed bits */
if (params->sta_flags_mask &
~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
@@ -9938,6 +9952,9 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
if (!wdev->netdev && !wdev->p2p_started)
return -ENETDOWN;
}
+
+ if (!vcmd->doit)
+ return -EOPNOTSUPP;
} else {
wdev = NULL;
}
@@ -9957,6 +9974,193 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
return -EOPNOTSUPP;
}
+/* Resolve the device and vendor command a vendor dump request refers to.
+ *
+ * On the first call (cb->args[0] == 0) the request attributes are parsed
+ * and the result is cached in cb->args[]:
+ *   args[0]: wiphy index + 1 (non-zero means "already parsed")
+ *   args[1]: wdev identifier + 1, or 0 when no wdev was resolved
+ *   args[2]: index into the wiphy's vendor_commands array
+ *   args[3]: pointer to the NL80211_ATTR_VENDOR_DATA payload, NULL if absent
+ *   args[4]: length of that payload
+ * Later calls only look the wiphy and wdev up again from args[0] and args[1].
+ *
+ * Returns 0 with the RTNL still held (the caller has to rtnl_unlock()),
+ * or a negative errno with the RTNL released.
+ */
+static int nl80211_prepare_vendor_dump(struct sk_buff *skb,
+				       struct netlink_callback *cb,
+				       struct cfg80211_registered_device **rdev,
+				       struct wireless_dev **wdev)
+{
+	struct nlattr **attrs;
+	struct nlattr *vdata;
+	unsigned int payload_len = 0;
+	void *payload = NULL;
+	int found_idx = -1;
+	unsigned int n;
+	u32 vid, subcmd;
+	int err;
+
+	rtnl_lock();
+
+	if (cb->args[0]) {
+		/* continuation: undo the +1 applied when the index was cached */
+		struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
+		struct wireless_dev *iter;
+
+		if (!wiphy) {
+			err = -ENODEV;
+			goto out_unlock;
+		}
+		*rdev = wiphy_to_rdev(wiphy);
+		*wdev = NULL;
+
+		/* keep rtnl locked in successful case */
+		if (!cb->args[1])
+			return 0;
+
+		list_for_each_entry(iter, &(*rdev)->wdev_list, list) {
+			if (iter->identifier != cb->args[1] - 1)
+				continue;
+			*wdev = iter;
+			break;
+		}
+
+		/* keep rtnl locked in successful case */
+		return 0;
+	}
+
+	attrs = nl80211_fam.attrbuf;
+	err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize,
+			  attrs, nl80211_fam.maxattr, nl80211_policy);
+	if (err)
+		goto out_unlock;
+
+	if (!attrs[NL80211_ATTR_VENDOR_ID] ||
+	    !attrs[NL80211_ATTR_VENDOR_SUBCMD]) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* a wdev is optional; a lookup failure just means "none" */
+	*wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrs);
+	if (IS_ERR(*wdev))
+		*wdev = NULL;
+
+	*rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrs);
+	if (IS_ERR(*rdev)) {
+		err = PTR_ERR(*rdev);
+		goto out_unlock;
+	}
+
+	vid = nla_get_u32(attrs[NL80211_ATTR_VENDOR_ID]);
+	subcmd = nla_get_u32(attrs[NL80211_ATTR_VENDOR_SUBCMD]);
+
+	for (n = 0; n < (*rdev)->wiphy.n_vendor_commands; n++) {
+		const struct wiphy_vendor_command *cand =
+			&(*rdev)->wiphy.vendor_commands[n];
+
+		if (cand->info.vendor_id == vid &&
+		    cand->info.subcmd == subcmd) {
+			/* a match without a dump handler stays "not found" */
+			if (cand->dumpit)
+				found_idx = n;
+			break;
+		}
+	}
+
+	if (found_idx < 0) {
+		err = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
+	vdata = attrs[NL80211_ATTR_VENDOR_DATA];
+	if (vdata) {
+		payload = nla_data(vdata);
+		payload_len = nla_len(vdata);
+	}
+
+	/* 0 is the first index - add 1 to parse only once */
+	cb->args[0] = (*rdev)->wiphy_idx + 1;
+	/* add 1 to know if it was NULL */
+	cb->args[1] = *wdev ? (*wdev)->identifier + 1 : 0;
+	cb->args[2] = found_idx;
+	cb->args[3] = (unsigned long)payload;
+	cb->args[4] = payload_len;
+
+	/* keep rtnl locked in successful case */
+	return 0;
+ out_unlock:
+	rtnl_unlock();
+	return err;
+}
+
+/* Netlink dump handler for NL80211_CMD_VENDOR.
+ *
+ * nl80211_prepare_vendor_dump() resolves the device and vendor command and,
+ * on success, returns with the RTNL held.  Every exit path after that call
+ * therefore has to go through the "out" label so the RTNL is released.
+ *
+ * The vendor's dumpit() callback is invoked repeatedly, each time with a
+ * fresh NL80211_CMD_VENDOR message and &cb->args[5] as its storage word.
+ * -ENOBUFS and -ENOENT from the callback (or failure to start another
+ * message) end this pass and the function returns skb->len; any other
+ * callback error is returned as-is.
+ */
+static int nl80211_vendor_cmd_dump(struct sk_buff *skb,
+				   struct netlink_callback *cb)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+	unsigned int vcmd_idx;
+	const struct wiphy_vendor_command *vcmd;
+	void *data;
+	int data_len;
+	int err;
+	struct nlattr *vendor_data;
+
+	err = nl80211_prepare_vendor_dump(skb, cb, &rdev, &wdev);
+	if (err)
+		return err;
+
+	/* RTNL is held from here on: leave via "out" only */
+	vcmd_idx = cb->args[2];
+	data = (void *)cb->args[3];
+	data_len = cb->args[4];
+	vcmd = &rdev->wiphy.vendor_commands[vcmd_idx];
+
+	if (vcmd->flags & (WIPHY_VENDOR_CMD_NEED_WDEV |
+			   WIPHY_VENDOR_CMD_NEED_NETDEV)) {
+		if (!wdev) {
+			err = -EINVAL;
+			goto out;
+		}
+		if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_NETDEV &&
+		    !wdev->netdev) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (vcmd->flags & WIPHY_VENDOR_CMD_NEED_RUNNING) {
+			if (wdev->netdev &&
+			    !netif_running(wdev->netdev)) {
+				err = -ENETDOWN;
+				goto out;
+			}
+			if (!wdev->netdev && !wdev->p2p_started) {
+				err = -ENETDOWN;
+				goto out;
+			}
+		}
+	}
+
+	while (1) {
+		void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid,
+					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					   NL80211_CMD_VENDOR);
+		if (!hdr)
+			break;
+
+		if (nla_put_u32(skb, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+		    (wdev && nla_put_u64(skb, NL80211_ATTR_WDEV,
+					 wdev_id(wdev)))) {
+			genlmsg_cancel(skb, hdr);
+			break;
+		}
+
+		vendor_data = nla_nest_start(skb, NL80211_ATTR_VENDOR_DATA);
+		if (!vendor_data) {
+			genlmsg_cancel(skb, hdr);
+			break;
+		}
+
+		err = vcmd->dumpit(&rdev->wiphy, wdev, skb, data, data_len,
+				   (unsigned long *)&cb->args[5]);
+		nla_nest_end(skb, vendor_data);
+
+		if (err) {
+			/* drop the partially built message in all error cases */
+			genlmsg_cancel(skb, hdr);
+			if (err == -ENOBUFS || err == -ENOENT)
+				break;
+			goto out;
+		}
+
+		genlmsg_end(skb, hdr);
+	}
+
+	err = skb->len;
+ out:
+	rtnl_unlock();
+	return err;
+}
+
struct sk_buff *__cfg80211_alloc_reply_skb(struct wiphy *wiphy,
enum nl80211_commands cmd,
enum nl80211_attrs attr,
@@ -10994,6 +11198,7 @@ static const struct genl_ops nl80211_ops[] = {
{
.cmd = NL80211_CMD_VENDOR,
.doit = nl80211_vendor_cmd,
+ .dumpit = nl80211_vendor_cmd_dump,
.policy = nl80211_policy,
.flags = GENL_ADMIN_PERM,
.internal_flags = NL80211_FLAG_NEED_WIPHY |
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2510b231451e..7258246b7458 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1040,8 +1040,8 @@ freq_reg_info_regd(struct wiphy *wiphy, u32 center_freq,
return ERR_PTR(-EINVAL);
}
-const struct ieee80211_reg_rule *__freq_reg_info(struct wiphy *wiphy,
- u32 center_freq, u32 min_bw)
+static const struct ieee80211_reg_rule *
+__freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 min_bw)
{
const struct ieee80211_regdomain *regd = reg_get_regdomain(wiphy);
const struct ieee80211_reg_rule *reg_rule = NULL;
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 68ada2ca4b60..cc3676eb6239 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -19,7 +19,7 @@
#include <net/dst.h>
#include <net/xfrm.h>
-static int xfrm_output2(struct sock *sk, struct sk_buff *skb);
+static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
static int xfrm_skb_check_space(struct sk_buff *skb)
{
@@ -131,18 +131,20 @@ out:
int xfrm_output_resume(struct sk_buff *skb, int err)
{
+ struct net *net = xs_net(skb_dst(skb)->xfrm);
+
while (likely((err = xfrm_output_one(skb, err)) == 0)) {
nf_reset(skb);
- err = skb_dst(skb)->ops->local_out(skb);
+ err = skb_dst(skb)->ops->local_out(net, skb->sk, skb);
if (unlikely(err != 1))
goto out;
if (!skb_dst(skb)->xfrm)
- return dst_output(skb);
+ return dst_output(net, skb->sk, skb);
err = nf_hook(skb_dst(skb)->ops->family,
- NF_INET_POST_ROUTING, skb->sk, skb,
+ NF_INET_POST_ROUTING, net, skb->sk, skb,
NULL, skb_dst(skb)->dev, xfrm_output2);
if (unlikely(err != 1))
goto out;
@@ -156,12 +158,12 @@ out:
}
EXPORT_SYMBOL_GPL(xfrm_output_resume);
-static int xfrm_output2(struct sock *sk, struct sk_buff *skb)
+static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return xfrm_output_resume(skb, 1);
}
-static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb)
+static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct sk_buff *segs;
@@ -177,7 +179,7 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb)
int err;
segs->next = NULL;
- err = xfrm_output2(sk, segs);
+ err = xfrm_output2(net, sk, segs);
if (unlikely(err)) {
kfree_skb_list(nskb);
@@ -196,7 +198,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
int err;
if (skb_is_gso(skb))
- return xfrm_output_gso(sk, skb);
+ return xfrm_output_gso(net, sk, skb);
if (skb->ip_summed == CHECKSUM_PARTIAL) {
err = skb_checksum_help(skb);
@@ -207,7 +209,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
}
}
- return xfrm_output2(sk, skb);
+ return xfrm_output2(net, sk, skb);
}
EXPORT_SYMBOL_GPL(xfrm_output);
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 94af3d065785..09bfcbac63bb 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1208,7 +1208,7 @@ static inline int policy_to_flow_dir(int dir)
}
}
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
+static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
const struct flowi *fl)
{
struct xfrm_policy *pol;
@@ -1583,8 +1583,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
xdst->flo.ops = &xfrm_bundle_fc_ops;
- if (afinfo->init_dst)
- afinfo->init_dst(net, xdst);
} else
xdst = ERR_PTR(-ENOBUFS);
@@ -1889,6 +1887,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
struct sock *sk;
struct dst_entry *dst;
struct xfrm_policy *pol = (struct xfrm_policy *)arg;
+ struct net *net = xp_net(pol);
struct xfrm_policy_queue *pq = &pol->polq;
struct flowi fl;
struct sk_buff_head list;
@@ -1905,8 +1904,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
spin_unlock(&pq->hold_queue.lock);
dst_hold(dst->path);
- dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
- sk, 0);
+ dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
if (IS_ERR(dst))
goto purge_queue;
@@ -1936,8 +1934,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
dst_hold(skb_dst(skb)->path);
- dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
- &fl, skb->sk, 0);
+ dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
if (IS_ERR(dst)) {
kfree_skb(skb);
continue;
@@ -1947,7 +1944,7 @@ static void xfrm_policy_queue_process(unsigned long arg)
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- dst_output(skb);
+ dst_output(net, skb->sk, skb);
}
out:
@@ -1960,7 +1957,7 @@ purge_queue:
xfrm_pol_put(pol);
}
-static int xdst_queue_output(struct sock *sk, struct sk_buff *skb)
+static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
unsigned long sched_next;
struct dst_entry *dst = skb_dst(skb);
@@ -2187,7 +2184,7 @@ static struct dst_entry *make_blackhole(struct net *net, u16 family,
*/
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl,
- struct sock *sk, int flags)
+ const struct sock *sk, int flags)
{
struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
struct flow_cache_object *flo;
@@ -2335,7 +2332,7 @@ EXPORT_SYMBOL(xfrm_lookup);
*/
struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
const struct flowi *fl,
- struct sock *sk, int flags)
+ const struct sock *sk, int flags)
{
struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
flags | XFRM_LOOKUP_QUEUE |